570.144

570.133.20
570.133.07
2026-01-27 11:39:46 +00:00 · 2025-04-27 09:05:17 +02:00 · 2025-04-17 17:56:49 +02:00 · 2025-03-19 14:13:05 +01:00 · 2025-03-03 19:08:20 +01:00 · 2025-02-27 17:32:23 +01:00
1215 changed files with 328137 additions and 187698 deletions
--- a/README.md
+++ b/README.md
--- a/kernel-open/Kbuild
+++ b/kernel-open/Kbuild
@@ -57,6 +57,20 @@ ifeq ($(NV_UNDEF_BEHAVIOR_SANITIZER),1)
 UBSAN_SANITIZE := y
 endif

+#
+# Command to create a symbolic link, explicitly resolving the symlink target
+# to an absolute path to abstract away the difference between Linux < 6.13,
+# where the CWD is the Linux kernel source tree for Kbuild extmod builds, and
+# Linux >= 6.13, where the CWD is the external module source tree.
+#
+# This is used to create the nv*-kernel.o -> nv*-kernel.o_binary symlinks for
+# kernel modules which use precompiled binary object files.
+#
+
+quiet_cmd_symlink = SYMLINK $@
+ cmd_symlink = ln -sf $(abspath $<) $@
+
+
 $(foreach _module, $(NV_KERNEL_MODULES), \
 $(eval include $(src)/$(_module)/$(_module).Kbuild))

@@ -72,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"565.57.01\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.144\"

 ifneq ($(SYSSRCHOST1X),)
 EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
--- a/kernel-open/Makefile
+++ b/kernel-open/Makefile
@@ -52,6 +52,22 @@ else
    endif
  endif

+  # If CC hasn't been set explicitly, check the value of CONFIG_CC_VERSION_TEXT.
+  # Look for the compiler specified there, and use it by default, if found.
+  ifeq ($(origin CC),default)
+    cc_version_text=$(firstword $(shell . $(KERNEL_OUTPUT)/.config; \
+                      echo "$$CONFIG_CC_VERSION_TEXT"))
+
+    ifneq ($(cc_version_text),)
+      ifeq ($(shell command -v $(cc_version_text)),)
+          $(warning WARNING: Unable to locate the compiler $(cc_version_text) \
+            from CONFIG_CC_VERSION_TEXT in the kernel configuration.)
+      else
+          CC=$(cc_version_text)
+      endif
+    endif
+  endif
+
  CC ?= cc
  LD ?= ld
  OBJDUMP ?= objdump
@@ -65,6 +81,16 @@ else
    )
  endif

+  KERNEL_ARCH = $(ARCH)
+
+  ifneq ($(filter $(ARCH),i386 x86_64),)
+    KERNEL_ARCH = x86
+  else
+    ifeq ($(filter $(ARCH),arm64 powerpc),)
+        $(error Unsupported architecture $(ARCH))
+    endif
+  endif
+
  NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem)
  NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \
                                    $(NV_KERNEL_MODULES))
@@ -106,8 +132,9 @@ else
  # module symbols on which the Linux kernel's module resolution is dependent
  # and hence must be used whenever present.

-  LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds      \
-               $(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \
+  LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds             \
+               $(KERNEL_SOURCES)/arch/$(KERNEL_ARCH)/kernel/module.lds \
+               $(KERNEL_OUTPUT)/arch/$(KERNEL_ARCH)/module.lds         \
               $(KERNEL_OUTPUT)/scripts/module.lds
  NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s))

--- a/kernel-open/common/inc/nv-firmware.h
+++ b/kernel-open/common/inc/nv-firmware.h
@@ -32,7 +32,10 @@
 typedef enum
 {
    NV_FIRMWARE_TYPE_GSP,
-    NV_FIRMWARE_TYPE_GSP_LOG
+    NV_FIRMWARE_TYPE_GSP_LOG,
+#if defined(NV_VMWARE)
+    NV_FIRMWARE_TYPE_BINDATA
+#endif
 } nv_firmware_type_t;

 typedef enum
@@ -45,6 +48,7 @@ typedef enum
    NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
    NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
    NV_FIRMWARE_CHIP_FAMILY_GB10X = 8,
+    NV_FIRMWARE_CHIP_FAMILY_GB20X = 9,
    NV_FIRMWARE_CHIP_FAMILY_END,
 } nv_firmware_chip_family_t;

@@ -54,6 +58,7 @@ static inline const char *nv_firmware_chip_family_to_string(
 {
    switch (fw_chip_family) {
        case NV_FIRMWARE_CHIP_FAMILY_GB10X: return "gb10x";
+        case NV_FIRMWARE_CHIP_FAMILY_GB20X: return "gb20x";
        case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
        case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
        case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
@@ -84,6 +89,7 @@ static inline const char *nv_firmware_for_chip_family(
        switch (fw_chip_family)
        {
            case NV_FIRMWARE_CHIP_FAMILY_GB10X:  // fall through
+            case NV_FIRMWARE_CHIP_FAMILY_GB20X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GH100:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_AD10X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GA10X:
@@ -104,6 +110,7 @@ static inline const char *nv_firmware_for_chip_family(
        switch (fw_chip_family)
        {
            case NV_FIRMWARE_CHIP_FAMILY_GB10X:  // fall through
+            case NV_FIRMWARE_CHIP_FAMILY_GB20X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GH100:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_AD10X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GA10X:
@@ -119,7 +126,12 @@ static inline const char *nv_firmware_for_chip_family(
                return "";
        }
    }
-
+#if defined(NV_VMWARE)
+    else if (fw_type == NV_FIRMWARE_TYPE_BINDATA)
+    {
+        return NV_FIRMWARE_FOR_NAME("bindata_image");
+    }
+#endif
    return "";
 }
 #endif  // defined(NV_FIRMWARE_FOR_NAME)
--- a/kernel-open/common/inc/nv-ioctl.h
+++ b/kernel-open/common/inc/nv-ioctl.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -128,6 +128,9 @@ typedef struct nv_ioctl_register_fd

 #define NV_DMABUF_EXPORT_MAX_HANDLES 128

+#define NV_DMABUF_EXPORT_MAPPING_TYPE_DEFAULT        0
+#define NV_DMABUF_EXPORT_MAPPING_TYPE_FORCE_PCIE     1
+
 typedef struct nv_ioctl_export_to_dma_buf_fd
 {
    int         fd;
@@ -136,6 +139,7 @@ typedef struct nv_ioctl_export_to_dma_buf_fd
    NvU32       numObjects;
    NvU32       index;
    NvU64       totalSize NV_ALIGN_BYTES(8);
+    NvU8        mappingType;
    NvHandle    handles[NV_DMABUF_EXPORT_MAX_HANDLES];
    NvU64       offsets[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);
    NvU64       sizes[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);
--- a/kernel-open/common/inc/nv-linux.h
+++ b/kernel-open/common/inc/nv-linux.h
@@ -231,12 +231,6 @@ NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
        const char *, va_list);
 #endif

-#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
-#include <asm/eeh.h>
-#define NV_PCI_ERROR_RECOVERY_ENABLED() eeh_enabled()
-#define NV_PCI_ERROR_RECOVERY
-#endif
-
 #if defined(NV_ASM_SET_MEMORY_H_PRESENT)
 #include <asm/set_memory.h>
 #endif
@@ -351,8 +345,6 @@ extern int nv_pat_mode;

 #define NV_PAGE_COUNT(page) \
  ((unsigned int)page_count(page))
-#define NV_GET_PAGE_COUNT(page_ptr) \
-  (NV_PAGE_COUNT(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)))
 #define NV_GET_PAGE_FLAGS(page_ptr) \
  (NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)

@@ -609,7 +601,7 @@ static NvBool nv_numa_node_has_memory(int node_id)

 #define NV_ALLOC_PAGES_NODE(ptr, nid, order, gfp_mask) \
    { \
-        (ptr) = (unsigned long)page_address(alloc_pages_node(nid, gfp_mask, order)); \
+        (ptr) = (unsigned long) alloc_pages_node(nid, gfp_mask, order); \
    }

 #define NV_GET_FREE_PAGES(ptr, order, gfp_mask)      \
@@ -881,16 +873,6 @@ typedef void irqreturn_t;
 #define PCI_CAP_ID_EXP 0x10
 #endif

-/*
- * On Linux on PPC64LE enable basic support for Linux PCI error recovery (see
- * Documentation/PCI/pci-error-recovery.txt). Currently RM only supports error
- * notification and data collection, not actual recovery of the device.
- */
-#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
-#include <asm/eeh.h>
-#define NV_PCI_ERROR_RECOVERY
-#endif
-
 /*
 * If the host OS has page sizes larger than 4KB, we may have a security
 * problem. Registers are typically grouped in 4KB pages, but if there are
@@ -1177,10 +1159,6 @@ typedef struct nvidia_pte_s {
    NvU64           phys_addr;
    unsigned long   virt_addr;
    NvU64           dma_addr;
-#ifdef CONFIG_XEN
-    unsigned int    guest_pfn;
-#endif
-    unsigned int    page_count;
 } nvidia_pte_t;

 #if defined(CONFIG_DMA_SHARED_BUFFER)
@@ -1419,8 +1397,6 @@ typedef struct nv_dma_map_s {
            0 ? NV_OK : NV_ERR_OPERATING_SYSTEM)
 #endif

-typedef struct nv_ibmnpu_info nv_ibmnpu_info_t;
-
 typedef struct nv_work_s {
    struct work_struct task;
    void *data;
@@ -1468,7 +1444,6 @@ struct nv_dma_device {
    } addressable_range;

    struct device *dev;
-    NvBool nvlink;
 };

 /* Properties of the coherent link */
@@ -1517,9 +1492,6 @@ typedef struct nv_linux_state_s {
    struct device  *dev;
    struct pci_dev *pci_dev;

-    /* IBM-NPU info associated with this GPU */
-    nv_ibmnpu_info_t *npu;
-
    /* coherent link information */
     coherent_link_info_t coherent_link_info;

@@ -1835,7 +1807,7 @@ static inline int nv_is_control_device(struct inode *inode)
    return (minor((inode)->i_rdev) == NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE);
 }

-#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD)
+#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD) || defined(NV_DEVICE_VM_BUILD)
 #define NV_VGX_HYPER
 #if defined(NV_XEN_IOEMU_INJECT_MSI)
 #include <xen/ioemu.h>
@@ -1872,59 +1844,6 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
 #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
 #endif

-/*
- * Starting on Power9 systems, DMA addresses for NVLink are no longer
- * the same as used over PCIe.
- *
- * Power9 supports a 56-bit Real Address. This address range is compressed
- * when accessed over NVLink to allow the GPU to access all of memory using
- * its 47-bit Physical address.
- *
- * If there is an NPU device present on the system, it implies that NVLink
- * sysmem links are present and we need to apply the required address
- * conversion for NVLink within the driver.
- *
- * See Bug 1920398 for further background and details.
- *
- * Note, a deviation from the documented compression scheme is that the
- * upper address bits (i.e. bit 56-63) instead of being set to zero are
- * preserved during NVLink address compression so the orignal PCIe DMA
- * address can be reconstructed on expansion. These bits can be safely
- * ignored on NVLink since they are truncated by the GPU.
- *
- * Bug 1968345: As a performance enhancement it is the responsibility of
- * the caller on PowerPC platforms to check for presence of an NPU device
- * before the address transformation is applied.
- */
-static inline NvU64 nv_compress_nvlink_addr(NvU64 addr)
-{
-    NvU64 addr47 = addr;
-
-#if defined(NVCPU_PPC64LE)
-    addr47 = addr & ((1ULL << 43) - 1);
-    addr47 |= (addr & (0x3ULL << 45)) >> 2;
-    WARN_ON(addr47 & (1ULL << 44));
-    addr47 |= (addr & (0x3ULL << 49)) >> 4;
-    addr47 |= addr & ~((1ULL << 56) - 1);
-#endif
-
-    return addr47;
-}
-
-static inline NvU64 nv_expand_nvlink_addr(NvU64 addr47)
-{
-    NvU64 addr = addr47;
-
-#if defined(NVCPU_PPC64LE)
-    addr = addr47 & ((1ULL << 43) - 1);
-    addr |= (addr47 & (3ULL << 43)) << 2;
-    addr |= (addr47 & (3ULL << 45)) << 4;
-    addr |= addr47 & ~((1ULL << 56) - 1);
-#endif
-
-    return addr;
-}
-
 // Default flags for ISRs
 static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
 {
--- a/kernel-open/common/inc/nv-pci.h
+++ b/kernel-open/common/inc/nv-pci.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -36,5 +36,6 @@ int nv_pci_count_devices(void);
 NvU8 nv_find_pci_capability(struct pci_dev *, NvU8);
 int nvidia_dev_get_pci_info(const NvU8 *, struct pci_dev **, NvU64 *, NvU64 *);
 nv_linux_state_t * find_pci(NvU32, NvU8, NvU8, NvU8);
+NvBool nv_pci_is_valid_topology_for_direct_pci(nv_state_t *, struct device *);

 #endif
--- a/kernel-open/common/inc/nv.h
+++ b/kernel-open/common/inc/nv.h
@@ -368,6 +368,8 @@ typedef struct nv_state_t
    {
        NvBool         valid;
        NvU8           uuid[GPU_UUID_LEN];
+        NvBool         pci_uuid_read_attempted;
+        NV_STATUS      pci_uuid_status;
    } nv_uuid_cache;
    void *handle;

@@ -479,6 +481,8 @@ typedef struct nv_state_t
    /* Bool to check if the GPU has a coherent sysmem link */
    NvBool coherent;

+    /* OS detected GPU has ATS capability */
+    NvBool ats_support;
    /*
     * NUMA node ID of the CPU to which the GPU is attached.
     * Holds NUMA_NO_NODE on platforms that don't support NUMA configuration.
@@ -494,6 +498,9 @@ typedef struct nv_state_t
        NvU32 dispIsoStreamId;
        NvU32 dispNisoStreamId;
    } iommus;
+
+    /* Console is managed by drm drivers or NVKMS */
+    NvBool client_managed_console;
 } nv_state_t;

 #define NVFP_TYPE_NONE       0x0
@@ -538,9 +545,9 @@ typedef struct UvmGpuNvlinkInfo_tag                 *nvgpuNvlinkInfo_t;
 typedef struct UvmGpuEccInfo_tag                    *nvgpuEccInfo_t;
 typedef struct UvmGpuFaultInfo_tag                  *nvgpuFaultInfo_t;
 typedef struct UvmGpuAccessCntrInfo_tag             *nvgpuAccessCntrInfo_t;
-typedef struct UvmGpuAccessCntrConfig_tag           *nvgpuAccessCntrConfig_t;
-typedef struct UvmGpuInfo_tag                       nvgpuInfo_t;
-typedef struct UvmGpuClientInfo_tag                 nvgpuClientInfo_t;
+typedef struct UvmGpuAccessCntrConfig_tag            nvgpuAccessCntrConfig_t;
+typedef struct UvmGpuInfo_tag                        nvgpuInfo_t;
+typedef struct UvmGpuClientInfo_tag                  nvgpuClientInfo_t;
 typedef struct UvmPmaAllocationOptions_tag          *nvgpuPmaAllocationOptions_t;
 typedef struct UvmPmaStatistics_tag                 *nvgpuPmaStatistics_t;
 typedef struct UvmGpuMemoryInfo_tag                 *nvgpuMemoryInfo_t;
@@ -570,7 +577,8 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemor
 #define NV_FLAG_PASSTHRU               0x0080
 #define NV_FLAG_SUSPENDED              0x0100
 #define NV_FLAG_SOC_IGPU               0x0200
-// Unused                              0x0400
+/* To be set when an FLR needs to be triggered after device shut down. */
+#define NV_FLAG_TRIGGER_FLR            0x0400
 #define NV_FLAG_PERSISTENT_SW_STATE    0x0800
 #define NV_FLAG_IN_RECOVERY            0x1000
 // Unused                              0x2000
@@ -613,6 +621,7 @@ typedef struct
    const char *gc6_support;
    const char *gcoff_support;
    const char *s0ix_status;
+    const char *db_support;
 } nv_power_info_t;

 #define NV_PRIMARY_VGA(nv)      ((nv)->primary_vga)
@@ -758,6 +767,7 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
 #define NV_ALIGN_DOWN(v,g) ((v) & ~((g) - 1))
 #endif

+
 /*
 * driver internal interfaces
 */
@@ -813,7 +823,6 @@ NV_STATUS  NV_API_CALL  nv_dma_map_mmio          (nv_dma_device_t *, NvU64, NvU6
 void       NV_API_CALL  nv_dma_unmap_mmio        (nv_dma_device_t *, NvU64, NvU64);

 void       NV_API_CALL  nv_dma_cache_invalidate  (nv_dma_device_t *, void *);
-void       NV_API_CALL  nv_dma_enable_nvlink     (nv_dma_device_t *);

 NvS32  NV_API_CALL  nv_start_rc_timer            (nv_state_t *);
 NvS32  NV_API_CALL  nv_stop_rc_timer             (nv_state_t *);
@@ -840,9 +849,7 @@ NV_STATUS  NV_API_CALL  nv_acpi_mux_method       (nv_state_t *, NvU32 *, NvU32,

 NV_STATUS  NV_API_CALL  nv_log_error             (nv_state_t *, NvU32, const char *, va_list);

-NvU64      NV_API_CALL  nv_get_dma_start_address (nv_state_t *);
 NV_STATUS  NV_API_CALL  nv_set_primary_vga_status(nv_state_t *);
-NV_STATUS  NV_API_CALL  nv_pci_trigger_recovery  (nv_state_t *);
 NvBool     NV_API_CALL  nv_requires_dma_remap    (nv_state_t *);

 NvBool     NV_API_CALL  nv_is_rm_firmware_active(nv_state_t *);
@@ -855,19 +862,8 @@ void               NV_API_CALL nv_put_file_private(void *);
 NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
 NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);

-NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
-NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
-
-void      NV_API_CALL nv_wait_for_ibmnpu_rsync(nv_state_t *nv);
-
-void      NV_API_CALL nv_ibmnpu_cache_flush_range(nv_state_t *nv, NvU64, NvU64);
-
 void      NV_API_CALL nv_p2p_free_platform_data(void *data);

-#if defined(NVCPU_PPC64LE)
-NV_STATUS NV_API_CALL nv_get_nvlink_line_rate    (nv_state_t *, NvU32 *);
-#endif
-
 NV_STATUS NV_API_CALL nv_revoke_gpu_mappings     (nv_state_t *);
 void      NV_API_CALL nv_acquire_mmap_lock       (nv_state_t *);
 void      NV_API_CALL nv_release_mmap_lock       (nv_state_t *);
@@ -998,18 +994,24 @@ NV_STATUS  NV_API_CALL  rm_p2p_init_mapping       (nvidia_stack_t *, NvU64, NvU6
 NV_STATUS  NV_API_CALL  rm_p2p_destroy_mapping    (nvidia_stack_t *, NvU64);
 NV_STATUS  NV_API_CALL  rm_p2p_get_pages          (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
 NV_STATUS  NV_API_CALL  rm_p2p_get_gpu_info       (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
-NV_STATUS  NV_API_CALL  rm_p2p_get_pages_persistent (nvidia_stack_t *,  NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *, void **);
+NV_STATUS  NV_API_CALL  rm_p2p_get_pages_persistent (nvidia_stack_t *,  NvU64, NvU64, void **, NvU64 *, NvU32 *, NvBool, void *, void *, void **);
 NV_STATUS  NV_API_CALL  rm_p2p_register_callback  (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
 NV_STATUS  NV_API_CALL  rm_p2p_put_pages          (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
 NV_STATUS  NV_API_CALL  rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
 NV_STATUS  NV_API_CALL  rm_p2p_dma_map_pages      (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
 NV_STATUS  NV_API_CALL  rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
 void       NV_API_CALL  rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
-NV_STATUS  NV_API_CALL  rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, MemoryRange, void *, NvBool, MemoryArea *);
-void       NV_API_CALL  rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, void *, NvBool, MemoryArea);
-NV_STATUS  NV_API_CALL  rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
+NV_STATUS  NV_API_CALL  rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *,
+                                                   NvHandle, NvHandle, MemoryRange,
+                                                   NvU8, void *, NvBool, MemoryArea *);
+void       NV_API_CALL  rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *,
+                                                    NvHandle, NvHandle, NvU8, void *,
+                                                    NvBool, MemoryArea);
+NV_STATUS  NV_API_CALL  rm_dma_buf_get_client_and_device(nvidia_stack_t *,
+                                                nv_state_t *, NvHandle, NvHandle,
+                                                NvU8, NvHandle *, NvHandle *,
+                                                NvHandle *, void **, NvBool *);
 void       NV_API_CALL  rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
-NV_STATUS  NV_API_CALL  rm_log_gpu_crash          (nv_stack_t *, nv_state_t *);

 void       NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
 NvBool     NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
@@ -1026,7 +1028,6 @@ NvBool     NV_API_CALL rm_is_device_sequestered(nvidia_stack_t *, nv_state_t *);
 void       NV_API_CALL rm_check_for_gpu_surprise_removal(nvidia_stack_t *, nv_state_t *);
 NV_STATUS  NV_API_CALL rm_set_external_kernel_client_count(nvidia_stack_t *, nv_state_t *, NvBool);
 NV_STATUS  NV_API_CALL rm_schedule_gpu_wakeup(nvidia_stack_t *, nv_state_t *);
-NvBool     NV_API_CALL rm_is_iommu_needed_for_sriov(nvidia_stack_t *, nv_state_t *);
 NvBool     NV_API_CALL rm_disable_iomap_wc(void);

 void       NV_API_CALL rm_init_dynamic_power_management(nvidia_stack_t *, nv_state_t *, NvBool);
@@ -1043,12 +1044,14 @@ void       NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
 NvBool     NV_API_CALL rm_is_altstack_in_use(void);

 /* vGPU VFIO specific functions */
-NV_STATUS  NV_API_CALL  nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
+NV_STATUS  NV_API_CALL  nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *,
+                                               NvU32 *, NvU32 *, NvU32);
 NV_STATUS  NV_API_CALL  nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
 NV_STATUS  NV_API_CALL  nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
 NV_STATUS  NV_API_CALL  nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
 NV_STATUS  NV_API_CALL  nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
                                             NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
+NV_STATUS  NV_API_CALL  nv_vgpu_update_sysfs_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU32);
 NV_STATUS  NV_API_CALL  nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
 NV_STATUS  NV_API_CALL  nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
 NV_STATUS  NV_API_CALL  nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
--- a/kernel-open/common/inc/nv_uvm_interface.h
+++ b/kernel-open/common/inc/nv_uvm_interface.h
@@ -592,6 +592,14 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
    Error codes:
      NV_ERR_GENERIC
      NV_ERR_NO_MEMORY
+      NV_ERR_INVALID_STATE
+      NV_ERR_NOT_SUPPORTED
+      NV_ERR_NOT_READY
+      NV_ERR_INVALID_LOCK_STATE
+      NV_ERR_INVALID_STATE
+      NV_ERR_NVLINK_FABRIC_NOT_READY
+      NV_ERR_NVLINK_FABRIC_FAILURE
+      NV_ERR_GPU_MEMORY_ONLINING_FAILURE
 */
 NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
                                  UvmGpuCaps *caps);
@@ -1048,7 +1056,7 @@ NV_STATUS nvUvmInterfaceDestroyAccessCntrInfo(uvmGpuDeviceHandle device,
 */
 NV_STATUS nvUvmInterfaceEnableAccessCntr(uvmGpuDeviceHandle device,
                                         UvmGpuAccessCntrInfo *pAccessCntrInfo,
-                                         UvmGpuAccessCntrConfig *pAccessCntrConfig);
+                                         const UvmGpuAccessCntrConfig *pAccessCntrConfig);

 /*******************************************************************************
    nvUvmInterfaceDisableAccessCntr
--- a/kernel-open/common/inc/nv_uvm_types.h
+++ b/kernel-open/common/inc/nv_uvm_types.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2014-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -620,19 +620,12 @@ typedef struct UvmGpuClientInfo_tag
    NvHandle hSmcPartRef;
 } UvmGpuClientInfo;

-typedef enum
-{
-    UVM_GPU_CONF_COMPUTE_MODE_NONE,
-    UVM_GPU_CONF_COMPUTE_MODE_APM,
-    UVM_GPU_CONF_COMPUTE_MODE_HCC,
-    UVM_GPU_CONF_COMPUTE_MODE_COUNT
-} UvmGpuConfComputeMode;
-
 typedef struct UvmGpuConfComputeCaps_tag
 {
-    // Out: GPU's confidential compute mode
-    UvmGpuConfComputeMode mode;
-    // Is key rotation enabled for UVM keys
+    // Out: true if Confidential Computing is enabled on the GPU
+    NvBool bConfComputingEnabled;
+
+    // Out: true if key rotation is enabled (for UVM keys) on the GPU
    NvBool bKeyRotationEnabled;
 } UvmGpuConfComputeCaps;

@@ -746,6 +739,8 @@ typedef struct UvmGpuInfo_tag
    // to NVSwitch peers.
    NvU64 nvswitchEgmMemoryWindowStart;

+    // GPU supports ATS capability
+    NvBool atsSupport;
 } UvmGpuInfo;

 typedef struct UvmGpuFbInfo_tag
@@ -759,7 +754,10 @@ typedef struct UvmGpuFbInfo_tag
    NvBool bZeroFb;            // Zero FB mode enabled.
    NvU64  maxVidmemPageSize;  // Largest GPU page size to access vidmem.
    NvBool bStaticBar1Enabled; // Static BAR1 mode is enabled
+    NvU64  staticBar1StartOffset;  // The start offset of the static mapping
    NvU64  staticBar1Size;     // The size of the static mapping
+    NvU32  heapStart;          // The start offset of heap in KB, helpful for MIG
+                               // systems
 } UvmGpuFbInfo;

 typedef struct UvmGpuEccInfo_tag
@@ -1105,24 +1103,9 @@ typedef enum
    UVM_ACCESS_COUNTER_GRANULARITY_16G = 4,
 } UVM_ACCESS_COUNTER_GRANULARITY;

-typedef enum
-{
-    UVM_ACCESS_COUNTER_USE_LIMIT_NONE = 1,
-    UVM_ACCESS_COUNTER_USE_LIMIT_QTR  = 2,
-    UVM_ACCESS_COUNTER_USE_LIMIT_HALF = 3,
-    UVM_ACCESS_COUNTER_USE_LIMIT_FULL = 4,
-} UVM_ACCESS_COUNTER_USE_LIMIT;
-
 typedef struct UvmGpuAccessCntrConfig_tag
 {
-    NvU32 mimcGranularity;
-
-    NvU32 momcGranularity;
-
-    NvU32 mimcUseLimit;
-
-    NvU32 momcUseLimit;
-
+    NvU32 granularity;
    NvU32 threshold;
 } UvmGpuAccessCntrConfig;

--- a/kernel-open/common/inc/nvkms-kapi.h
+++ b/kernel-open/common/inc/nvkms-kapi.h
@@ -544,6 +544,9 @@ struct NvKmsKapiCreateSurfaceParams {
     *      explicit_layout is NV_TRUE and layout is
     *      NvKmsSurfaceMemoryLayoutBlockLinear */
    NvU8 log2GobsPerBlockY;
+
+    /* [IN] Whether a surface can be updated directly on the screen */
+    NvBool noDisplayCaching;
 };

 enum NvKmsKapiAllocationType {
@@ -1011,6 +1014,17 @@ struct NvKmsKapiFunctionsTable {
        const void *pLinearAddress
    );

+    /*!
+     * Check if memory object allocated is video memory.
+     *
+     * \param [in]  memory           Memory allocated using allocateMemory()
+     *
+     * \return NV_TRUE if memory is vidmem, NV_FALSE otherwise.
+     */
+    NvBool (*isVidmem)(
+        const struct NvKmsKapiMemory *memory
+    );
+
    /*!
     * Create a formatted surface from an NvKmsKapiMemory object.
     *
--- a/kernel-open/common/inc/nvmisc.h
+++ b/kernel-open/common/inc/nvmisc.h
@@ -33,38 +33,18 @@ extern "C" {

 #include "nvtypes.h"

-#if !defined(NVIDIA_UNDEF_LEGACY_BIT_MACROS)
-//
-// Miscellaneous macros useful for bit field manipulations
-//
-// STUPID HACK FOR CL 19434692.  Will revert when fix CL is delivered bfm -> chips_a.
-#ifndef BIT
-#define BIT(b)                  (1U<<(b))
-#endif
-#ifndef BIT32
-#define BIT32(b)                ((NvU32)1U<<(b))
-#endif
-#ifndef BIT64
-#define BIT64(b)                ((NvU64)1U<<(b))
-#endif
-
-#endif
-
-//
-// It is recommended to use the following bit macros to avoid macro name
-// collisions with other src code bases.
-//
+// Miscellaneous macros useful for bit field manipulations.
 #ifndef NVBIT
-#define NVBIT(b)                  (1U<<(b))
+#define NVBIT(b)         (1U<<(b))
 #endif
 #ifndef NVBIT_TYPE
-#define NVBIT_TYPE(b, t)          (((t)1U)<<(b))
+#define NVBIT_TYPE(b, t) (((t)1U)<<(b))
 #endif
 #ifndef NVBIT32
-#define NVBIT32(b)                NVBIT_TYPE(b, NvU32)
+#define NVBIT32(b)       NVBIT_TYPE(b, NvU32)
 #endif
 #ifndef NVBIT64
-#define NVBIT64(b)                NVBIT_TYPE(b, NvU64)
+#define NVBIT64(b)       NVBIT_TYPE(b, NvU64)
 #endif

 //Concatenate 2 32bit values to a 64bit value
@@ -72,7 +52,7 @@ extern "C" {

 // Helper macro's for 32 bit bitmasks
 #define NV_BITMASK32_ELEMENT_SIZE            (sizeof(NvU32) << 3)
-#define NV_BITMASK32_IDX(chId)               (((chId) & ~(0x1F)) >> 5)  
+#define NV_BITMASK32_IDX(chId)               (((chId) & ~(0x1F)) >> 5)
 #define NV_BITMASK32_OFFSET(chId)            ((chId) & (0x1F))
 #define NV_BITMASK32_SET(pChannelMask, chId) \
        (pChannelMask)[NV_BITMASK32_IDX(chId)] |= NVBIT(NV_BITMASK32_OFFSET(chId))
@@ -990,6 +970,22 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
 // Get the number of elements the specified fixed-size array
 #define NV_ARRAY_ELEMENTS(x)                   ((sizeof(x)/sizeof((x)[0])))

+#if !defined(NVIDIA_UNDEF_LEGACY_BIT_MACROS)
+//
+// Deprecated macros whose definition can be removed once the code base no longer references them.
+// Use the NVBIT* macros instead of these macros.
+//
+#ifndef BIT
+#define BIT(b)   (1U<<(b))
+#endif
+#ifndef BIT32
+#define BIT32(b) ((NvU32)1U<<(b))
+#endif
+#ifndef BIT64
+#define BIT64(b) ((NvU64)1U<<(b))
+#endif
+#endif
+
 #ifdef __cplusplus
 }
 #endif //__cplusplus
--- a/kernel-open/common/inc/nvstatuscodes.h
+++ b/kernel-open/common/inc/nvstatuscodes.h
@@ -155,6 +155,11 @@ NV_STATUS_CODE(NV_ERR_KEY_ROTATION_IN_PROGRESS,        0x0000007D, "Operation no
 NV_STATUS_CODE(NV_ERR_TEST_ONLY_CODE_NOT_ENABLED,      0x0000007E, "Test-only code path not enabled")
 NV_STATUS_CODE(NV_ERR_SECURE_BOOT_FAILED,              0x0000007F, "GFW secure boot failed")
 NV_STATUS_CODE(NV_ERR_INSUFFICIENT_ZBC_ENTRY,          0x00000080, "No more ZBC entry for the client")
+NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY,         0x00000081, "Nvlink Fabric Status or Fabric Probe is not yet complete, caller needs to retry")
+NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE,           0x00000082, "Nvlink Fabric Probe failed")
+NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE,     0x00000083, "GPU Memory Onlining failed")
+NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
+NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR,       0x00000086, "An error occurred while trying to retire a resource")

 // Warnings:
 NV_STATUS_CODE(NV_WARN_HOT_SWITCH,                     0x00010001, "WARNING Hot switch")
--- a/kernel-open/common/inc/os-interface.h
+++ b/kernel-open/common/inc/os-interface.h
@@ -170,7 +170,7 @@ NvU32       NV_API_CALL  os_get_grid_csp_support     (void);
 void        NV_API_CALL  os_bug_check                (NvU32, const char *);
 NV_STATUS   NV_API_CALL  os_lock_user_pages          (void *, NvU64, void **, NvU32);
 NV_STATUS   NV_API_CALL  os_lookup_user_io_memory    (void *, NvU64, NvU64 **);
-NV_STATUS   NV_API_CALL  os_unlock_user_pages        (NvU64, void *);
+NV_STATUS   NV_API_CALL  os_unlock_user_pages        (NvU64, void *, NvU32);
 NV_STATUS   NV_API_CALL  os_match_mmap_offset        (void *, NvU64, NvU64 *);
 NV_STATUS   NV_API_CALL  os_get_euid                 (NvU32 *);
 NV_STATUS   NV_API_CALL  os_get_smbios_header        (NvU64 *pSmbsAddr);
@@ -178,6 +178,7 @@ NV_STATUS   NV_API_CALL  os_get_acpi_rsdp_from_uefi  (NvU32 *);
 void        NV_API_CALL  os_add_record_for_crashLog  (void *, NvU32);
 void        NV_API_CALL  os_delete_record_for_crashLog (void *);
 NV_STATUS   NV_API_CALL  os_call_vgpu_vfio           (void *, NvU32);
+NV_STATUS   NV_API_CALL  os_device_vm_present        (void);
 NV_STATUS   NV_API_CALL  os_numa_memblock_size       (NvU64 *);
 NV_STATUS   NV_API_CALL  os_alloc_pages_node         (NvS32, NvU32, NvU32, NvU64 *);
 NV_STATUS   NV_API_CALL  os_get_page                 (NvU64 address);
@@ -213,6 +214,7 @@ enum os_pci_req_atomics_type {
    OS_INTF_PCIE_REQ_ATOMICS_128BIT
 };
 NV_STATUS   NV_API_CALL  os_enable_pci_req_atomics   (void *, enum os_pci_req_atomics_type);
+void        NV_API_CALL  os_pci_trigger_flr(void *handle);
 NV_STATUS   NV_API_CALL  os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
 NV_STATUS   NV_API_CALL  os_numa_add_gpu_memory      (void *, NvU64, NvU64, NvU32 *);
 NV_STATUS   NV_API_CALL  os_numa_remove_gpu_memory   (void *, NvU64, NvU64, NvU32); 
@@ -220,6 +222,7 @@ NV_STATUS   NV_API_CALL  os_offline_page_at_address(NvU64 address);
 void*       NV_API_CALL  os_get_pid_info(void);
 void        NV_API_CALL  os_put_pid_info(void *pid_info);
 NV_STATUS   NV_API_CALL  os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
+NvBool      NV_API_CALL  os_is_init_ns(void);

 extern NvU32 os_page_size;
 extern NvU64 os_page_mask;
--- a/kernel-open/common/inc/rm-gpu-ops.h
+++ b/kernel-open/common/inc/rm-gpu-ops.h
@@ -81,7 +81,7 @@ NV_STATUS  NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack
 NV_STATUS  NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
 NV_STATUS  NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
 NV_STATUS  NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
-NV_STATUS  NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
+NV_STATUS  NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
 NV_STATUS  NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
 NV_STATUS  NV_API_CALL  rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
 NV_STATUS  NV_API_CALL  rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
--- a/kernel-open/conftest.sh
+++ b/kernel-open/conftest.sh
@@ -25,6 +25,7 @@ fi
 # VGX_KVM_BUILD parameter defined only vGPU builds on KVM hypervisor
 # GRID_BUILD parameter defined only for GRID builds (GRID Guest driver)
 # GRID_BUILD_CSP parameter defined only for GRID CSP builds (GRID Guest driver for CSPs)
+# VGX_DEVICE_VM_BUILD parameter defined only for Device VM VGX build (vGPU Host driver)

 test_xen() {
    #
@@ -806,6 +807,16 @@ compile_test() {
            return
        ;;

+        device_vm_build)
+           # Add config parameter if running on Device VM.
+           if [ -n "$VGX_DEVICE_VM_BUILD" ]; then
+                echo "#define NV_DEVICE_VM_BUILD" | append_conftest "generic"
+            else
+                echo "#undef NV_DEVICE_VM_BUILD" | append_conftest "generic"
+            fi
+            return
+        ;;
+
        vfio_register_notifier)
            #
            # Check number of arguments required.
@@ -1273,6 +1284,77 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_PFN_ADDRESS_SPACE_STRUCT_PRESENT" "" "types"
        ;;

+        egm_module_helper_api_present)
+            #
+            # Determine if the EGM management APIs are present or not.
+            #
+            CODE="
+            #include <linux/pci.h>
+            #include <linux/nvgrace-egm.h>
+            void conftest_egm_module_helper_api_present() {
+                struct pci_dev *pdev;
+                register_egm_node(pdev);
+                unregister_egm_node(0);
+            }
+            "
+            compile_check_conftest "$CODE" "NV_EGM_MODULE_HELPER_API_PRESENT" "" "types"
+        ;;
+
+        egm_bad_pages_handling_support)
+            #
+            # Determine if egm_bad_pages_list is present or not.
+            #
+            CODE="
+            #include <linux/types.h>
+            #include <linux/egm.h>
+            void conftest_egm_bad_pages_handle() {
+                int ioctl = EGM_BAD_PAGES_LIST;
+                struct egm_bad_pages_list list;
+            }
+            "
+
+            compile_check_conftest "$CODE" "NV_EGM_BAD_PAGES_HANDLING_SUPPORT" "" "types"
+        ;;
+
+        class_create_has_no_owner_arg)
+            #
+            # Determine if the class_create API with the new signature
+            # is present or not.
+            #
+            # Added by commit 1aaba11da9aa ("driver core: class: remove
+            # module * from class_create()") in v6.4 (2023-03-13)
+            #
+            CODE="
+            #include <linux/device/class.h>
+            void conftest_class_create() {
+                struct class *class;
+                class = class_create(\"test\");
+            }"
+
+            compile_check_conftest "$CODE" "NV_CLASS_CREATE_HAS_NO_OWNER_ARG" "" "types"
+        ;;
+
+        class_devnode_has_const_arg)
+            #
+            # Determine if the class.devnode is present with the new signature.
+            #
+            # Added by commit ff62b8e6588f ("driver core: make struct
+            # class.devnode() take a const *") in v6.2 (2022-11-23)
+            #
+            CODE="
+            #include <linux/device.h>
+            static char *conftest_devnode(const struct device *device, umode_t *mode) {
+                return NULL;
+            }
+
+            void conftest_class_devnode() {
+                struct class class;
+                class.devnode = conftest_devnode;
+            }"
+
+            compile_check_conftest "$CODE" "NV_CLASS_DEVNODE_HAS_CONST_ARG" "" "types"
+        ;;
+
        pci_irq_vector_helpers)
            #
            # Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors()
@@ -1770,22 +1852,6 @@ compile_test() {
            fi
        ;;

-        pnv_pci_get_npu_dev)
-            #
-            # Determine if the pnv_pci_get_npu_dev function is present.
-            #
-            # Added by commit 5d2aa710e697 ("powerpc/powernv: Add support
-            # for Nvlink NPUs") in v4.5
-            #
-            CODE="
-            #include <linux/pci.h>
-            void conftest_pnv_pci_get_npu_dev() {
-                pnv_pci_get_npu_dev();
-            }"
-
-            compile_check_conftest "$CODE" "NV_PNV_PCI_GET_NPU_DEV_PRESENT" "" "functions"
-        ;;
-
        kernel_write_has_pointer_pos_arg)
            #
            # Determine the pos argument type, which was changed by commit
@@ -2450,6 +2516,22 @@ compile_test() {
            fi
        ;;

+        file_operations_fop_unsigned_offset_present)
+            #
+            # Determine if the FOP_UNSIGNED_OFFSET define is present.
+            #
+            # Added by commit 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to
+            # fop_flags") in v6.12.
+            #
+            CODE="
+            #include <linux/fs.h>
+            int conftest_file_operations_fop_unsigned_offset_present(void) {
+                return FOP_UNSIGNED_OFFSET;
+            }"
+
+            compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT" "" "types"
+        ;;
+
        pci_dev_has_ats_enabled)
            #
            # Determine if the 'pci_dev' data type has a 'ats_enabled' member.
@@ -5207,6 +5289,45 @@ compile_test() {

            compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
        ;;
+
+        follow_pte_arg_vma)
+            #
+            # Determine if the first argument of follow_pte is
+            # mm_struct or vm_area_struct.
+            #
+            # The first argument was changed from mm_struct to vm_area_struct by
+            # commit 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()")
+            #
+            CODE="
+            #include <linux/mm.h>
+
+            typeof(follow_pte) conftest_follow_pte_has_vma_arg;
+            int conftest_follow_pte_has_vma_arg(struct vm_area_struct *vma,
+                                                unsigned long address,
+                                                pte_t **ptep,
+                                                spinlock_t **ptl) {
+                return 0;
+            }"
+
+            compile_check_conftest "$CODE" "NV_FOLLOW_PTE_ARG1_VMA" "" "types"
+        ;;
+
+        ptep_get)
+            #
+            # Determine if ptep_get() is present.
+            #
+            # ptep_get() was added by commit 481e980a7c19
+            # ("mm: Allow arches to provide ptep_get()")
+            #
+            CODE="
+            #include <linux/mm.h>
+            void conftest_ptep_get(void) {
+                ptep_get();
+            }"
+
+            compile_check_conftest "$CODE" "NV_PTEP_GET_PRESENT" "" "functions"
+        ;;
+
        drm_plane_atomic_check_has_atomic_state_arg)
            #
            # Determine if drm_plane_helper_funcs::atomic_check takes 'state'
@@ -5588,6 +5709,26 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_ICC_GET_PRESENT" "" "functions"
        ;;

+        devm_of_icc_get)
+            #
+            # Determine if devm_of_icc_get() function is present
+            #
+            # Added by commit e145d9a ("interconnect: Add devm_of_icc_get() as
+            # exported API for user interconnect API")
+            #
+            CODE="
+            #if defined(NV_LINUX_INTERCONNECT_H_PRESENT)
+            #include <linux/interconnect.h>
+            #endif
+            void conftest_devm_of_icc_get(void)
+            {
+                devm_of_icc_get();
+            }
+            "
+
+            compile_check_conftest "$CODE" "NV_DEVM_ICC_GET_PRESENT" "" "functions"
+        ;;
+
        icc_set_bw)
            #
            # Determine if icc_set_bw() function is present
@@ -6034,6 +6175,20 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_PLATFORM_IRQ_COUNT_PRESENT" "" "functions"
        ;;

+		pcie_reset_flr)
+            #
+            # Determine if the pcie_reset_flr() function is present
+            #
+            # Added by commit 56f107d ("PCI: Add pcie_reset_flr() with
+            # 'probe' argument") in v5.15.
+            CODE="
+            #include <linux/pci.h>
+            int conftest_pcie_reset_flr(void) {
+                return pcie_reset_flr();
+            }"
+            compile_check_conftest "$CODE" "NV_PCIE_RESET_FLR_PRESENT" "" "functions"
+        ;;
+
        devm_clk_bulk_get_all)
            #
            # Determine if devm_clk_bulk_get_all() function is present
@@ -6191,6 +6346,32 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types"
        ;;

+        acpi_video_register_backlight)
+            #
+            # Determine if acpi_video_register_backlight() function is present
+            #
+            # acpi_video_register_backlight was added by commit 3dbc80a3e4c55c
+            # (ACPI: video: Make backlight class device registration a separate
+            # step (v2)) for v6.0 (2022-09-02).
+            # Note: the include directive for <linux/types.h> in this conftest is
+            # necessary in order to support kernels between commit 0b9f7d93ca61
+            # ("ACPI / i915: ignore firmware requests backlight change") for
+            # v3.16 (2014-07-07) and commit 3bd6bce369f5 ("ACPI / video: Port
+            # to new backlight interface selection API") for v4.2 (2015-07-16).
+            # Kernels within this range use the 'bool' type and the related
+            # 'false' value in <acpi/video.h> without first including the
+            # definitions of that type and value.
+            #
+            CODE="
+            #include <linux/types.h>
+            #include <acpi/video.h>
+            void conftest_acpi_video_register_backlight(void) {
+                acpi_video_register_backlight(0);
+            }"
+
+            compile_check_conftest "$CODE" "NV_ACPI_VIDEO_REGISTER_BACKLIGHT" "" "functions"
+        ;;
+
        acpi_video_backlight_use_native)
            #
            # Determine if acpi_video_backlight_use_native() function is present
@@ -6555,7 +6736,8 @@ compile_test() {
            # Determine whether drm_fbdev_ttm_setup is present.
            #
            # Added by commit aae4682e5d66 ("drm/fbdev-generic:
-            # Convert to fbdev-ttm") in v6.11.
+            # Convert to fbdev-ttm") in v6.11. Removed by commit
+            # 1000634477d8 ("drm/fbdev-ttm: Convert to client-setup") in v6.13.
            #
            CODE="
            #include <drm/drm_fb_helper.h>
@@ -6569,6 +6751,30 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_FBDEV_TTM_SETUP_PRESENT" "" "functions"
        ;;

+        drm_client_setup)
+            #
+            # Determine whether drm_client_setup is present.
+            #
+            # Added by commit d07fdf922592 ("drm/fbdev-ttm: Convert to
+            # client-setup") in v6.13 in drm/drm_client_setup.h, but then moved
+            # to drm/clients/drm_client_setup.h by commit b86711c6d6e2
+            # ("drm/client: Move public client header to clients/ subdirectory")
+            # in linux-next b86711c6d6e2.
+            #
+            CODE="
+            #include <drm/drm_fb_helper.h>
+            #if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
+            #include <drm/drm_client_setup.h>
+            #elif defined(NV_DRM_CLIENTS_DRM_CLIENT_SETUP_H_PRESENT)
+            #include <drm/clients/drm_client_setup.h>
+            #endif
+            void conftest_drm_client_setup(void) {
+                drm_client_setup();
+            }"
+
+            compile_check_conftest "$CODE" "NV_DRM_CLIENT_SETUP_PRESENT" "" "functions"
+        ;;
+
        drm_output_poll_changed)
            #
            # Determine whether drm_mode_config_funcs.output_poll_changed
@@ -6592,6 +6798,38 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_OUTPUT_POLL_CHANGED_PRESENT" "" "types"
        ;;

+        aperture_remove_conflicting_devices)
+            #
+            # Determine whether aperture_remove_conflicting_devices is present.
+            # 
+            # Added by commit 7283f862bd991 ("drm: Implement DRM aperture
+            # helpers under video/") in v6.0
+            CODE="
+            #if defined(NV_LINUX_APERTURE_H_PRESENT)
+            #include <linux/aperture.h>
+            #endif
+            void conftest_aperture_remove_conflicting_devices(void) {
+                aperture_remove_conflicting_devices();
+            }"
+            compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT" "" "functions"
+        ;;
+
+        aperture_remove_conflicting_pci_devices)
+            #
+            # Determine whether aperture_remove_conflicting_pci_devices is present.
+            #
+            # Added by commit 7283f862bd991 ("drm: Implement DRM aperture
+            # helpers under video/") in v6.0
+            CODE="
+            #if defined(NV_LINUX_APERTURE_H_PRESENT)
+            #include <linux/aperture.h>
+            #endif
+            void conftest_aperture_remove_conflicting_pci_devices(void) {
+                aperture_remove_conflicting_pci_devices();
+            }"
+            compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT" "" "functions"
+        ;;
+
        drm_aperture_remove_conflicting_pci_framebuffers)
            #
            # Determine whether drm_aperture_remove_conflicting_pci_framebuffers is present.
@@ -6685,17 +6923,17 @@ compile_test() {
            # This test is not complete and may return false positive.
            #
            CODE="
-	    #include <crypto/akcipher.h>
-	    #include <crypto/algapi.h>
-	    #include <crypto/ecc_curve.h>
-	    #include <crypto/ecdh.h>
-	    #include <crypto/hash.h>
-	    #include <crypto/internal/ecc.h>
-	    #include <crypto/kpp.h>
-	    #include <crypto/public_key.h>
-	    #include <crypto/sm3.h>
-	    #include <keys/asymmetric-type.h>
-	    #include <linux/crypto.h>
+            #include <crypto/akcipher.h>
+            #include <crypto/algapi.h>
+            #include <crypto/ecc_curve.h>
+            #include <crypto/ecdh.h>
+            #include <crypto/hash.h>
+            #include <crypto/internal/ecc.h>
+            #include <crypto/kpp.h>
+            #include <crypto/public_key.h>
+            #include <crypto/sm3.h>
+            #include <keys/asymmetric-type.h>
+            #include <linux/crypto.h>
            void conftest_crypto(void) {
                struct shash_desc sd;
                struct crypto_shash cs;
@@ -6705,6 +6943,47 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_CRYPTO_PRESENT" "" "symbols"
        ;;

+        crypto_akcipher_verify)
+            #
+            # Determine whether the crypto_akcipher_verify API is still present.
+            # It was removed by commit 6b34562 ('crypto: akcipher - Drop sign/verify operations')
+            # in v6.13-rc1 (2024-10-04).
+            #
+            # This test is dependent on the crypto conftest to determine whether crypto should be
+            # enabled at all.
+            #
+            # The test merely checks for the presence of the API, as it assumes that if the API
+            # is no longer present, the new API to replace it (crypto_sig_verify) must be present.
+            # If the kernel version is too old to have crypto_akcipher_verify, it will fail the crypto
+            # conftest above and all crypto code will be compiled out.
+            #
+            CODE="
+            #include <crypto/akcipher.h>
+            #include <linux/crypto.h>
+            void conftest_crypto_akcipher_verify(void) {
+                (void)crypto_akcipher_verify;
+            }"
+
+            compile_check_conftest "$CODE" "NV_CRYPTO_AKCIPHER_VERIFY_PRESENT" "" "symbols"
+            ;;
+
+        ecc_digits_from_bytes)
+            #
+            # Determine whether ecc_digits_from_bytes is present.
+            # It was added in commit c6ab5c915da4 ('crypto: ecc - Prevent ecc_digits_from_bytes from
+            # reading too many bytes') in v6.10.
+            #
+            # This functionality is needed when crypto_akcipher_verify is not present.
+            #
+            CODE="
+            #include <crypto/internal/ecc.h>
+            void conftest_ecc_digits_from_bytes(void) {
+                (void)ecc_digits_from_bytes;
+            }"
+
+            compile_check_conftest "$CODE" "NV_ECC_DIGITS_FROM_BYTES_PRESENT" "" "symbols"
+        ;;
+
        mempolicy_has_unified_nodes)
            #
            # Determine if the 'mempolicy' structure has
@@ -7126,6 +7405,131 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_FUNCS_PRESENT" "" "types"
        ;;

+        sg_dma_page_iter)
+            #
+            # Determine if the struct sg_dma_page_iter is present.
+            # This also serves to know if the argument type of the macro
+            # sg_page_iter_dma_address() changed:
+            # - before: struct sg_page_iter *piter
+            # - after:  struct sg_dma_page_iter *dma_iter
+            #
+            # Added by commit d901b2760dc6c ("lib/scatterlist: Provide a DMA
+            # page iterator") v5.0.
+            #
+            CODE="
+            #include <linux/scatterlist.h>
+            struct sg_dma_page_iter conftest_dma_page_iter;"
+
+            compile_check_conftest "$CODE" "NV_SG_DMA_PAGE_ITER_PRESENT" "" "types"
+        ;;
+
+        # FIXME: See if we can remove this test
+        for_each_sgtable_dma_page)
+            #
+            # Determine if macro for_each_sgtable_dma_page is present.
+            #
+            # Added by commit 709d6d73c756 ("scatterlist: add generic wrappers
+            # for iterating over sgtable objects") v5.7.
+            #
+            CODE="
+            #include <linux/scatterlist.h>
+            void conftest_for_each_sgtable_dma_page(void) {
+                for_each_sgtable_dma_page();
+            }"
+
+            compile_check_conftest "$CODE" "NV_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT" "" "functions"
+        ;;
+
+        drm_aperture_remove_conflicting_framebuffers)
+            #
+            # Determine whether drm_aperture_remove_conflicting_framebuffers is present.
+            #
+            # drm_aperture_remove_conflicting_framebuffers was added in commit 2916059147ea
+            # ("drm/aperture: Add infrastructure for aperture ownership") in
+            # v5.14-rc1 (2021-04-12)
+            #
+            CODE="
+            #if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
+            #include <drm/drm_aperture.h>
+            #endif
+            void conftest_drm_aperture_remove_conflicting_framebuffers(void) {
+                drm_aperture_remove_conflicting_framebuffers();
+            }"
+
+            compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_PRESENT" "" "functions"
+        ;;
+
+        drm_aperture_remove_conflicting_framebuffers_has_driver_arg)
+            #
+            # Determine whether drm_aperture_remove_conflicting_framebuffers
+            # takes a struct drm_driver * as its fourth argument.
+            #
+            # Prior to commit 97c9bfe3f6605d41eb8f1206e6e0f62b31ba15d6, the
+            # fourth argument was a char * pointer to the driver's name.
+            #
+            # To test if drm_aperture_remove_conflicting_framebuffers() has
+            # a req_driver argument, define a function with the expected
+            # signature and then define the corresponding function
+            # implementation with the expected signature. Successful compilation
+            # indicates that this function has the expected signature.
+            #
+            # This change occurred in commit 97c9bfe3f660 ("drm/aperture: Pass
+            # DRM driver structure instead of driver name") in v5.15
+            # (2021-06-29).
+            #
+            CODE="
+            #if defined(NV_DRM_DRM_DRV_H_PRESENT)
+            #include <drm/drm_drv.h>
+            #endif
+            #if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
+            #include <drm/drm_aperture.h>
+            #endif
+            typeof(drm_aperture_remove_conflicting_framebuffers) conftest_drm_aperture_remove_conflicting_framebuffers;
+            int conftest_drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size,
+                                                                      bool primary, const struct drm_driver *req_driver)
+            {
+                return 0;
+            }"
+
+            compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_DRIVER_ARG" "" "types"
+        ;;
+
+        drm_aperture_remove_conflicting_framebuffers_has_no_primary_arg)
+            #
+            # Determine whether drm_aperture_remove_conflicting_framebuffers
+            # no longer takes a bool 'primary' as its third argument.
+            #
+            # Prior to commit 62aeaeaa1b267c5149abee6b45967a5df3feed58, the
+            # third argument was a bool for figuring out whether the legacy vga
+            # stuff should be nuked, but it's only for pci devices and not
+            # really needed in this function.
+            #
+            # To test if drm_aperture_remove_conflicting_framebuffers() lacks
+            # the bool primary argument, define a function with the expected
+            # signature and then define the corresponding function
+            # implementation with the expected signature. Successful compilation
+            # indicates that this function has the expected signature.
+            #
+            # This change occurred in commit 62aeaeaa1b26 ("drm/aperture: Remove
+            # primary argument") in v6.5 (2023-04-16).
+            #
+            CODE="
+            #if defined(NV_DRM_DRM_DRV_H_PRESENT)
+            #include <drm/drm_drv.h>
+            #endif
+            #if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
+            #include <drm/drm_aperture.h>
+            #endif
+            typeof(drm_aperture_remove_conflicting_framebuffers) conftest_drm_aperture_remove_conflicting_framebuffers;
+            int conftest_drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size,
+                                                                      const struct drm_driver *req_driver)
+            {
+                return 0;
+            }"
+
+            compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_NO_PRIMARY_ARG" "" "types"
+        ;;
+
        struct_page_has_zone_device_data)
            #
            # Determine if struct page has a 'zone_device_data' field.
@@ -7158,6 +7562,75 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
        ;;

+        module_import_ns_takes_constant)
+            #
+            # Determine if the MODULE_IMPORT_NS macro takes a string literal
+            # or constant.
+            #
+            # Commit cdd30ebb1b9f ("module: Convert symbol namespace to
+            # string literal") changed MODULE_IMPORT_NS to take a string
+            # literal in Linux kernel v6.13.
+            #
+            CODE="
+            #include <linux/module.h>
+
+            MODULE_IMPORT_NS(DMA_BUF);"
+
+            compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
+        ;;
+
+
+        drm_driver_has_date)
+            #
+            # Determine if the 'drm_driver' structure has a 'date' field.
+            #
+            # Removed by commit cb2e1c2136f7 ("drm: remove driver date from
+            # struct drm_driver and all drivers") in linux-next, expected in
+            # v6.14.
+            #
+            CODE="
+            #if defined(NV_DRM_DRMP_H_PRESENT)
+            #include <drm/drmP.h>
+            #endif
+
+            #if defined(NV_DRM_DRM_DRV_H_PRESENT)
+            #include <drm/drm_drv.h>
+            #endif
+
+            int conftest_drm_driver_has_date(void) {
+                return offsetof(struct drm_driver, date);
+            }"
+
+            compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types"
+        ;;
+
+        drm_connector_helper_funcs_mode_valid_has_const_mode_arg)
+            #
+            # Determine if the 'mode' pointer argument is const in
+            # drm_connector_helper_funcs::mode_valid.
+            #
+            # The 'mode' pointer argument in
+            # drm_connector_helper_funcs::mode_valid was made const by commit
+            # 26d6fd81916e ("drm/connector: make mode_valid take a const struct
+            # drm_display_mode") in linux-next, expected in v6.15.
+            #
+            CODE="
+            #if defined(NV_DRM_DRM_ATOMIC_HELPER_H_PRESENT)
+            #include <drm/drm_atomic_helper.h>
+            #endif
+
+            static int conftest_drm_connector_mode_valid(struct drm_connector *connector,
+                                                         const struct drm_display_mode *mode) {
+                return 0;
+            }
+
+            const struct drm_connector_helper_funcs conftest_drm_connector_helper_funcs = {
+                .mode_valid = conftest_drm_connector_mode_valid,
+            };"
+
+            compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
+        ;;
+
        # When adding a new conftest entry, please use the correct format for
        # specifying the relevant upstream Linux kernel commit.  Please
        # avoid specifying -rc kernels, and only use SHAs that actually exist
--- a/kernel-open/header-presence-tests.mk
+++ b/kernel-open/header-presence-tests.mk
@@ -16,6 +16,7 @@ NV_HEADER_PRESENCE_TESTS = \
  drm/drm_drv.h \
  drm/drm_fbdev_generic.h \
  drm/drm_fbdev_ttm.h \
+  drm/drm_client_setup.h \
  drm/drm_framebuffer.h \
  drm/drm_connector.h \
  drm/drm_probe_helper.h \
@@ -30,10 +31,13 @@ NV_HEADER_PRESENCE_TESTS = \
  drm/drm_mode_config.h \
  drm/drm_modeset_lock.h \
  drm/drm_property.h \
+  drm/clients/drm_client_setup.h \
  dt-bindings/interconnect/tegra_icc_id.h \
  generated/autoconf.h \
  generated/compile.h \
  generated/utsrelease.h \
+  linux/aperture.h \
+  linux/dma-direct.h \
  linux/efi.h \
  linux/kconfig.h \
  linux/platform/tegra/mc_utils.h \
@@ -102,5 +106,6 @@ NV_HEADER_PRESENCE_TESTS = \
  asm/cpufeature.h \
  linux/mpi.h \
  asm/mshyperv.h \
+  crypto/sig.h \
  linux/pfn_t.h

--- a/kernel-open/nvidia-drm/nvidia-drm-conftest.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-conftest.h
@@ -62,6 +62,17 @@
 #undef NV_DRM_FENCE_AVAILABLE
 #endif

+#if defined(NV_DRM_CLIENT_SETUP_PRESENT) &&                                    \
+    (defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) ||   \
+     defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))
+// XXX remove dependency on DRM_TTM_HELPER by implementing nvidia-drm's own
+// .fbdev_probe callback that uses NVKMS kapi
+#if IS_ENABLED(CONFIG_DRM_TTM_HELPER)
+#define NV_DRM_FBDEV_AVAILABLE
+#define NV_DRM_CLIENT_AVAILABLE
+#endif
+#endif
+
 /*
 * We can support color management if either drm_helper_crtc_enable_color_mgmt()
 * or drm_crtc_enable_color_mgmt() exist.
--- a/kernel-open/nvidia-drm/nvidia-drm-connector.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-connector.c
@@ -314,7 +314,11 @@ static int nv_drm_connector_get_modes(struct drm_connector *connector)
 }

 static int nv_drm_connector_mode_valid(struct drm_connector    *connector,
+#if defined(NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG)
+                                       const struct drm_display_mode *mode)
+#else
                                       struct drm_display_mode *mode)
+#endif
 {
    struct drm_device *dev = connector->dev;
    struct nv_drm_device *nv_dev = to_nv_device(dev);
--- a/kernel-open/nvidia-drm/nvidia-drm-drv.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c
@@ -64,11 +64,24 @@
 #include <drm/drm_ioctl.h>
 #endif

-#if defined(NV_DRM_FBDEV_AVAILABLE)
+#if defined(NV_LINUX_APERTURE_H_PRESENT)
+#include <linux/aperture.h>
+#endif
+
+#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
 #include <drm/drm_aperture.h>
+#endif
+
+#if defined(NV_DRM_FBDEV_AVAILABLE)
 #include <drm/drm_fb_helper.h>
 #endif

+#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
+#include <drm/drm_client_setup.h>
+#elif defined(NV_DRM_CLIENTS_DRM_CLIENT_SETUP_H_PRESENT)
+#include <drm/clients/drm_client_setup.h>
+#endif
+
 #if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
 #include <drm/drm_fbdev_ttm.h>
 #elif defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
@@ -1711,6 +1724,10 @@ static const struct file_operations nv_drm_fops = {
    .read           = drm_read,

    .llseek         = noop_llseek,
+
+#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
+    .fop_flags   = FOP_UNSIGNED_OFFSET,
+#endif
 };

 static const struct drm_ioctl_desc nv_drm_ioctls[] = {
@@ -1900,13 +1917,20 @@ static struct drm_driver nv_drm_driver = {
    .name                   = "nvidia-drm",

    .desc                   = "NVIDIA DRM driver",
+
+#if defined(NV_DRM_DRIVER_HAS_DATE)
    .date                   = "20160202",
+#endif

 #if defined(NV_DRM_DRIVER_HAS_DEVICE_LIST)
    .device_list            = LIST_HEAD_INIT(nv_drm_driver.device_list),
 #elif defined(NV_DRM_DRIVER_HAS_LEGACY_DEV_LIST)
    .legacy_dev_list        = LIST_HEAD_INIT(nv_drm_driver.legacy_dev_list),
 #endif
+// XXX implement nvidia-drm's own .fbdev_probe callback that uses NVKMS kapi directly
+#if defined(NV_DRM_FBDEV_AVAILABLE) && defined(DRM_FBDEV_TTM_DRIVER_OPS)
+    DRM_FBDEV_TTM_DRIVER_OPS,
+#endif
 };


@@ -2009,14 +2033,22 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
        if (bus_is_pci) {
            struct pci_dev *pdev = to_pci_dev(device);

+#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
+
 #if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_HAS_DRIVER_ARG)
            drm_aperture_remove_conflicting_pci_framebuffers(pdev, &nv_drm_driver);
 #else
            drm_aperture_remove_conflicting_pci_framebuffers(pdev, nv_drm_driver.name);
 #endif
+
+#elif defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT)
+            aperture_remove_conflicting_pci_devices(pdev, nv_drm_driver.name);
+#endif
            nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
        }
-        #if defined(NV_DRM_FBDEV_TTM_AVAILABLE)
+        #if defined(NV_DRM_CLIENT_AVAILABLE)
+	    drm_client_setup(dev, NULL);
+        #elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
        drm_fbdev_ttm_setup(dev, 32);
        #elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
        drm_fbdev_generic_setup(dev, 32);
--- a/kernel-open/nvidia-drm/nvidia-drm-fb.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-fb.c
@@ -161,6 +161,20 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
            params.planes[i].memory = nv_gem->pMemory;
            params.planes[i].offset = fb->offsets[i];
            params.planes[i].pitch = fb->pitches[i];
+
+            /*
+             * XXX Use drm_framebuffer_funcs.dirty and
+             * drm_fb_helper_funcs.fb_dirty instead
+             *
+             * Currently using noDisplayCaching when registering surfaces with
+             * NVKMS that are using memory allocated through the DRM
+             * Dumb-Buffers API. This prevents Display Idle Frame Rate (DIFR)
+             * from kicking in and preventing CPU updates to the surface memory
+             * from being reflected on the display. Ideally, DIFR would be
+             * dynamically disabled whenever a user of the memory blits to the
+             * frontbuffer. DRM provides the needed callbacks to achieve this.
+             */
+            params.noDisplayCaching |= !!nv_gem->is_drm_dumb;
        }
    }
    params.height = fb->height;
--- a/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c
@@ -167,7 +167,7 @@ static int __nv_drm_gem_nvkms_map(
        goto done;
    }

-    if (!nv_dev->hasVideoMemory) {
+    if (!nvKms->isVidmem(pMemory)) {
        goto done;
    }

@@ -218,11 +218,13 @@ static void *__nv_drm_gem_nvkms_prime_vmap(

    /*
     * If this buffer isn't physically mapped, it might be backed by struct
-     * pages. Use vmap in that case.
+     * pages. Use vmap in that case. Map system memory non-cached, because
+     * the display is not an I/O-coherent device on Tegra.
     */
    if (nv_nvkms_memory->pages_count > 0) {
         return nv_drm_vmap(nv_nvkms_memory->pages,
-                            nv_nvkms_memory->pages_count);
+                            nv_nvkms_memory->pages_count,
+                            false);
    }

    return ERR_PTR(-ENOMEM);
@@ -310,7 +312,7 @@ static int __nv_drm_nvkms_gem_obj_init(
                               pMemory,
                               &pages,
                               &numPages) &&
-        !nv_dev->hasVideoMemory) {
+        !nvKms->isVidmem(pMemory)) {
        /* GetMemoryPages may fail for vidmem allocations,
         * but it should not fail for sysmem allocations. */
        NV_DRM_DEV_LOG_ERR(nv_dev,
@@ -383,6 +385,8 @@ int nv_drm_dumb_create(
        goto nvkms_gem_obj_init_failed;
    }

+    nv_nvkms_memory->base.is_drm_dumb = true;
+
    /* Always map dumb buffer memory up front.  Clients are only expected
     * to use dumb buffers for software rendering, so they're not much use
     * without a CPU mapping.
--- a/kernel-open/nvidia-drm/nvidia-drm-gem-user-memory.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-gem-user-memory.c
@@ -72,7 +72,8 @@ static void *__nv_drm_gem_user_memory_prime_vmap(
    struct nv_drm_gem_user_memory *nv_user_memory = to_nv_user_memory(nv_gem);

    return nv_drm_vmap(nv_user_memory->pages,
-                           nv_user_memory->pages_count);
+                           nv_user_memory->pages_count,
+                           true);
 }

 static void __nv_drm_gem_user_memory_prime_vunmap(
--- a/kernel-open/nvidia-drm/nvidia-drm-gem.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-gem.c
@@ -172,8 +172,11 @@ struct drm_gem_object *nv_drm_gem_prime_import(struct drm_device *dev,
             */
            gem_dst = nv_gem_src->ops->prime_dup(dev, nv_gem_src);

-            if (gem_dst)
-                return gem_dst;
+            if (gem_dst == NULL) {
+                return ERR_PTR(-ENOTSUPP);
+            }
+
+            return gem_dst;
        }
    }
 #endif /* NV_DMA_BUF_OWNER_PRESENT */
--- a/kernel-open/nvidia-drm/nvidia-drm-gem.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-gem.h
@@ -73,6 +73,8 @@ struct nv_drm_gem_object {

    struct NvKmsKapiMemory *pMemory;

+    bool is_drm_dumb;
+
 #if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
    nv_dma_resv_t  resv;
 #endif
--- a/kernel-open/nvidia-drm/nvidia-drm-linux.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-linux.c
@@ -37,7 +37,7 @@ module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
 #if defined(NV_DRM_FBDEV_AVAILABLE)
 MODULE_PARM_DESC(
    fbdev,
-    "Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");
+    "Create a framebuffer device (1 = enable (default), 0 = disable)");
 module_param_named(fbdev, nv_drm_fbdev_module_param, bool, 0400);
 #endif

--- a/kernel-open/nvidia-drm/nvidia-drm-os-interface.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-os-interface.c
@@ -42,7 +42,7 @@
 #endif

 bool nv_drm_modeset_module_param = false;
-bool nv_drm_fbdev_module_param = false;
+bool nv_drm_fbdev_module_param = true;

 void *nv_drm_calloc(size_t nmemb, size_t size)
 {
@@ -156,9 +156,15 @@ void nv_drm_unlock_user_pages(unsigned long  pages_count, struct page **pages)
 #define VM_USERMAP 0
 #endif

-void *nv_drm_vmap(struct page **pages, unsigned long pages_count)
+void *nv_drm_vmap(struct page **pages, unsigned long pages_count, bool cached)
 {
-    return vmap(pages, pages_count, VM_USERMAP, PAGE_KERNEL);
+    pgprot_t prot = PAGE_KERNEL;
+    /* Use PAGE_KERNEL unless the caller requests a non-cached mapping. */
+    if (!cached) {
+        prot = pgprot_noncached(PAGE_KERNEL);
+    }
+
+    return vmap(pages, pages_count, VM_USERMAP, prot);
 }

 void nv_drm_vunmap(void *address)
--- a/kernel-open/nvidia-drm/nvidia-drm-os-interface.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-os-interface.h
@@ -90,7 +90,7 @@ int nv_drm_lock_user_pages(unsigned long address,

 void nv_drm_unlock_user_pages(unsigned long  pages_count, struct page **pages);

-void *nv_drm_vmap(struct page **pages, unsigned long pages_count);
+void *nv_drm_vmap(struct page **pages, unsigned long pages_count, bool cached);

 void nv_drm_vunmap(void *address);

--- a/kernel-open/nvidia-drm/nvidia-drm-sources.mk
+++ b/kernel-open/nvidia-drm/nvidia-drm-sources.mk
@@ -66,8 +66,11 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_pci_devices
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_ttm_setup
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
@@ -140,3 +143,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
+NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg
--- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
+++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
@@ -89,6 +89,9 @@ module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool,
 static enum NvKmsDebugForceColorSpace debug_force_color_space = NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE;
 module_param_named(debug_force_color_space, debug_force_color_space, uint, 0400);

+static bool enable_overlay_layers = true;
+module_param_named(enable_overlay_layers, enable_overlay_layers, bool, 0400);
+
 /* These parameters are used for fault injection tests.  Normally the defaults
 * should be used. */
 MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
@@ -99,19 +102,40 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
 static bool malloc_verbose = false;
 module_param_named(malloc_verbose, malloc_verbose, bool, 0400);

+/* Fail allocating the RM core channel for NVKMS using the i-th method (see
+ * FailAllocCoreChannelMethod). Failures not using the i-th method are ignored. */
+MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core channel allocation failure");
+static int fail_alloc_core_channel_method = -1;
+module_param_named(fail_alloc_core_channel, fail_alloc_core_channel_method, int, 0400);
+
 #if NVKMS_CONFIG_FILE_SUPPORTED
 /* This parameter is used to find the dpy override conf file */
 #define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)

 MODULE_PARM_DESC(config_file,
-                 "Path to the nvidia-modeset configuration file "
-                 "(default: disabled)");
+                 "Path to the nvidia-modeset configuration file (default: disabled)");
 static char *nvkms_conf = NULL;
 module_param_named(config_file, nvkms_conf, charp, 0400);
 #endif

 static atomic_t nvkms_alloc_called_count;

+/* Fault-injection hook: returns NV_TRUE (and logs) only when 'method' equals
+ * the fail_alloc_core_channel module parameter; otherwise returns NV_FALSE. */
+NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method)
+{
+    if (method != fail_alloc_core_channel_method) {
+        // don't fail if it's not the currently specified method
+        return NV_FALSE;
+    }
+
+    printk(KERN_INFO NVKMS_LOG_PREFIX
+        "Failing core channel allocation using method %d\n",
+        fail_alloc_core_channel_method);
+
+    return NV_TRUE;
+}
+    
 NvBool nvkms_output_rounding_fix(void)
 {
    return output_rounding_fix;
@@ -150,6 +174,11 @@ enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void)
    return debug_force_color_space;
 }

+NvBool nvkms_enable_overlay_layers(void)
+{
+    return enable_overlay_layers;
+}
+
 NvBool nvkms_kernel_supports_syncpts(void)
 {
 /*
@@ -1021,6 +1050,11 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,

 #if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
    if (!acpi_video_backlight_use_native()) {
+#if defined(NV_ACPI_VIDEO_REGISTER_BACKLIGHT)
+        nvkms_log(NVKMS_LOG_LEVEL_INFO, NVKMS_LOG_PREFIX,
+                  "ACPI reported no NVIDIA native backlight available; attempting to use ACPI backlight.");
+        acpi_video_register_backlight();
+#endif
        return NULL;
    }
 #endif
@@ -1463,6 +1497,8 @@ static size_t nvkms_config_file_open
    loff_t pos = 0;
 #endif

+    *buff = NULL;
+    
    if (!nvkms_fs_mounted()) {
        printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
        return 0;
@@ -1486,6 +1522,11 @@ static size_t nvkms_config_file_open
        goto done;
    }

+    // Do not alloc a 0 sized buffer
+    if (file_size == 0) {
+        goto done;
+    }
+
    *buff = nvkms_alloc(file_size, NV_FALSE);
    if (*buff == NULL) {
        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");
--- a/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h
+++ b/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h
@@ -104,6 +104,12 @@ typedef struct {
    } read_minval;
 } NvKmsSyncPtOpParams;

+enum FailAllocCoreChannelMethod {
+    FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL = 0,
+    FAIL_ALLOC_CORE_CHANNEL_RESTORE_CONSOLE = 1,
+};
+
+NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);
 NvBool nvkms_output_rounding_fix(void);
 NvBool nvkms_disable_hdmi_frl(void);
 NvBool nvkms_disable_vrr_memclk_switch(void);
@@ -111,6 +117,7 @@ NvBool nvkms_hdmi_deepcolor(void);
 NvBool nvkms_vblank_sem_control(void);
 NvBool nvkms_opportunistic_display_sync(void);
 enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void);
+NvBool nvkms_enable_overlay_layers(void);

 void   nvkms_call_rm    (void *ops);
 void*  nvkms_alloc      (size_t size,
--- a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild
+++ b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild
@@ -40,9 +40,6 @@ NV_KERNEL_MODULE_TARGETS += $(NVIDIA_MODESET_KO)
 NVIDIA_MODESET_BINARY_OBJECT := $(src)/nvidia-modeset/nv-modeset-kernel.o_binary
 NVIDIA_MODESET_BINARY_OBJECT_O := nvidia-modeset/nv-modeset-kernel.o

-quiet_cmd_symlink = SYMLINK $@
-cmd_symlink = ln -sf $< $@
-
 targets += $(NVIDIA_MODESET_BINARY_OBJECT_O)

 $(obj)/$(NVIDIA_MODESET_BINARY_OBJECT_O): $(NVIDIA_MODESET_BINARY_OBJECT) FORCE
@@ -105,4 +102,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg
--- a/kernel-open/nvidia-peermem/nv-p2p.h
+++ b/kernel-open/nvidia-peermem/nv-p2p.h
@@ -189,6 +189,12 @@ int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
        struct nvidia_p2p_page_table **page_table,
        void (*free_callback)(void *data), void *data);

+/*
+ * Flags to be used with persistent APIs
+ */
+#define NVIDIA_P2P_FLAGS_DEFAULT            0
+#define NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING 1
+
 /*
 * @brief
 *   Pin and make the pages underlying a range of GPU virtual memory
@@ -212,7 +218,11 @@ int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
 * @param[out]    page_table
 *   A pointer to an array of structures with P2P PTEs.
 * @param[in]     flags
- *   Must be set to zero for now.
+ *   NVIDIA_P2P_FLAGS_DEFAULT:
+ *     Default value to be used if no specific behavior is expected.
+ *   NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING:
+ *     Force BAR1 mappings on certain coherent platforms,
+ *     subject to capability and supported topology.
 *
 * @return
 *    0           upon successful completion.
--- a/kernel-open/nvidia-uvm/clc365.h
+++ b/kernel-open/nvidia-uvm/clc365.h
@@ -1,30 +1,25 @@
-/*******************************************************************************
-    Copyright (c) 2024 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-
-// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
-// Command: ../../../bin/manuals/refhdr2class.pl clc365.h c365 ACCESS_COUNTER_NOTIFY_BUFFER --search_str=NV_ACCESS_COUNTER --input_file=nv_ref_dev_access_counter.h
-
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

 #ifndef _clc365_h_
 #define _clc365_h_
--- a/kernel-open/nvidia-uvm/clc369.h
+++ b/kernel-open/nvidia-uvm/clc369.h
@@ -1,30 +1,25 @@
-/*******************************************************************************
-    Copyright (c) 2024 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-
-// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
-// Command: ../../../bin/manuals/refhdr2class.pl clc369.h c369 MMU_FAULT_BUFFER --search_str=NV_MMU_FAULT --input_file=nv_ref_dev_mmu_fault.h
-
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

 #ifndef _clc369_h_
 #define _clc369_h_
--- a/kernel-open/nvidia-uvm/clc36f.h
+++ b/kernel-open/nvidia-uvm/clc36f.h
@@ -1,26 +1,25 @@
-/*******************************************************************************
-    Copyright (c) 2012-2015 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

 #ifndef _clc36f_h_
 #define _clc36f_h_
@@ -257,7 +256,6 @@ typedef volatile struct Nvc36fControl_struct {
 #define NVC36F_CLEAR_FAULTED_TYPE                                        31:31
 #define NVC36F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED                     0x00000000
 #define NVC36F_CLEAR_FAULTED_TYPE_ENG_FAULTED                       0x00000001
-#define NVC36F_QUADRO_VERIFY                                       (0x000000a0)


 /* GPFIFO entry format */
--- a/kernel-open/nvidia-uvm/clc46f.h
+++ b/kernel-open/nvidia-uvm/clc46f.h
@@ -1,26 +1,25 @@
-/*******************************************************************************
-    Copyright (c) 2012-2015 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

 #ifndef _clc46f_h_
 #define _clc46f_h_
@@ -259,7 +258,6 @@ typedef volatile struct Nvc46fControl_struct {
 #define NVC46F_CLEAR_FAULTED_TYPE                                        31:31
 #define NVC46F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED                     0x00000000
 #define NVC46F_CLEAR_FAULTED_TYPE_ENG_FAULTED                       0x00000001
-#define NVC46F_QUADRO_VERIFY                                       (0x000000a0)


 /* GPFIFO entry format */
--- a/kernel-open/nvidia-uvm/clc56f.h
+++ b/kernel-open/nvidia-uvm/clc56f.h
@@ -1,26 +1,25 @@
-/*******************************************************************************
-    Copyright (c) 2012-2015 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

 #ifndef _clc56f_h_
 #define _clc56f_h_
@@ -261,7 +260,6 @@ typedef volatile struct Nvc56fControl_struct {
 #define NVC56F_CLEAR_FAULTED_TYPE                                        31:31
 #define NVC56F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED                     0x00000000
 #define NVC56F_CLEAR_FAULTED_TYPE_ENG_FAULTED                       0x00000001
-#define NVC56F_QUADRO_VERIFY                                       (0x000000a0)


 /* GPFIFO entry format */
--- a/kernel-open/nvidia-uvm/clc5b5.h
+++ b/kernel-open/nvidia-uvm/clc5b5.h
@@ -1,19 +1,19 @@
 /*******************************************************************************
-    Copyright (c) 1993-2004 NVIDIA Corporation
+    Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:

-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -21,8 +21,6 @@

 *******************************************************************************/

-
-
 #include "nvtypes.h"

 #ifndef _clc5b5_h_
@@ -34,64 +32,6 @@ extern "C" {

 #define TURING_DMA_COPY_A                                                            (0x0000C5B5)

-typedef volatile struct _clc5b5_tag0 {
-    NvV32 Reserved00[0x40];
-    NvV32 Nop;                                                                  // 0x00000100 - 0x00000103
-    NvV32 Reserved01[0xF];
-    NvV32 PmTrigger;                                                            // 0x00000140 - 0x00000143
-    NvV32 Reserved02[0x3F];
-    NvV32 SetSemaphoreA;                                                        // 0x00000240 - 0x00000243
-    NvV32 SetSemaphoreB;                                                        // 0x00000244 - 0x00000247
-    NvV32 SetSemaphorePayload;                                                  // 0x00000248 - 0x0000024B
-    NvV32 Reserved03[0x2];
-    NvV32 SetRenderEnableA;                                                     // 0x00000254 - 0x00000257
-    NvV32 SetRenderEnableB;                                                     // 0x00000258 - 0x0000025B
-    NvV32 SetRenderEnableC;                                                     // 0x0000025C - 0x0000025F
-    NvV32 SetSrcPhysMode;                                                       // 0x00000260 - 0x00000263
-    NvV32 SetDstPhysMode;                                                       // 0x00000264 - 0x00000267
-    NvV32 Reserved04[0x6];
-    NvV32 SetGlobalCounterUpper;                                                // 0x00000280 - 0x00000283
-    NvV32 SetGlobalCounterLower;                                                // 0x00000284 - 0x00000287
-    NvV32 SetPageoutStartPAUpper;                                               // 0x00000288 - 0x0000028B
-    NvV32 SetPageoutStartPALower;                                               // 0x0000028C - 0x0000028F
-    NvV32 Reserved05[0x1C];
-    NvV32 LaunchDma;                                                            // 0x00000300 - 0x00000303
-    NvV32 Reserved06[0x3F];
-    NvV32 OffsetInUpper;                                                        // 0x00000400 - 0x00000403
-    NvV32 OffsetInLower;                                                        // 0x00000404 - 0x00000407
-    NvV32 OffsetOutUpper;                                                       // 0x00000408 - 0x0000040B
-    NvV32 OffsetOutLower;                                                       // 0x0000040C - 0x0000040F
-    NvV32 PitchIn;                                                              // 0x00000410 - 0x00000413
-    NvV32 PitchOut;                                                             // 0x00000414 - 0x00000417
-    NvV32 LineLengthIn;                                                         // 0x00000418 - 0x0000041B
-    NvV32 LineCount;                                                            // 0x0000041C - 0x0000041F
-    NvV32 Reserved07[0xB8];
-    NvV32 SetRemapConstA;                                                       // 0x00000700 - 0x00000703
-    NvV32 SetRemapConstB;                                                       // 0x00000704 - 0x00000707
-    NvV32 SetRemapComponents;                                                   // 0x00000708 - 0x0000070B
-    NvV32 SetDstBlockSize;                                                      // 0x0000070C - 0x0000070F
-    NvV32 SetDstWidth;                                                          // 0x00000710 - 0x00000713
-    NvV32 SetDstHeight;                                                         // 0x00000714 - 0x00000717
-    NvV32 SetDstDepth;                                                          // 0x00000718 - 0x0000071B
-    NvV32 SetDstLayer;                                                          // 0x0000071C - 0x0000071F
-    NvV32 SetDstOrigin;                                                         // 0x00000720 - 0x00000723
-    NvV32 Reserved08[0x1];
-    NvV32 SetSrcBlockSize;                                                      // 0x00000728 - 0x0000072B
-    NvV32 SetSrcWidth;                                                          // 0x0000072C - 0x0000072F
-    NvV32 SetSrcHeight;                                                         // 0x00000730 - 0x00000733
-    NvV32 SetSrcDepth;                                                          // 0x00000734 - 0x00000737
-    NvV32 SetSrcLayer;                                                          // 0x00000738 - 0x0000073B
-    NvV32 SetSrcOrigin;                                                         // 0x0000073C - 0x0000073F
-    NvV32 Reserved09[0x1];
-    NvV32 SrcOriginX;                                                           // 0x00000744 - 0x00000747
-    NvV32 SrcOriginY;                                                           // 0x00000748 - 0x0000074B
-    NvV32 DstOriginX;                                                           // 0x0000074C - 0x0000074F
-    NvV32 DstOriginY;                                                           // 0x00000750 - 0x00000753
-    NvV32 Reserved10[0x270];
-    NvV32 PmTriggerEnd;                                                         // 0x00001114 - 0x00001117
-    NvV32 Reserved11[0x3BA];
-} turing_dma_copy_aControlPio;
-
 #define NVC5B5_NOP                                                              (0x00000100)
 #define NVC5B5_NOP_PARAMETER                                                    31:0
 #define NVC5B5_PM_TRIGGER                                                       (0x00000140)
@@ -125,14 +65,6 @@ typedef volatile struct _clc5b5_tag0 {
 #define NVC5B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM                         (0x00000001)
 #define NVC5B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM                      (0x00000002)
 #define NVC5B5_SET_DST_PHYS_MODE_BASIC_KIND                                     5:2
-#define NVC5B5_SET_GLOBAL_COUNTER_UPPER                                         (0x00000280)
-#define NVC5B5_SET_GLOBAL_COUNTER_UPPER_V                                       31:0
-#define NVC5B5_SET_GLOBAL_COUNTER_LOWER                                         (0x00000284)
-#define NVC5B5_SET_GLOBAL_COUNTER_LOWER_V                                       31:0
-#define NVC5B5_SET_PAGEOUT_START_PAUPPER                                        (0x00000288)
-#define NVC5B5_SET_PAGEOUT_START_PAUPPER_V                                      4:0
-#define NVC5B5_SET_PAGEOUT_START_PALOWER                                        (0x0000028C)
-#define NVC5B5_SET_PAGEOUT_START_PALOWER_V                                      31:0
 #define NVC5B5_LAUNCH_DMA                                                       (0x00000300)
 #define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE                                    1:0
 #define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE                               (0x00000000)
@@ -199,8 +131,6 @@ typedef volatile struct _clc5b5_tag0 {
 #define NVC5B5_LAUNCH_DMA_VPRMODE                                               23:22
 #define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_NONE                                      (0x00000000)
 #define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                                   (0x00000001)
-#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS                                   (0x00000002)
-#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID                                   (0x00000003)
 #define NVC5B5_LAUNCH_DMA_RESERVED_START_OF_COPY                                24:24
 #define NVC5B5_LAUNCH_DMA_DISABLE_PLC                                           26:26
 #define NVC5B5_LAUNCH_DMA_DISABLE_PLC_FALSE                                     (0x00000000)
--- a/kernel-open/nvidia-uvm/clc6b5.h
+++ b/kernel-open/nvidia-uvm/clc6b5.h
@@ -1,19 +1,19 @@
 /*******************************************************************************
-    Copyright (c) 1993-2004 NVIDIA Corporation
+    Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:

-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -21,8 +21,6 @@

 *******************************************************************************/

-
-
 #include "nvtypes.h"

 #ifndef _clc6b5_h_
@@ -34,64 +32,6 @@ extern "C" {

 #define AMPERE_DMA_COPY_A                                                            (0x0000C6B5)

-typedef volatile struct _clc6b5_tag0 {
-    NvV32 Reserved00[0x40];
-    NvV32 Nop;                                                                  // 0x00000100 - 0x00000103
-    NvV32 Reserved01[0xF];
-    NvV32 PmTrigger;                                                            // 0x00000140 - 0x00000143
-    NvV32 Reserved02[0x3F];
-    NvV32 SetSemaphoreA;                                                        // 0x00000240 - 0x00000243
-    NvV32 SetSemaphoreB;                                                        // 0x00000244 - 0x00000247
-    NvV32 SetSemaphorePayload;                                                  // 0x00000248 - 0x0000024B
-    NvV32 Reserved03[0x2];
-    NvV32 SetRenderEnableA;                                                     // 0x00000254 - 0x00000257
-    NvV32 SetRenderEnableB;                                                     // 0x00000258 - 0x0000025B
-    NvV32 SetRenderEnableC;                                                     // 0x0000025C - 0x0000025F
-    NvV32 SetSrcPhysMode;                                                       // 0x00000260 - 0x00000263
-    NvV32 SetDstPhysMode;                                                       // 0x00000264 - 0x00000267
-    NvV32 Reserved04[0x6];
-    NvV32 SetGlobalCounterUpper;                                                // 0x00000280 - 0x00000283
-    NvV32 SetGlobalCounterLower;                                                // 0x00000284 - 0x00000287
-    NvV32 SetPageoutStartPAUpper;                                               // 0x00000288 - 0x0000028B
-    NvV32 SetPageoutStartPALower;                                               // 0x0000028C - 0x0000028F
-    NvV32 Reserved05[0x1C];
-    NvV32 LaunchDma;                                                            // 0x00000300 - 0x00000303
-    NvV32 Reserved06[0x3F];
-    NvV32 OffsetInUpper;                                                        // 0x00000400 - 0x00000403
-    NvV32 OffsetInLower;                                                        // 0x00000404 - 0x00000407
-    NvV32 OffsetOutUpper;                                                       // 0x00000408 - 0x0000040B
-    NvV32 OffsetOutLower;                                                       // 0x0000040C - 0x0000040F
-    NvV32 PitchIn;                                                              // 0x00000410 - 0x00000413
-    NvV32 PitchOut;                                                             // 0x00000414 - 0x00000417
-    NvV32 LineLengthIn;                                                         // 0x00000418 - 0x0000041B
-    NvV32 LineCount;                                                            // 0x0000041C - 0x0000041F
-    NvV32 Reserved07[0xB8];
-    NvV32 SetRemapConstA;                                                       // 0x00000700 - 0x00000703
-    NvV32 SetRemapConstB;                                                       // 0x00000704 - 0x00000707
-    NvV32 SetRemapComponents;                                                   // 0x00000708 - 0x0000070B
-    NvV32 SetDstBlockSize;                                                      // 0x0000070C - 0x0000070F
-    NvV32 SetDstWidth;                                                          // 0x00000710 - 0x00000713
-    NvV32 SetDstHeight;                                                         // 0x00000714 - 0x00000717
-    NvV32 SetDstDepth;                                                          // 0x00000718 - 0x0000071B
-    NvV32 SetDstLayer;                                                          // 0x0000071C - 0x0000071F
-    NvV32 SetDstOrigin;                                                         // 0x00000720 - 0x00000723
-    NvV32 Reserved08[0x1];
-    NvV32 SetSrcBlockSize;                                                      // 0x00000728 - 0x0000072B
-    NvV32 SetSrcWidth;                                                          // 0x0000072C - 0x0000072F
-    NvV32 SetSrcHeight;                                                         // 0x00000730 - 0x00000733
-    NvV32 SetSrcDepth;                                                          // 0x00000734 - 0x00000737
-    NvV32 SetSrcLayer;                                                          // 0x00000738 - 0x0000073B
-    NvV32 SetSrcOrigin;                                                         // 0x0000073C - 0x0000073F
-    NvV32 Reserved09[0x1];
-    NvV32 SrcOriginX;                                                           // 0x00000744 - 0x00000747
-    NvV32 SrcOriginY;                                                           // 0x00000748 - 0x0000074B
-    NvV32 DstOriginX;                                                           // 0x0000074C - 0x0000074F
-    NvV32 DstOriginY;                                                           // 0x00000750 - 0x00000753
-    NvV32 Reserved10[0x270];
-    NvV32 PmTriggerEnd;                                                         // 0x00001114 - 0x00001117
-    NvV32 Reserved11[0x3BA];
-} ampere_dma_copy_aControlPio;
-
 #define NVC6B5_NOP                                                              (0x00000100)
 #define NVC6B5_NOP_PARAMETER                                                    31:0
 #define NVC6B5_PM_TRIGGER                                                       (0x00000140)
@@ -131,14 +71,6 @@ typedef volatile struct _clc6b5_tag0 {
 #define NVC6B5_SET_DST_PHYS_MODE_BASIC_KIND                                     5:2
 #define NVC6B5_SET_DST_PHYS_MODE_PEER_ID                                        8:6
 #define NVC6B5_SET_DST_PHYS_MODE_FLA                                            9:9
-#define NVC6B5_SET_GLOBAL_COUNTER_UPPER                                         (0x00000280)
-#define NVC6B5_SET_GLOBAL_COUNTER_UPPER_V                                       31:0
-#define NVC6B5_SET_GLOBAL_COUNTER_LOWER                                         (0x00000284)
-#define NVC6B5_SET_GLOBAL_COUNTER_LOWER_V                                       31:0
-#define NVC6B5_SET_PAGEOUT_START_PAUPPER                                        (0x00000288)
-#define NVC6B5_SET_PAGEOUT_START_PAUPPER_V                                      4:0
-#define NVC6B5_SET_PAGEOUT_START_PALOWER                                        (0x0000028C)
-#define NVC6B5_SET_PAGEOUT_START_PALOWER_V                                      31:0
 #define NVC6B5_LAUNCH_DMA                                                       (0x00000300)
 #define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE                                    1:0
 #define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE                               (0x00000000)
@@ -199,8 +131,6 @@ typedef volatile struct _clc6b5_tag0 {
 #define NVC6B5_LAUNCH_DMA_VPRMODE                                               23:22
 #define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_NONE                                      (0x00000000)
 #define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                                   (0x00000001)
-#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS                                   (0x00000002)
-#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID                                   (0x00000003)
 #define NVC6B5_LAUNCH_DMA_RESERVED_START_OF_COPY                                24:24
 #define NVC6B5_LAUNCH_DMA_DISABLE_PLC                                           26:26
 #define NVC6B5_LAUNCH_DMA_DISABLE_PLC_FALSE                                     (0x00000000)
--- a/kernel-open/nvidia-uvm/clc7b5.h
+++ b/kernel-open/nvidia-uvm/clc7b5.h
@@ -1,19 +1,19 @@
 /*******************************************************************************
-    Copyright (c) 1993-2004 NVIDIA Corporation
+    Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:

-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -21,8 +21,6 @@

 *******************************************************************************/

-
-
 #include "nvtypes.h"

 #ifndef _clc7b5_h_
@@ -34,69 +32,6 @@ extern "C" {

 #define AMPERE_DMA_COPY_B                                                            (0x0000C7B5)

-typedef volatile struct _clc7b5_tag0 {
-    NvV32 Reserved00[0x40];
-    NvV32 Nop;                                                                  // 0x00000100 - 0x00000103
-    NvV32 Reserved01[0xF];
-    NvV32 PmTrigger;                                                            // 0x00000140 - 0x00000143
-    NvV32 Reserved02[0x36];
-    NvV32 SetMonitoredFenceType;                                                // 0x0000021C - 0x0000021F
-    NvV32 SetMonitoredFenceSignalAddrBaseUpper;                                 // 0x00000220 - 0x00000223
-    NvV32 SetMonitoredFenceSignalAddrBaseLower;                                 // 0x00000224 - 0x00000227
-    NvV32 Reserved03[0x6];
-    NvV32 SetSemaphoreA;                                                        // 0x00000240 - 0x00000243
-    NvV32 SetSemaphoreB;                                                        // 0x00000244 - 0x00000247
-    NvV32 SetSemaphorePayload;                                                  // 0x00000248 - 0x0000024B
-    NvV32 SetSemaphorePayloadUpper;                                             // 0x0000024C - 0x0000024F
-    NvV32 Reserved04[0x1];
-    NvV32 SetRenderEnableA;                                                     // 0x00000254 - 0x00000257
-    NvV32 SetRenderEnableB;                                                     // 0x00000258 - 0x0000025B
-    NvV32 SetRenderEnableC;                                                     // 0x0000025C - 0x0000025F
-    NvV32 SetSrcPhysMode;                                                       // 0x00000260 - 0x00000263
-    NvV32 SetDstPhysMode;                                                       // 0x00000264 - 0x00000267
-    NvV32 Reserved05[0x6];
-    NvV32 SetGlobalCounterUpper;                                                // 0x00000280 - 0x00000283
-    NvV32 SetGlobalCounterLower;                                                // 0x00000284 - 0x00000287
-    NvV32 SetPageoutStartPAUpper;                                               // 0x00000288 - 0x0000028B
-    NvV32 SetPageoutStartPALower;                                               // 0x0000028C - 0x0000028F
-    NvV32 Reserved06[0x1C];
-    NvV32 LaunchDma;                                                            // 0x00000300 - 0x00000303
-    NvV32 Reserved07[0x3F];
-    NvV32 OffsetInUpper;                                                        // 0x00000400 - 0x00000403
-    NvV32 OffsetInLower;                                                        // 0x00000404 - 0x00000407
-    NvV32 OffsetOutUpper;                                                       // 0x00000408 - 0x0000040B
-    NvV32 OffsetOutLower;                                                       // 0x0000040C - 0x0000040F
-    NvV32 PitchIn;                                                              // 0x00000410 - 0x00000413
-    NvV32 PitchOut;                                                             // 0x00000414 - 0x00000417
-    NvV32 LineLengthIn;                                                         // 0x00000418 - 0x0000041B
-    NvV32 LineCount;                                                            // 0x0000041C - 0x0000041F
-    NvV32 Reserved08[0xB8];
-    NvV32 SetRemapConstA;                                                       // 0x00000700 - 0x00000703
-    NvV32 SetRemapConstB;                                                       // 0x00000704 - 0x00000707
-    NvV32 SetRemapComponents;                                                   // 0x00000708 - 0x0000070B
-    NvV32 SetDstBlockSize;                                                      // 0x0000070C - 0x0000070F
-    NvV32 SetDstWidth;                                                          // 0x00000710 - 0x00000713
-    NvV32 SetDstHeight;                                                         // 0x00000714 - 0x00000717
-    NvV32 SetDstDepth;                                                          // 0x00000718 - 0x0000071B
-    NvV32 SetDstLayer;                                                          // 0x0000071C - 0x0000071F
-    NvV32 SetDstOrigin;                                                         // 0x00000720 - 0x00000723
-    NvV32 Reserved09[0x1];
-    NvV32 SetSrcBlockSize;                                                      // 0x00000728 - 0x0000072B
-    NvV32 SetSrcWidth;                                                          // 0x0000072C - 0x0000072F
-    NvV32 SetSrcHeight;                                                         // 0x00000730 - 0x00000733
-    NvV32 SetSrcDepth;                                                          // 0x00000734 - 0x00000737
-    NvV32 SetSrcLayer;                                                          // 0x00000738 - 0x0000073B
-    NvV32 SetSrcOrigin;                                                         // 0x0000073C - 0x0000073F
-    NvV32 Reserved10[0x1];
-    NvV32 SrcOriginX;                                                           // 0x00000744 - 0x00000747
-    NvV32 SrcOriginY;                                                           // 0x00000748 - 0x0000074B
-    NvV32 DstOriginX;                                                           // 0x0000074C - 0x0000074F
-    NvV32 DstOriginY;                                                           // 0x00000750 - 0x00000753
-    NvV32 Reserved11[0x270];
-    NvV32 PmTriggerEnd;                                                         // 0x00001114 - 0x00001117
-    NvV32 Reserved12[0x3BA];
-} ampere_dma_copy_bControlPio;
-
 #define NVC7B5_NOP                                                              (0x00000100)
 #define NVC7B5_NOP_PARAMETER                                                    31:0
 #define NVC7B5_PM_TRIGGER                                                       (0x00000140)
@@ -146,14 +81,6 @@ typedef volatile struct _clc7b5_tag0 {
 #define NVC7B5_SET_DST_PHYS_MODE_BASIC_KIND                                     5:2
 #define NVC7B5_SET_DST_PHYS_MODE_PEER_ID                                        8:6
 #define NVC7B5_SET_DST_PHYS_MODE_FLA                                            9:9
-#define NVC7B5_SET_GLOBAL_COUNTER_UPPER                                         (0x00000280)
-#define NVC7B5_SET_GLOBAL_COUNTER_UPPER_V                                       31:0
-#define NVC7B5_SET_GLOBAL_COUNTER_LOWER                                         (0x00000284)
-#define NVC7B5_SET_GLOBAL_COUNTER_LOWER_V                                       31:0
-#define NVC7B5_SET_PAGEOUT_START_PAUPPER                                        (0x00000288)
-#define NVC7B5_SET_PAGEOUT_START_PAUPPER_V                                      4:0
-#define NVC7B5_SET_PAGEOUT_START_PALOWER                                        (0x0000028C)
-#define NVC7B5_SET_PAGEOUT_START_PALOWER_V                                      31:0
 #define NVC7B5_LAUNCH_DMA                                                       (0x00000300)
 #define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE                                    1:0
 #define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE                               (0x00000000)
@@ -223,8 +150,6 @@ typedef volatile struct _clc7b5_tag0 {
 #define NVC7B5_LAUNCH_DMA_VPRMODE                                               23:22
 #define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_NONE                                      (0x00000000)
 #define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                                   (0x00000001)
-#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS                                   (0x00000002)
-#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID                                   (0x00000003)
 #define NVC7B5_LAUNCH_DMA_RESERVED_START_OF_COPY                                24:24
 #define NVC7B5_LAUNCH_DMA_DISABLE_PLC                                           26:26
 #define NVC7B5_LAUNCH_DMA_DISABLE_PLC_FALSE                                     (0x00000000)
--- a/kernel-open/nvidia-uvm/clca6f.h
+++ b/kernel-open/nvidia-uvm/clca6f.h
@@ -0,0 +1,74 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __gb202_clca6f_h__
+#define __gb202_clca6f_h__
+
+typedef volatile struct Nvca6fControl_struct { /* right-hand columns: hex byte offsets from the start of the struct */
+ NvU32 Ignored00[0x23];        /* no named fields in this header   0000-008b*/
+ NvU32 GPPut;                   /* GP FIFO put offset               008c-008f*/
+ NvU32 Ignored01[0x5c];        /* no named fields in this header   0090-01ff*/
+} Nvca6fControl, BlackwellBControlGPFifo;
+
+#define  BLACKWELL_CHANNEL_GPFIFO_B                           (0x0000CA6F) /* same hex digits as the NVCA6F prefix and clca6f.h */
+
+#define NVCA6F_SET_OBJECT                                          (0x00000000)
+#define NVCA6F_SEM_ADDR_LO                                         (0x0000005c)
+#define NVCA6F_SEM_ADDR_LO_OFFSET                                         31:2 /* presumably a hi:lo bit range - TODO confirm */
+#define NVCA6F_SEM_ADDR_HI                                         (0x00000060)
+#define NVCA6F_SEM_ADDR_HI_OFFSET                                         24:0
+#define NVCA6F_SEM_PAYLOAD_LO                                      (0x00000064)
+#define NVCA6F_SEM_PAYLOAD_HI                                      (0x00000068)
+#define NVCA6F_SEM_EXECUTE                                         (0x0000006c) /* the NVCA6F_SEM_EXECUTE_* names below share this prefix */
+#define NVCA6F_SEM_EXECUTE_OPERATION                                       2:0
+#define NVCA6F_SEM_EXECUTE_OPERATION_ACQUIRE                        0x00000000
+#define NVCA6F_SEM_EXECUTE_OPERATION_RELEASE                        0x00000001
+#define NVCA6F_SEM_EXECUTE_RELEASE_WFI                                   20:20
+#define NVCA6F_SEM_EXECUTE_RELEASE_WFI_DIS                          0x00000000
+#define NVCA6F_SEM_EXECUTE_PAYLOAD_SIZE                                  24:24
+#define NVCA6F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT                       0x00000000
+
+/* GPFIFO entry format: 8 bytes per entry; ENTRY0 and ENTRY1 presumably name its two 32-bit words, fields given as hi:lo bits - TODO confirm */
+#define NVCA6F_GP_ENTRY__SIZE                                          8
+#define NVCA6F_GP_ENTRY0_FETCH                                       0:0
+#define NVCA6F_GP_ENTRY0_FETCH_UNCONDITIONAL                  0x00000000
+#define NVCA6F_GP_ENTRY0_FETCH_CONDITIONAL                    0x00000001
+#define NVCA6F_GP_ENTRY0_GET                                        31:2
+#define NVCA6F_GP_ENTRY0_OPERAND                                    31:0 /* overlaps ENTRY0_FETCH and ENTRY0_GET; presumably an alternate view of the word - TODO confirm */
+#define NVCA6F_GP_ENTRY0_PB_EXTENDED_BASE_OPERAND                   24:8
+#define NVCA6F_GP_ENTRY1_GET_HI                                      7:0
+#define NVCA6F_GP_ENTRY1_LEVEL                                       9:9
+#define NVCA6F_GP_ENTRY1_LEVEL_MAIN                           0x00000000
+#define NVCA6F_GP_ENTRY1_LEVEL_SUBROUTINE                     0x00000001
+#define NVCA6F_GP_ENTRY1_LENGTH                                    30:10
+#define NVCA6F_GP_ENTRY1_SYNC                                      31:31
+#define NVCA6F_GP_ENTRY1_SYNC_PROCEED                         0x00000000
+#define NVCA6F_GP_ENTRY1_SYNC_WAIT                            0x00000001
+#define NVCA6F_GP_ENTRY1_OPCODE                                      7:0 /* same bits as ENTRY1_GET_HI; presumably used by a different entry kind - TODO confirm */
+#define NVCA6F_GP_ENTRY1_OPCODE_NOP                           0x00000000
+#define NVCA6F_GP_ENTRY1_OPCODE_ILLEGAL                       0x00000001
+#define NVCA6F_GP_ENTRY1_OPCODE_GP_CRC                        0x00000002
+#define NVCA6F_GP_ENTRY1_OPCODE_PB_CRC                        0x00000003
+#define NVCA6F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE  0x00000004
+
+#endif // __gb202_clca6f_h__
--- a/kernel-open/nvidia-uvm/clcab5.h
+++ b/kernel-open/nvidia-uvm/clcab5.h
@@ -0,0 +1,44 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _clcab5_h_
+#define _clcab5_h_
+
+#define BLACKWELL_DMA_COPY_B                                                            (0x0000CAB5) /* value matches the NVCAB5_ prefix used below */
+
+#define NVCAB5_LAUNCH_DMA                                                               (0x00000300) /* NOTE(review): presumably a method offset -- confirm */
+#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE                                            1:0 /* range high:low; all four values are enumerated below */
+#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE                                       (0x00000000)
+#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED                                  (0x00000001)
+#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED                              (0x00000002)
+#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PREFETCH                                   (0x00000003)
+
+#define NVCAB5_REQ_ATTR                                                                 (0x00000754) /* NOTE(review): presumably a method offset -- confirm */
+#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS                                               1:0 /* range high:low; all four values are enumerated below */
+#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_FIRST                                   (0x00000000)
+#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_NORMAL                                  (0x00000001)
+#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_LAST                                    (0x00000002)
+#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_DEMOTE                                  (0x00000003)
+
+#endif /* _clcab5_h_ */
+
--- a/kernel-open/nvidia-uvm/clcba2.h
+++ b/kernel-open/nvidia-uvm/clcba2.h
@@ -1,25 +1,25 @@
-/*******************************************************************************
-    Copyright (c) 2021-2022 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

 #include "nvtypes.h"

@@ -32,6 +32,28 @@ extern "C" {

 #define HOPPER_SEC2_WORK_LAUNCH_A                                               (0x0000CBA2)

+typedef volatile struct _clcba2_tag0 {
+    NvV32 Reserved00[0x100];                                                    // 0x00000000 - 0x000003FF
+    NvV32 DecryptCopySrcAddrHi;                                                 // 0x00000400 - 0x00000403
+    NvV32 DecryptCopySrcAddrLo;                                                 // 0x00000404 - 0x00000407
+    NvV32 DecryptCopyDstAddrHi;                                                 // 0x00000408 - 0x0000040B
+    NvV32 DecryptCopyDstAddrLo;                                                 // 0x0000040C - 0x0000040F
+    NvU32 DecryptCopySize;                                                      // 0x00000410 - 0x00000413
+    NvU32 DecryptCopyAuthTagAddrHi;                                             // 0x00000414 - 0x00000417
+    NvU32 DecryptCopyAuthTagAddrLo;                                             // 0x00000418 - 0x0000041B
+    NvV32 DigestAddrHi;                                                         // 0x0000041C - 0x0000041F
+    NvV32 DigestAddrLo;                                                         // 0x00000420 - 0x00000423
+    NvV32 Reserved01[0x7];                                                      // 0x00000424 - 0x0000043F
+    NvV32 SemaphoreA;                                                           // 0x00000440 - 0x00000443
+    NvV32 SemaphoreB;                                                           // 0x00000444 - 0x00000447
+    NvV32 SemaphoreSetPayloadLower;                                             // 0x00000448 - 0x0000044B
+    NvV32 SemaphoreSetPayloadUppper;                                            // 0x0000044C - 0x0000044F
+    NvV32 SemaphoreD;                                                           // 0x00000450 - 0x00000453
+    NvU32 Reserved02[0x7];                                                      // 0x00000454 - 0x0000046F
+    NvV32 Execute;                                                              // 0x00000470 - 0x00000473
+    NvV32 Reserved03[0x23];                                                     // 0x00000474 - 0x000004FF
+} NVCBA2_HOPPER_SEC2_WORK_LAUNCH_AControlPio;
+
 #define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI                                         (0x00000400)
 #define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI_DATA                                    24:0
 #define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO                                         (0x00000404)
@@ -90,6 +112,45 @@ extern "C" {
 #define NVCBA2_EXECUTE_TIMESTAMP                                                5:5
 #define NVCBA2_EXECUTE_TIMESTAMP_DISABLE                                        (0x00000000)
 #define NVCBA2_EXECUTE_TIMESTAMP_ENABLE                                         (0x00000001)
+#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER                                        6:6
+#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER_DISABLE                                (0x00000000)
+#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER_ENABLE                                 (0x00000001)
+
+// Class definitions (NOTE(review): the SCRUB limit presumably applies when EXECUTE_PHYSICAL_SCRUBBER is enabled -- confirm)
+#define NVCBA2_DECRYPT_COPY_SIZE_MAX_BYTES                                      (2*1024*1024)    // 2 MiB (0x00200000)
+#define NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES                                     (1024*1024*1024) // 1 GiB (0x40000000 == 2^30)
+
+// Errors (codes are contiguous, 0x00000000 through 0x0000001D)
+#define NVCBA2_ERROR_NONE                                                       (0x00000000)
+#define NVCBA2_ERROR_DECRYPT_COPY_SRC_ADDR_MISALIGNED_POINTER                   (0x00000001)
+#define NVCBA2_ERROR_DECRYPT_COPY_DEST_ADDR_MISALIGNED_POINTER                  (0x00000002)
+#define NVCBA2_ERROR_DECRYPT_COPY_AUTH_TAG_ADDR_MISALIGNED_POINTER              (0x00000003)
+#define NVCBA2_ERROR_DECRYPT_COPY_DMA_NACK                                      (0x00000004)
+#define NVCBA2_ERROR_DECRYPT_COPY_AUTH_TAG_MISMATCH                             (0x00000005)
+#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_MISALIGNED_POINTER             (0x00000006)
+#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_DMA_NACK                       (0x00000007)
+#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_CHECK_FAILURE                       (0x00000008)
+#define NVCBA2_ERROR_MISALIGNED_SIZE                                            (0x00000009)
+#define NVCBA2_ERROR_MISSING_METHODS                                            (0x0000000A)
+#define NVCBA2_ERROR_SEMAPHORE_RELEASE_DMA_NACK                                 (0x0000000B)
+#define NVCBA2_ERROR_DECRYPT_SIZE_MAX_EXCEEDED                                  (0x0000000C)
+#define NVCBA2_ERROR_OS_APPLICATION                                             (0x0000000D)
+#define NVCBA2_ERROR_INVALID_CTXSW_REQUEST                                      (0x0000000E)
+#define NVCBA2_ERROR_BUFFER_OVERFLOW                                            (0x0000000F)
+#define NVCBA2_ERROR_IV_OVERFLOW                                                (0x00000010)
+#define NVCBA2_ERROR_INTERNAL_SETUP_FAILURE                                     (0x00000011)
+#define NVCBA2_ERROR_DECRYPT_COPY_INTERNAL_DMA_FAILURE                          (0x00000012)
+#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_INTERNAL_DMA_FAILURE           (0x00000013)
+#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_HMAC_CALC_FAILURE                   (0x00000014)
+#define NVCBA2_ERROR_NONCE_OVERFLOW                                             (0x00000015)
+#define NVCBA2_ERROR_AES_GCM_DECRYPTION_FAILURE                                 (0x00000016)
+#define NVCBA2_ERROR_SEMAPHORE_RELEASE_INTERNAL_DMA_FAILURE                     (0x00000017)
+#define NVCBA2_ERROR_KEY_DERIVATION_FAILURE                                     (0x00000018)
+#define NVCBA2_ERROR_SCRUBBER_FAILURE                                           (0x00000019)
+#define NVCBA2_ERROR_SCRUBBER_INVALD_ADDRESS                                    (0x0000001a) // NOTE: the "INVALD" spelling is part of the identifier
+#define NVCBA2_ERROR_SCRUBBER_INSUFFICIENT_PERMISSIONS                          (0x0000001b)
+#define NVCBA2_ERROR_SCRUBBER_MUTEX_ACQUIRE_FAILURE                             (0x0000001c)
+#define NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED                                    (0x0000001d)

 #ifdef __cplusplus
 };     /* extern "C" */
--- a/kernel-open/nvidia-uvm/ctrl2080mc.h
+++ b/kernel-open/nvidia-uvm/ctrl2080mc.h
@@ -35,6 +35,7 @@
 #define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100                (0x00000180)
 #define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100                (0x00000190)
 #define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100                (0x000001A0)
+#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200                (0x000001B0)

 /* valid ARCHITECTURE_GP10x implementation values */
 #define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100              (0x00000000)
--- a/kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_mmu.h
+++ b/kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_mmu.h
@@ -1,560 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2003-2016 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-
-#ifndef __gb100_dev_mmu_h__
-#define __gb100_dev_mmu_h__
-/* This file is autogenerated.  Do not edit */
-#define NV_MMU_PDE                                                      /* ----G */
-#define NV_MMU_PDE_APERTURE_BIG                       (0*32+1):(0*32+0) /* RWXVF */
-#define NV_MMU_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_PDE_SIZE                               (0*32+3):(0*32+2) /* RWXVF */
-#define NV_MMU_PDE_SIZE_FULL                                 0x00000000 /* RW--V */
-#define NV_MMU_PDE_SIZE_HALF                                 0x00000001 /* RW--V */
-#define NV_MMU_PDE_SIZE_QUARTER                              0x00000002 /* RW--V */
-#define NV_MMU_PDE_SIZE_EIGHTH                               0x00000003 /* RW--V */
-#define NV_MMU_PDE_ADDRESS_BIG_SYS                   (0*32+31):(0*32+4) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_BIG_VID                   (0*32+31-3):(0*32+4) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER             (0*32+31):(0*32+32-3) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0                    0x00000000 /* RW--V */
-#define NV_MMU_PDE_APERTURE_SMALL                     (1*32+1):(1*32+0) /* RWXVF */
-#define NV_MMU_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_PDE_VOL_SMALL                          (1*32+2):(1*32+2) /* RWXVF */
-#define NV_MMU_PDE_VOL_SMALL_TRUE                            0x00000001 /* RW--V */
-#define NV_MMU_PDE_VOL_SMALL_FALSE                           0x00000000 /* RW--V */
-#define NV_MMU_PDE_VOL_BIG                            (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_PDE_VOL_BIG_TRUE                              0x00000001 /* RW--V */
-#define NV_MMU_PDE_VOL_BIG_FALSE                             0x00000000 /* RW--V */
-#define NV_MMU_PDE_ADDRESS_SMALL_SYS                 (1*32+31):(1*32+4) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_SMALL_VID                 (1*32+31-3):(1*32+4) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER           (1*32+31):(1*32+32-3) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0                  0x00000000 /* RW--V */
-#define NV_MMU_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_PDE__SIZE                                              8
-#define NV_MMU_PTE                                                      /* ----G */
-#define NV_MMU_PTE_VALID                              (0*32+0):(0*32+0) /* RWXVF */
-#define NV_MMU_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_PTE_PRIVILEGE                          (0*32+1):(0*32+1) /* RWXVF */
-#define NV_MMU_PTE_PRIVILEGE_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_PTE_PRIVILEGE_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_PTE_READ_ONLY                          (0*32+2):(0*32+2) /* RWXVF */
-#define NV_MMU_PTE_READ_ONLY_TRUE                                  0x1  /* RW--V */
-#define NV_MMU_PTE_READ_ONLY_FALSE                                 0x0  /* RW--V */
-#define NV_MMU_PTE_ENCRYPTED                          (0*32+3):(0*32+3) /* RWXVF */
-#define NV_MMU_PTE_ENCRYPTED_TRUE                            0x00000001 /* R---V */
-#define NV_MMU_PTE_ENCRYPTED_FALSE                           0x00000000 /* R---V */
-#define NV_MMU_PTE_ADDRESS_SYS                      (0*32+31):(0*32+4) /* RWXVF */
-#define NV_MMU_PTE_ADDRESS_VID                      (0*32+31-3):(0*32+4) /* RWXVF */
-#define NV_MMU_PTE_ADDRESS_VID_PEER                (0*32+31):(0*32+32-3) /* RWXVF */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_0                       0x00000000 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_1                       0x00000001 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_2                       0x00000002 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_3                       0x00000003 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_4                       0x00000004 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_5                       0x00000005 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_6                       0x00000006 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_7                       0x00000007 /* RW--V */
-#define NV_MMU_PTE_VOL                                (1*32+0):(1*32+0) /* RWXVF */
-#define NV_MMU_PTE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_PTE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_PTE_APERTURE                           (1*32+2):(1*32+1) /* RWXVF */
-#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_PTE_LOCK                               (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_PTE_LOCK_TRUE                                        0x1 /* RW--V */
-#define NV_MMU_PTE_LOCK_FALSE                                       0x0 /* RW--V */
-#define NV_MMU_PTE_ATOMIC_DISABLE                     (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE                              0x1 /* RW--V */
-#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE                             0x0 /* RW--V */
-#define NV_MMU_PTE_COMPTAGLINE                      (1*32+20+11):(1*32+12) /* RWXVF */
-#define NV_MMU_PTE_READ_DISABLE                     (1*32+30):(1*32+30) /* RWXVF */
-#define NV_MMU_PTE_READ_DISABLE_TRUE                               0x1  /* RW--V */
-#define NV_MMU_PTE_READ_DISABLE_FALSE                              0x0  /* RW--V */
-#define NV_MMU_PTE_WRITE_DISABLE                    (1*32+31):(1*32+31) /* RWXVF */
-#define NV_MMU_PTE_WRITE_DISABLE_TRUE                              0x1  /* RW--V */
-#define NV_MMU_PTE_WRITE_DISABLE_FALSE                             0x0  /* RW--V */
-#define NV_MMU_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_PTE__SIZE                                             8
-#define NV_MMU_PTE_COMPTAGS_NONE                                    0x0 /*       */
-#define NV_MMU_PTE_COMPTAGS_1                                       0x1 /*       */
-#define NV_MMU_PTE_COMPTAGS_2                                       0x2 /*       */
-#define NV_MMU_PTE_KIND                              (1*32+7):(1*32+4) /* RWXVF */
-#define NV_MMU_PTE_KIND_INVALID                       0x07 /* R---V */
-#define NV_MMU_PTE_KIND_PITCH                         0x00 /* R---V */
-#define NV_MMU_PTE_KIND_GENERIC_MEMORY                                                  0x6 /* R---V */
-#define NV_MMU_PTE_KIND_Z16                                                             0x1 /* R---V */
-#define NV_MMU_PTE_KIND_S8                                                              0x2 /* R---V */
-#define NV_MMU_PTE_KIND_S8Z24                                                           0x3 /* R---V */
-#define NV_MMU_PTE_KIND_ZF32_X24S8                                                      0x4 /* R---V */
-#define NV_MMU_PTE_KIND_Z24S8                                                           0x5 /* R---V */
-#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE                                     0x8 /* R---V */
-#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC                         0x9 /* R---V */
-#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC                                     0xA /* R---V */
-#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC                                    0xB /* R---V */
-#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC                                  0xC /* R---V */
-#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC                             0xD /* R---V */
-#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC                                  0xE /* R---V */
-#define NV_MMU_PTE_KIND_SMSKED_MESSAGE                                                  0xF /* R---V */
-#define NV_MMU_VER1_PDE                                                      /* ----G */
-#define NV_MMU_VER1_PDE_APERTURE_BIG                       (0*32+1):(0*32+0) /* RWXVF */
-#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_VER1_PDE_SIZE                               (0*32+3):(0*32+2) /* RWXVF */
-#define NV_MMU_VER1_PDE_SIZE_FULL                                 0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_SIZE_HALF                                 0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_SIZE_QUARTER                              0x00000002 /* RW--V */
-#define NV_MMU_VER1_PDE_SIZE_EIGHTH                               0x00000003 /* RW--V */
-#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS                   (0*32+31):(0*32+4) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID                   (0*32+31-3):(0*32+4) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER             (0*32+31):(0*32+32-3) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0                    0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL                     (1*32+1):(1*32+0) /* RWXVF */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_VER1_PDE_VOL_SMALL                          (1*32+2):(1*32+2) /* RWXVF */
-#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE                            0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE                           0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_VOL_BIG                            (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_VER1_PDE_VOL_BIG_TRUE                              0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_VOL_BIG_FALSE                             0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS                 (1*32+31):(1*32+4) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID                 (1*32+31-3):(1*32+4) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER           (1*32+31):(1*32+32-3) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0                  0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER1_PDE__SIZE                                              8
-#define NV_MMU_VER1_PTE                                                      /* ----G */
-#define NV_MMU_VER1_PTE_VALID                              (0*32+0):(0*32+0) /* RWXVF */
-#define NV_MMU_VER1_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER1_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER1_PTE_PRIVILEGE                          (0*32+1):(0*32+1) /* RWXVF */
-#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_VER1_PTE_READ_ONLY                          (0*32+2):(0*32+2) /* RWXVF */
-#define NV_MMU_VER1_PTE_READ_ONLY_TRUE                                  0x1  /* RW--V */
-#define NV_MMU_VER1_PTE_READ_ONLY_FALSE                                 0x0  /* RW--V */
-#define NV_MMU_VER1_PTE_ENCRYPTED                          (0*32+3):(0*32+3) /* RWXVF */
-#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE                            0x00000001 /* R---V */
-#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE                           0x00000000 /* R---V */
-#define NV_MMU_VER1_PTE_ADDRESS_SYS                      (0*32+31):(0*32+4) /* RWXVF */
-#define NV_MMU_VER1_PTE_ADDRESS_VID                      (0*32+31-3):(0*32+4) /* RWXVF */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER                (0*32+31):(0*32+32-3) /* RWXVF */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0                       0x00000000 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1                       0x00000001 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2                       0x00000002 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3                       0x00000003 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4                       0x00000004 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5                       0x00000005 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6                       0x00000006 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7                       0x00000007 /* RW--V */
-#define NV_MMU_VER1_PTE_VOL                                (1*32+0):(1*32+0) /* RWXVF */
-#define NV_MMU_VER1_PTE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_VER1_PTE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_VER1_PTE_APERTURE                           (1*32+2):(1*32+1) /* RWXVF */
-#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER1_PTE_ATOMIC_DISABLE                     (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE                              0x1 /* RW--V */
-#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE                             0x0 /* RW--V */
-#define NV_MMU_VER1_PTE_COMPTAGLINE                      (1*32+20+11):(1*32+12) /* RWXVF */
-#define NV_MMU_VER1_PTE_KIND                              (1*32+11):(1*32+4) /* RWXVF */
-#define NV_MMU_VER1_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER1_PTE__SIZE                                             8
-#define NV_MMU_VER1_PTE_COMPTAGS_NONE                                    0x0 /*       */
-#define NV_MMU_VER1_PTE_COMPTAGS_1                                       0x1 /*       */
-#define NV_MMU_VER1_PTE_COMPTAGS_2                                       0x2 /*       */
-#define NV_MMU_NEW_PDE                                                      /* ----G */
-#define NV_MMU_NEW_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_NEW_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_NEW_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_NEW_PDE_IS_PDE                                           0:0 /* RWXVF */
-#define NV_MMU_NEW_PDE_IS_PDE_TRUE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_PDE_IS_PDE_FALSE                                     0x1 /* RW--V */
-#define NV_MMU_NEW_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_NEW_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_NEW_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_PDE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_NEW_PDE_APERTURE_INVALID                          0x00000000 /* RW--V */
-#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY                     0x00000001 /* RW--V */
-#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_NEW_PDE_VOL                                              3:3 /* RWXVF */
-#define NV_MMU_NEW_PDE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_NEW_PDE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_NEW_PDE_NO_ATS                                            5:5 /* RWXVF */
-#define NV_MMU_NEW_PDE_NO_ATS_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_NEW_PDE_NO_ATS_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_PDE_ADDRESS_SYS                                     53:8 /* RWXVF */
-#define NV_MMU_NEW_PDE_ADDRESS_VID             (35-3):8 /* RWXVF */
-#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER       35:(36-3) /* RWXVF */
-#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0                        0x00000000 /* RW--V */
-#define NV_MMU_NEW_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_NEW_PDE__SIZE                                              8
-#define NV_MMU_NEW_DUAL_PDE                                                      /* ----G */
-#define NV_MMU_NEW_DUAL_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_IS_PDE                                           0:0 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE                                     0x1 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG                                     2:1 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VOL_BIG                                          3:3 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE                              0x00000001 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE                             0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_NO_ATS                                       5:5 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE                                  0x1 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE                                 0x0 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS                                 53:(8-4) /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID         (35-3):(8-4) /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER   35:(36-3) /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0                    0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL                                 66:65 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL                                      67:67 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE                            0x00000001 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE                           0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS                             117:72 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID      (99-3):72 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0                  0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /*       */
-#define NV_MMU_NEW_DUAL_PDE__SIZE                                             16
-#define NV_MMU_NEW_PTE                                                      /* ----G */
-#define NV_MMU_NEW_PTE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_NEW_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_NEW_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_PTE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_NEW_PTE_VOL                                              3:3 /* RWXVF */
-#define NV_MMU_NEW_PTE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_NEW_PTE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_NEW_PTE_ENCRYPTED                                        4:4 /* RWXVF */
-#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE                            0x00000001 /* R---V */
-#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE                           0x00000000 /* R---V */
-#define NV_MMU_NEW_PTE_PRIVILEGE                                        5:5 /* RWXVF */
-#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_NEW_PTE_READ_ONLY                                        6:6 /* RWXVF */
-#define NV_MMU_NEW_PTE_READ_ONLY_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_NEW_PTE_READ_ONLY_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_NEW_PTE_ATOMIC_DISABLE                                   7:7 /* RWXVF */
-#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE                              0x1 /* RW--V */
-#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE                             0x0 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_SYS                                     53:8 /* RWXVF */
-#define NV_MMU_NEW_PTE_ADDRESS_VID             (35-3):8 /* RWXVF */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER       35:(36-3) /* RWXVF */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0                        0x00000000 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1                        0x00000001 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2                        0x00000002 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3                        0x00000003 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4                        0x00000004 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5                        0x00000005 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6                        0x00000006 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7                        0x00000007 /* RW--V */
-#define NV_MMU_NEW_PTE_COMPTAGLINE   (20+35):36 /* RWXVF */
-#define NV_MMU_NEW_PTE_KIND                                           63:56 /* RWXVF */
-#define NV_MMU_NEW_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_NEW_PTE__SIZE                                              8
-#define NV_MMU_VER2_PDE                                                      /* ----G */
-#define NV_MMU_VER2_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_VER2_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER2_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER2_PDE_IS_PDE                                           0:0 /* RWXVF */
-#define NV_MMU_VER2_PDE_IS_PDE_TRUE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_PDE_IS_PDE_FALSE                                     0x1 /* RW--V */
-#define NV_MMU_VER2_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER2_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER2_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_PDE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_VER2_PDE_APERTURE_INVALID                          0x00000000 /* RW--V */
-#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY                     0x00000001 /* RW--V */
-#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER2_PDE_VOL                                              3:3 /* RWXVF */
-#define NV_MMU_VER2_PDE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_VER2_PDE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_VER2_PDE_NO_ATS                                           5:5 /* RWXVF */
-#define NV_MMU_VER2_PDE_NO_ATS_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER2_PDE_NO_ATS_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER2_PDE_ADDRESS_SYS                                     53:8 /* RWXVF */
-#define NV_MMU_VER2_PDE_ADDRESS_VID             (35-3):8 /* RWXVF */
-#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER       35:(36-3) /* RWXVF */
-#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0                        0x00000000 /* RW--V */
-#define NV_MMU_VER2_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER2_PDE__SIZE                                              8
-#define NV_MMU_VER2_DUAL_PDE                                                      /* ----G */
-#define NV_MMU_VER2_DUAL_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_IS_PDE                                           0:0 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE                                     0x1 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG                                     2:1 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VOL_BIG                                          3:3 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE                              0x00000001 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE                             0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_NO_ATS                                      5:5 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE                                 0x1 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE                                0x0 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS                                 53:(8-4) /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID         (35-3):(8-4) /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER   35:(36-3) /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0                    0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL                                 66:65 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL                                      67:67 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE                            0x00000001 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE                           0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS                             117:72 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID      (99-3):72 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0                  0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /*       */
-#define NV_MMU_VER2_DUAL_PDE__SIZE                                             16
-#define NV_MMU_VER2_PTE                                                      /* ----G */
-#define NV_MMU_VER2_PTE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER2_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER2_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_PTE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER2_PTE_VOL                                              3:3 /* RWXVF */
-#define NV_MMU_VER2_PTE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_VER2_PTE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_VER2_PTE_ENCRYPTED                                        4:4 /* RWXVF */
-#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE                            0x00000001 /* R---V */
-#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE                           0x00000000 /* R---V */
-#define NV_MMU_VER2_PTE_PRIVILEGE                                        5:5 /* RWXVF */
-#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_VER2_PTE_READ_ONLY                                        6:6 /* RWXVF */
-#define NV_MMU_VER2_PTE_READ_ONLY_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_VER2_PTE_READ_ONLY_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_VER2_PTE_ATOMIC_DISABLE                                   7:7 /* RWXVF */
-#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE                              0x1 /* RW--V */
-#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE                             0x0 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_SYS                                     53:8 /* RWXVF */
-#define NV_MMU_VER2_PTE_ADDRESS_VID             (35-3):8 /* RWXVF */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER       35:(36-3) /* RWXVF */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0                        0x00000000 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1                        0x00000001 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2                        0x00000002 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3                        0x00000003 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4                        0x00000004 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5                        0x00000005 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6                        0x00000006 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7                        0x00000007 /* RW--V */
-#define NV_MMU_VER2_PTE_COMPTAGLINE   (20+35):36 /* RWXVF */
-#define NV_MMU_VER2_PTE_KIND                                           63:56 /* RWXVF */
-#define NV_MMU_VER2_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER2_PTE__SIZE                                              8
-#define NV_MMU_VER3_PDE                                                      /* ----G */
-#define NV_MMU_VER3_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_VER3_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER3_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER3_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER3_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER3_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER3_PDE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_VER3_PDE_APERTURE_INVALID                          0x00000000 /* RW--V */
-#define NV_MMU_VER3_PDE_APERTURE_VIDEO_MEMORY                     0x00000001 /* RW--V */
-#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF                                                                        5:3 /* RWXVF */
-#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED               0x00000000 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED                                        0x00000000 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED                                             0x00000000 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED              0x00000001 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED                                      0x00000001 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_ALLOWED                                              0x00000001 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED       0x00000002 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED                                    0x00000002 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED                                         0x00000002 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED      0x00000003 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED                                  0x00000003 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_NOT_ALLOWED                                          0x00000003 /* RW--V */
-#define NV_MMU_VER3_PDE_ADDRESS                                             51:12 /* RWXVF */
-#define NV_MMU_VER3_PDE_ADDRESS_SHIFT                                  0x0000000c /*       */
-#define NV_MMU_VER3_PDE__SIZE                                              8
-#define NV_MMU_VER3_DUAL_PDE                                                      /* ----G */
-#define NV_MMU_VER3_DUAL_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG                                     2:1 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG                                                                        5:3 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED               0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED                                        0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED                                             0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED              0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED                                      0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_ALLOWED                                              0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED       0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED                                    0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED                                         0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED      0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED                                  0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_NOT_ALLOWED                                          0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG                                     51:8 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL                                 66:65 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL                                                                      69:67 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED               0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED                                        0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED                                             0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED              0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED                                      0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_ALLOWED                                              0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED       0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED                                    0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED                                         0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED      0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED                                  0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_NOT_ALLOWED                                          0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SMALL                                 115:76 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /*       */
-#define NV_MMU_VER3_DUAL_PDE__SIZE                                             16
-#define NV_MMU_VER3_PTE                                                      /* ----G */
-#define NV_MMU_VER3_PTE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER3_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER3_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER3_PTE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF                                                                        7:3 /* RWXVF */
-#define NV_MMU_VER3_PTE_PCF_INVALID                                                         0x00000000 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_SPARSE                                                          0x00000001 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_MAPPING_NOWHERE                                                 0x00000002 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_NO_VALID_4KB_PAGE                                               0x00000003 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE                                    0x00000000 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE                                  0x00000001 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACE                                  0x00000002 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACE                                0x00000003 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACE                                    0x00000004 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACE                                   0x00000005 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACE                                  0x00000006 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACE                                0x00000007 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE                                 0x00000008 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE                               0x00000009 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE                               0x0000000A /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE                             0x0000000B /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE                                 0x0000000C /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE                               0x0000000D /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE                               0x0000000E /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE                             0x0000000F /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD                                    0x00000010 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD                                  0x00000011 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACD                                  0x00000012 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACD                                0x00000013 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACD                                    0x00000014 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACD                                  0x00000015 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACD                                  0x00000016 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACD                                0x00000017 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD                                 0x00000018 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD                               0x00000019 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACD                               0x0000001A /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACD                             0x0000001B /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD                                 0x0000001C /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD                               0x0000001D /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACD                               0x0000001E /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD                             0x0000001F /* RW--V */
-#define NV_MMU_VER3_PTE_KIND                                           11:8 /* RWXVF */
-#define NV_MMU_VER3_PTE_ADDRESS                                         51:12 /* RWXVF */
-#define NV_MMU_VER3_PTE_ADDRESS_SYS                                     51:12 /* RWXVF */
-#define NV_MMU_VER3_PTE_ADDRESS_PEER                                    51:12 /* RWXVF */
-#define NV_MMU_VER3_PTE_ADDRESS_VID                                     39:12 /* RWXVF */
-#define NV_MMU_VER3_PTE_PEER_ID                63:(64-3) /* RWXVF */
-#define NV_MMU_VER3_PTE_PEER_ID_0                                 0x00000000 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_1                                 0x00000001 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_2                                 0x00000002 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_3                                 0x00000003 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_4                                 0x00000004 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_5                                 0x00000005 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_6                                 0x00000006 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_7                                 0x00000007 /* RW--V */
-#define NV_MMU_VER3_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER3_PTE__SIZE                                              8
-#define NV_MMU_CLIENT                                             /* ----G */
-#define NV_MMU_CLIENT_KIND                                    2:0 /* RWXVF */
-#define NV_MMU_CLIENT_KIND_Z16                                0x1 /* R---V */
-#define NV_MMU_CLIENT_KIND_S8                                 0x2 /* R---V */
-#define NV_MMU_CLIENT_KIND_S8Z24                              0x3 /* R---V */
-#define NV_MMU_CLIENT_KIND_ZF32_X24S8                         0x4 /* R---V */
-#define NV_MMU_CLIENT_KIND_Z24S8                              0x5 /* R---V */
-#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY                     0x6 /* R---V */
-#define NV_MMU_CLIENT_KIND_INVALID                            0x7 /* R---V */
-#endif // __gb100_dev_mmu_h__
--- a/kernel-open/nvidia-uvm/nvidia-uvm-sources.Kbuild
+++ b/kernel-open/nvidia-uvm/nvidia-uvm-sources.Kbuild
@@ -1,14 +1,6 @@
 NVIDIA_UVM_SOURCES ?=
 NVIDIA_UVM_SOURCES_CXX ?=

-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/nvstatus.c
@@ -53,6 +45,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tracker.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_host.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_ce.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_mmu.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_fault_buffer.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_access_counter_buffer.c
@@ -66,7 +59,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c
@@ -81,8 +73,13 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
@@ -101,6 +98,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
@@ -128,3 +126,4 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
--- a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
+++ b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
@@ -61,6 +61,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += for_each_sgtable_dma_page
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache

 NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
@@ -75,6 +76,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
 NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
 NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
 NV_CONFTEST_TYPE_COMPILE_TESTS += fault_flag_remote_present
+NV_CONFTEST_TYPE_COMPILE_TESTS += sg_dma_page_iter
 NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data

 NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
--- a/kernel-open/nvidia-uvm/uvm.c
+++ b/kernel-open/nvidia-uvm/uvm.c
@@ -240,7 +240,7 @@ static void uvm_release_deferred(void *data)
    // Since this function is only scheduled to run when uvm_release() fails
    // to trylock-acquire the pm.lock, the following acquisition attempt
    // is expected to block this thread, and cause it to remain blocked until
-    // uvm_resume() releases the lock.  As a result, the deferred release
+    // uvm_resume() releases the lock. As a result, the deferred release
    // kthread queue may stall for long periods of time.
    uvm_down_read(&g_uvm_global.pm.lock);

@@ -292,14 +292,14 @@ static int uvm_release(struct inode *inode, struct file *filp)

    // Because the kernel discards the status code returned from this release
    // callback, early exit in case of a pm.lock acquisition failure is not
-    // an option.  Instead, the teardown work normally performed synchronously
+    // an option. Instead, the teardown work normally performed synchronously
    // needs to be scheduled to run after uvm_resume() releases the lock.
    if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
        uvm_va_space_destroy(va_space);
        uvm_up_read(&g_uvm_global.pm.lock);
    }
    else {
-        // Remove references to this inode from the address_space.  This isn't
+        // Remove references to this inode from the address_space. This isn't
        // strictly necessary, as any CPU mappings of this file have already
        // been destroyed, and va_space->mapping won't be used again. Still,
        // the va_space survives the inode if its destruction is deferred, in
@@ -680,6 +680,9 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
    // Semaphore pool vmas do not have vma wrappers, but some functions will
    // assume vm_private_data is a wrapper.
    vma->vm_private_data = NULL;
+#if defined(VM_WIPEONFORK)
+    nv_vm_flags_set(vma, VM_WIPEONFORK);
+#endif

    if (is_fork) {
        // If we forked, leave the parent vma alone.
@@ -772,6 +775,9 @@ static void uvm_vm_open_device_p2p(struct vm_area_struct *vma)
    // Device P2P vmas do not have vma wrappers, but some functions will
    // assume vm_private_data is a wrapper.
    vma->vm_private_data = NULL;
+#if defined(VM_WIPEONFORK)
+    nv_vm_flags_set(vma, VM_WIPEONFORK);
+#endif

    if (is_fork) {
        // If we forked, leave the parent vma alone.
@@ -861,8 +867,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
    }

    // If the PM lock cannot be acquired, disable the VMA and report success
-    // to the caller.  The caller is expected to determine whether the
-    // map operation succeeded via an ioctl() call.  This is necessary to
+    // to the caller. The caller is expected to determine whether the
+    // map operation succeeded via an ioctl() call. This is necessary to
    // safely handle MAP_FIXED, which needs to complete atomically to prevent
    // the loss of the virtual address range.
    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
@@ -1227,19 +1233,8 @@ static int uvm_init(void)
        goto error;
    }

-    pr_info("Loaded the UVM driver, major device number %d.\n", MAJOR(g_uvm_base_dev));
-
    if (uvm_enable_builtin_tests)
-        pr_info("Built-in UVM tests are enabled. This is a security risk.\n");
-
-    // After Open RM is released, both the enclosing "#if" and this comment
-    // block should be removed, because the uvm_hmm_is_enabled_system_wide()
-    // check is both necessary and sufficient for reporting functionality.
-    // Until that time, however, we need to avoid advertisting UVM's ability to
-    // enable HMM functionality.
-
-    if (uvm_hmm_is_enabled_system_wide())
-        UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
+        UVM_INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");

    return 0;

@@ -1268,8 +1263,6 @@ static void uvm_exit(void)
    uvm_global_exit();

    uvm_test_unload_state_exit();
-
-    pr_info("Unloaded the UVM driver.\n");
 }

 static void __exit uvm_exit_entry(void)
--- a/kernel-open/nvidia-uvm/uvm.h
+++ b/kernel-open/nvidia-uvm/uvm.h
@@ -379,6 +379,17 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
 //         OS state required to register the GPU is malformed, or the partition
 //         identified by the user handles or its configuration changed.
 //
+//     NV_ERR_NVLINK_FABRIC_NOT_READY:
+//         (On NvSwitch-connected systems) Indicates that the fabric has not been
+//         configured yet. Caller must retry GPU registration.
+//
+//     NV_ERR_NVLINK_FABRIC_FAILURE:
+//         (On NvSwitch-connected systems) Indicates that the NvLink fabric
+//         failed to be configured.
+//
+//     NV_ERR_GPU_MEMORY_ONLINING_FAILURE:
+//         (On coherent systems) The GPU's memory onlining failed.
+//
 //     NV_ERR_GENERIC:
 //         Unexpected error. We try hard to avoid returning this error code,
 //         because it is not very informative.
@@ -1317,9 +1328,8 @@ NV_STATUS UvmCleanUpZombieResources(void);
 //
 //     NV_ERR_INVALID_ARGUMENT:
 //         perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
-//         or caching is requested on more than one GPU.
-//         The Confidential Computing feature is enabled and the perGpuAttribs
-//         list is empty.
+//         or caching is requested on more than one GPU, or (in Confidential
+//         Computing only) the perGpuAttribs list is empty.
 //
 //     NV_ERR_NOT_SUPPORTED:
 //         The current process is not the one which called UvmInitialize, and
@@ -1469,7 +1479,9 @@ NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
 // If read duplication is enabled on any pages in the VA range, then those pages
 // are read duplicated at the destination processor, leaving the source copy, if
 // present, intact with only its mapping changed to read-only if it wasn't
-// already mapped that way.
+// already mapped that way. The exception to this behavior is migrating pages
+// between different NUMA nodes, in which case the pages are migrated to the
+// destination node and a read-only mapping is created to the migrated pages.
 //
 // Pages in the VA range are migrated even if their preferred location is set to
 // a processor other than the destination processor.
@@ -2212,7 +2224,9 @@ NV_STATUS UvmMapDynamicParallelismRegion(void                  *base,
 //
 // If UvmMigrate, UvmMigrateAsync or UvmMigrateRangeGroup is called on any pages
 // in this VA range, then those pages will also be read duplicated on the
-// destination processor for the migration.
+// destination processor for the migration unless the migration is between CPU
+// NUMA nodes, in which case the pages are migrated to the destination NUMA
+// node and a read-only mapping to the migrated pages is created.
 //
 // Enabling read duplication on a VA range requires the CPU and all GPUs with
 // registered VA spaces to be fault-capable. Otherwise, the migration and
@@ -3945,9 +3959,7 @@ NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
 // In-process scenario when targetVa address + size overlaps with buffer + size.
 //
 // This is essentially a UVM version of RM ctrl call
-// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
-// information), please refer to the documentation:
-// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
+// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY.
 //
 // Arguments:
 //     session: (INPUT)
@@ -4001,9 +4013,7 @@ NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle  session,
 // buffer + size.
 //
 // This is essentially a UVM version of RM ctrl call
-// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
-// information), please refer to the documentation:
-// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
+// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY.
 //
 // Arguments:
 //     session: (INPUT)
--- a/kernel-open/nvidia-uvm/uvm_ada.c
+++ b/kernel-open/nvidia-uvm/uvm_ada.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021-2023 NVIDIA Corporation
+    Copyright (c) 2021-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);

-    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
-                                                          parent_gpu->utlb_per_gpc_count;
+    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
-        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
-                                                                                 8)));
+        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Ada covers 128 TB and that's the minimum size
@@ -51,6 +49,9 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->rm_va_base = 0;
    parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;

+    parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
+    parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
+
    parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

@@ -79,8 +80,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->access_counters_supported = true;

-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
@@ -98,4 +97,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->plc_supported = true;

    parent_gpu->no_ats_range_required = false;
+
+    parent_gpu->conf_computing.per_channel_key_rotation = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_ampere.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2018-2023 NVIDIA Corporation
+    Copyright (c) 2018-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);

-    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
-                                                          parent_gpu->utlb_per_gpc_count;
+    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
-        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
-                                                                           (sizeof(dummy->fault_source.utlb_id) * 8)));
+        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Ampere covers 128 TB and that's the minimum
@@ -51,6 +49,9 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->rm_va_base = 0;
    parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;

+    parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
+    parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
+
    parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

@@ -83,8 +84,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->access_counters_supported = true;

-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
@@ -107,4 +106,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->plc_supported = true;

    parent_gpu->no_ats_range_required = false;
+
+    parent_gpu->conf_computing.per_channel_key_rotation = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_ampere_ce.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere_ce.c
@@ -29,6 +29,8 @@

 bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
 {
+    UVM_ASSERT(push->channel);
+
    if (!uvm_channel_is_proxy(push->channel))
        return true;

@@ -116,6 +118,16 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
 {
    NvU64 push_begin_gpu_va;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+    const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);
+
+    UVM_ASSERT(push->channel);
+
+    if (peer_copy && !uvm_channel_is_p2p(push->channel)) {
+        UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
+                      src.address,
+                      dst.address);
+        return false;
+    }

    if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
        return true;
@@ -182,6 +194,8 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
 {
    uvm_pushbuffer_t *pushbuffer;

+    UVM_ASSERT(push->channel);
+
    if (!uvm_channel_is_proxy(push->channel))
        return;

--- a/kernel-open/nvidia-uvm/uvm_ampere_host.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere_host.c
@@ -36,6 +36,8 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,
    if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
        return true;

+    UVM_ASSERT(push->channel);
+
    if (uvm_channel_is_privileged(push->channel)) {
        switch (method_address) {
            case NVC56F_SET_OBJECT:
@@ -84,6 +86,8 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,

 bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
 {
+    UVM_ASSERT(push->channel);
+
    if (!uvm_channel_is_proxy(push->channel))
        return true;

--- a/kernel-open/nvidia-uvm/uvm_ats_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_ats_faults.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2023 NVIDIA Corporation
+    Copyright (c) 2024-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -29,6 +29,7 @@
 #include <linux/nodemask.h>
 #include <linux/mempolicy.h>
 #include <linux/mmu_notifier.h>
+#include <linux/topology.h>

 #if UVM_HMM_RANGE_FAULT_SUPPORTED()
 #include <linux/hmm.h>
@@ -55,8 +56,9 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
    NvU64 user_space_start;
    NvU64 user_space_length;
    bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
-    bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
-    uvm_populate_permissions_t populate_permissions = fault_service_type ?
+    bool is_fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
+    bool is_prefetch_faults = (is_fault_service_type && (access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH));
+    uvm_populate_permissions_t populate_permissions = is_fault_service_type ?
                                            (write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
                                            UVM_POPULATE_PERMISSIONS_INHERIT;

@@ -97,12 +99,20 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
        .start                              = start,
        .length                             = length,
        .populate_permissions               = populate_permissions,
-        .touch                              = fault_service_type,
-        .skip_mapped                        = fault_service_type,
-        .populate_on_cpu_alloc_failures     = fault_service_type,
-        .populate_on_migrate_vma_failures   = fault_service_type,
+        .touch                              = is_fault_service_type,
+        .skip_mapped                        = is_fault_service_type,
+        .populate_on_cpu_alloc_failures     = is_fault_service_type,
+        .populate_on_migrate_vma_failures   = is_fault_service_type,
        .user_space_start                   = &user_space_start,
        .user_space_length                  = &user_space_length,
+
+        // Potential STO NVLINK errors cannot be resolved in fault or access
+        // counter handlers. If there are GPUs to check for STO, it's either
+        // a) a false positive, and the migration went through ok, or
+        // b) a true positive, and the destination is all zeros, and the
+        //    application will be terminated soon.
+        .gpus_to_check_for_nvlink_errors    = NULL,
+        .fail_on_unresolved_sto_errors      = !is_fault_service_type || is_prefetch_faults,
    };

    UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
@@ -113,7 +123,7 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
    // set skip_mapped to true. For pages already mapped, this will only handle
    // PTE upgrades if needed.
    status = uvm_migrate_pageable(&uvm_migrate_args);
-    if (fault_service_type && (status == NV_WARN_NOTHING_TO_DO))
+    if (is_fault_service_type && (status == NV_WARN_NOTHING_TO_DO))
        status = NV_OK;

    UVM_ASSERT(status != NV_ERR_MORE_PROCESSING_REQUIRED);
@@ -129,9 +139,9 @@ static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
    uvm_ats_fault_invalidate_t *ats_invalidate;

    if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
-        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
+        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.replayable.ats_invalidate;
    else
-        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
+        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.non_replayable.ats_invalidate;

    if (!ats_invalidate->tlb_batch_pending) {
        uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
@@ -146,7 +156,10 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
                                       uvm_ats_fault_context_t *ats_context)
 {
    uvm_gpu_t *gpu = gpu_va_space->gpu;
-    int residency = uvm_gpu_numa_node(gpu);
+    int residency;
+
+    UVM_ASSERT(gpu->mem_info.numa.enabled);
+    residency = uvm_gpu_numa_node(gpu);

 #if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
    struct mempolicy *vma_policy = vma_policy(vma);
@@ -279,6 +292,27 @@ static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =

 #endif

+static bool resident_policy_match(struct vm_area_struct *vma, int dst_nid, int src_nid)
+{
+#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
+    struct mempolicy *vma_policy = vma_policy(vma);
+
+    // TODO: Bug 4981209: When migrations between CPU numa nodes are supported,
+    // add (dst_nid != closest_cpu_numa_node) to allow migrations between CPU
+    // NUMA nodes when destination is the closest_cpu_numa_node.
+    if (vma_policy &&
+        node_isset(src_nid, vma_policy->nodes) &&
+        node_isset(dst_nid, vma_policy->nodes) &&
+        !cpumask_empty(cpumask_of_node(src_nid)) &&
+        !cpumask_empty(cpumask_of_node(dst_nid))) {
+
+        return true;
+    }
+#endif
+
+    return false;
+}
+
 static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
                                            struct vm_area_struct *vma,
                                            NvU64 base,
@@ -358,9 +392,23 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,

            if (pfn & HMM_PFN_VALID) {
                struct page *page = hmm_pfn_to_page(pfn);
+                int resident_node = page_to_nid(page);

-                if (page_to_nid(page) == ats_context->residency_node)
+                // Set the residency_mask if:
+                // - The page is already resident at the intended destination.
+                //   or
+                // - If both the source and destination nodes are CPU nodes and
+                //   source node is already in the list of preferred nodes for
+                //   the vma. On multi-CPU NUMA node architectures, this avoids
+                //   unnecessary migrations between CPU nodes. Since the
+                //   specific ats_context->residency_node selected by
+                //   ats_batch_select_residency() is just a guess among the list
+                //   of preferred nodes, paying the cost of migration across the
+                //   CPU preferred nodes in this case can't be justified.
+                if ((resident_node == ats_context->residency_node) ||
+                    resident_policy_match(vma, ats_context->residency_node, resident_node)) {
                    uvm_page_mask_set(residency_mask, page_index);
+                }

                ats_context->prefetch_state.first_touch = false;
            }
@@ -463,6 +511,65 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
    return status;
 }

+static NV_STATUS uvm_ats_service_faults_region(uvm_gpu_va_space_t *gpu_va_space,
+                                               struct vm_area_struct *vma,
+                                               NvU64 base,
+                                               uvm_va_block_region_t region,
+                                               uvm_fault_access_type_t access_type,
+                                               uvm_ats_fault_context_t *ats_context,
+                                               uvm_page_mask_t *faults_serviced_mask)
+{
+    NvU64 start = base + (region.first * PAGE_SIZE);
+    size_t length = uvm_va_block_region_size(region);
+    NV_STATUS status;
+
+    UVM_ASSERT(start >= vma->vm_start);
+    UVM_ASSERT((start + length) <= vma->vm_end);
+
+    status = service_ats_requests(gpu_va_space,
+                                  vma,
+                                  start,
+                                  length,
+                                  access_type,
+                                  UVM_ATS_SERVICE_TYPE_FAULTS,
+                                  ats_context);
+    if (status != NV_OK)
+        return status;
+
+    uvm_page_mask_region_fill(faults_serviced_mask, region);
+
+    // WAR for older kernel versions missing an SMMU invalidate on RO -> RW
+    // transition. The SMMU and GPU could have the stale RO copy cached in their
+    // TLBs, which could have caused this write fault. This operation
+    // invalidates the SMMU TLBs but not the GPU TLBs. That will happen below as
+    // necessary.
+    if (access_type == UVM_FAULT_ACCESS_TYPE_WRITE)
+        uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
+
+    // The Linux kernel does not invalidate TLB entries on an invalid to valid
+    // PTE transition. The GPU might have the invalid PTE cached in its TLB.
+    // The GPU will re-fetch an entry on access if the PTE is invalid and the
+    // page size is not 4K, but if the page size is 4K no re-fetch will happen
+    // and the GPU will fault despite the CPU PTE being valid. We don't know
+    // whether these faults happened due to stale entries after a transition,
+    // so use the hammer of always invalidating the GPU's TLB on each fault.
+    //
+    // The second case is similar and handles missing ATS invalidations on RO ->
+    // RW transitions for all page sizes. See the uvm_ats_smmu_invalidate_tlbs()
+    // call above.
+    if (PAGE_SIZE == UVM_PAGE_SIZE_4K || (UVM_ATS_SMMU_WAR_REQUIRED() && access_type == UVM_FAULT_ACCESS_TYPE_WRITE)) {
+        flush_tlb_va_region(gpu_va_space, start, length, ats_context->client_type);
+    }
+    else {
+        // ARM requires TLB invalidations on RO -> RW, but not all architectures
+        // do. If we implement ATS support on other architectures, we might need
+        // to issue GPU invalidates.
+        UVM_ASSERT(NVCPU_IS_AARCH64);
+    }
+
+    return NV_OK;
+}
+
 NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
                                 struct vm_area_struct *vma,
                                 NvU64 base,
@@ -471,12 +578,11 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
    NV_STATUS status = NV_OK;
    uvm_va_block_region_t subregion;
    uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
+    uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
    uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
    uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
    uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
    uvm_page_mask_t *reads_serviced_mask = &ats_context->faults.reads_serviced_mask;
-    uvm_fault_client_type_t client_type = ats_context->client_type;
-    uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;

    UVM_ASSERT(vma);
    UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
@@ -492,7 +598,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
    uvm_page_mask_zero(reads_serviced_mask);

    if (!(vma->vm_flags & VM_READ))
-        return status;
+        return NV_OK;

    if (!(vma->vm_flags & VM_WRITE)) {
        // If VMA doesn't have write permissions, all write faults are fatal.
@@ -508,72 +614,65 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,

        // There are no pending faults beyond write faults to RO region.
        if (uvm_page_mask_empty(read_fault_mask))
-            return status;
+            return NV_OK;
    }

    ats_batch_select_residency(gpu_va_space, vma, ats_context);

-    ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
+    ats_compute_prefetch(gpu_va_space, vma, base, UVM_ATS_SERVICE_TYPE_FAULTS, ats_context);

    for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
-        NvU64 start = base + (subregion.first * PAGE_SIZE);
-        size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
-        uvm_fault_access_type_t access_type = (vma->vm_flags & VM_WRITE) ?
-                                                                          UVM_FAULT_ACCESS_TYPE_WRITE :
-                                                                          UVM_FAULT_ACCESS_TYPE_READ;
-
-        UVM_ASSERT(start >= vma->vm_start);
-        UVM_ASSERT((start + length) <= vma->vm_end);
-
-        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
-        if (status != NV_OK)
-            return status;
+        uvm_fault_access_type_t access_type;
+        uvm_page_mask_t *serviced_mask;

        if (vma->vm_flags & VM_WRITE) {
-            uvm_page_mask_region_fill(faults_serviced_mask, subregion);
-            uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
-
-            // The Linux kernel never invalidates TLB entries on mapping
-            // permission upgrade. This is a problem if the GPU has cached
-            // entries with the old permission. The GPU will re-fetch the entry
-            // if the PTE is invalid and page size is not 4K (this is the case
-            // on P9). However, if a page gets upgraded from R/O to R/W and GPU
-            // has the PTEs cached with R/O permissions we will enter an
-            // infinite loop because we just forward the fault to the Linux
-            // kernel and it will see that the permissions in the page table are
-            // correct. Therefore, we flush TLB entries on ATS write faults.
-            flush_tlb_va_region(gpu_va_space, start, length, client_type);
+            access_type = UVM_FAULT_ACCESS_TYPE_WRITE;
+            serviced_mask = faults_serviced_mask;
        }
        else {
-            uvm_page_mask_region_fill(reads_serviced_mask, subregion);
+            // write_fault_mask contains just the addresses with both read and
+            // fatal write faults, so we need to service the read component.
+            access_type = UVM_FAULT_ACCESS_TYPE_READ;
+            serviced_mask = reads_serviced_mask;
        }
+
+        status = uvm_ats_service_faults_region(gpu_va_space,
+                                               vma,
+                                               base,
+                                               subregion,
+                                               access_type,
+                                               ats_context,
+                                               serviced_mask);
+        if (status != NV_OK)
+            return status;
    }

-    // Remove write faults from read_fault_mask
+    // Remove write faults from read_fault_mask to avoid double-service
    uvm_page_mask_andnot(read_fault_mask, read_fault_mask, write_fault_mask);

    for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
-        NvU64 start = base + (subregion.first * PAGE_SIZE);
-        size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
-        uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;
-
-        UVM_ASSERT(start >= vma->vm_start);
-        UVM_ASSERT((start + length) <= vma->vm_end);
-
-        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
+        status = uvm_ats_service_faults_region(gpu_va_space,
+                                               vma,
+                                               base,
+                                               subregion,
+                                               UVM_FAULT_ACCESS_TYPE_READ,
+                                               ats_context,
+                                               faults_serviced_mask);
        if (status != NV_OK)
            return status;
+    }

-        uvm_page_mask_region_fill(faults_serviced_mask, subregion);
-
-        // Similarly to permission upgrade scenario, discussed above, GPU
-        // will not re-fetch the entry if the PTE is invalid and page size
-        // is 4K. To avoid infinite faulting loop, invalidate TLB for every
-        // new translation written explicitly like in the case of permission
-        // upgrade.
-        if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
-            flush_tlb_va_region(gpu_va_space, start, length, client_type);
-
+    // Handle HW prefetch only faults
+    for_each_va_block_subregion_in_mask(subregion, prefetch_only_fault_mask, region) {
+        status = uvm_ats_service_faults_region(gpu_va_space,
+                                               vma,
+                                               base,
+                                               subregion,
+                                               UVM_FAULT_ACCESS_TYPE_PREFETCH,
+                                               ats_context,
+                                               faults_serviced_mask);
+        if (status != NV_OK)
+            return status;
    }

    return status;
@@ -679,7 +778,10 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
        UVM_ASSERT((start + length) <= vma->vm_end);

        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
-        if (status == NV_OK)
+
+        // clear access counters if pages were migrated or migration needs to
+        // be retried
+        if (status == NV_OK || status == NV_ERR_BUSY_RETRY)
            uvm_page_mask_region_fill(migrated_mask, subregion);
        else if (status != NV_WARN_NOTHING_TO_DO)
            return status;
--- a/kernel-open/nvidia-uvm/uvm_ats_faults.h
+++ b/kernel-open/nvidia-uvm/uvm_ats_faults.h
@@ -29,12 +29,13 @@

 // Service ATS faults in the range (base, base + UVM_VA_BLOCK_SIZE) with service
 // type for individual pages in the range requested by page masks set in
-// ats_context->fault.read_fault_mask/write_fault_mask. base must be aligned to
-// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that faulting
-// addresses fall completely within the VMA. The caller is also responsible for
-// ensuring that the faulting addresses don't overlap a GMMU region. (See
-// uvm_ats_check_in_gmmu_region). The caller is also responsible for handling
-// any errors returned by this function (fault cancellations etc.).
+// ats_context->faults.read_fault_mask/write_fault_mask/prefetch_only_fault_mask.
+// base must be aligned to UVM_VA_BLOCK_SIZE. The caller is responsible for
+// ensuring that faulting addresses fall completely within the VMA. The caller
+// is also responsible for ensuring that the faulting addresses don't overlap
+// a GMMU region. (See uvm_ats_check_in_gmmu_region). The caller is also
+// responsible for handling any errors returned by this function (fault
+// cancellations etc.).
 //
 // Returns the fault service status in ats_context->fault.faults_serviced_mask.
 // In addition, ats_context->fault.reads_serviced_mask returns whether read
--- a/kernel-open/nvidia-uvm/uvm_blackwell.c
+++ b/kernel-open/nvidia-uvm/uvm_blackwell.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2022-2023 NVIDIA Corporation
+    Copyright (c) 2022-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);

-    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
-                                                          parent_gpu->utlb_per_gpc_count;
+    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
-        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
-                                                                           (sizeof(dummy->fault_source.utlb_id) * 8)));
+        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Blackwell covers 64 PB and that's the minimum
@@ -51,14 +49,16 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->rm_va_base = 0;
    parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;

+    parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
+    parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
+
    parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

    // See uvm_mmu.h for mapping placement
    parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);

-    // TODO: Bug 3953852: Set this to true pending Blackwell changes
-    parent_gpu->ce_phys_vidmem_write_supported = !uvm_parent_gpu_is_coherent(parent_gpu);
+    parent_gpu->ce_phys_vidmem_write_supported = true;

    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

@@ -83,8 +83,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->access_counters_supported = true;

-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
@@ -102,4 +100,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->plc_supported = true;

    parent_gpu->no_ats_range_required = true;
+
+    parent_gpu->conf_computing.per_channel_key_rotation = true;
 }
--- a/kernel-open/nvidia-uvm/uvm_blackwell_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_blackwell_mmu.c
@@ -37,7 +37,6 @@
 #include "uvm_hal_types.h"
 #include "uvm_blackwell_fault_buffer.h"
 #include "hwref/blackwell/gb100/dev_fault.h"
-#include "hwref/blackwell/gb100/dev_mmu.h"

 static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;

--- a/kernel-open/nvidia-uvm/uvm_channel.c
+++ b/kernel-open/nvidia-uvm/uvm_channel.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2023 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,6 +38,7 @@
 #include "clb06f.h"
 #include "uvm_conf_computing.h"

+
 // WLC push is decrypted by SEC2 or CE (in WLC schedule).
 // In sysmem it's followed by auth tag.
 #define WLC_PUSHBUFFER_ALIGNMENT max3(UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT, \
@@ -97,6 +98,37 @@ typedef enum
    UVM_CHANNEL_UPDATE_MODE_FORCE_ALL
 } uvm_channel_update_mode_t;

+// Selects how a GPFIFO entry reservation interacts with p2p suspension: only
+// UVM_CHANNEL_RESERVE_WITH_P2P claims are refused on a channel whose
+// suspended_p2p flag is set (see try_claim_channel_locked()).
+typedef enum
+{
+    // Reserve an entry that is expected to use p2p operations.
+    UVM_CHANNEL_RESERVE_WITH_P2P,
+
+    // Reserve an entry that is not expected to use p2p operations.
+    UVM_CHANNEL_RESERVE_NO_P2P,
+} uvm_channel_reserve_type_t;
+
+// Returns true if the pool is one the owning channel manager uses for
+// GPU-to-GPU work: either the default pool for UVM_CHANNEL_TYPE_GPU_TO_GPU or
+// the pool assigned to any GPU currently set in peer_gpu_mask.
+bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool)
+{
+    uvm_channel_manager_t *manager = pool->manager;
+    uvm_gpu_t *gpu = manager->gpu;
+    uvm_gpu_id_t peer_id;
+    bool is_p2p = false;
+
+    // The default GPU-to-GPU pool needs no peer lookup
+    if (pool == manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU])
+        return true;
+
+    // The peer mask is walked under peer_gpu_lock
+    uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
+
+    for_each_gpu_id_in_mask(peer_id, &gpu->peer_info.peer_gpu_mask) {
+        if (pool == manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(peer_id)]) {
+            is_p2p = true;
+            break;
+        }
+    }
+
+    uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
+
+    return is_p2p;
+}
+
 bool uvm_channel_pool_uses_mutex(uvm_channel_pool_t *pool)
 {
    // Work submission to proxy channels in SR-IOV heavy entails calling RM API
@@ -119,10 +151,12 @@ bool uvm_channel_pool_uses_mutex(uvm_channel_pool_t *pool)

 static void channel_pool_lock_init(uvm_channel_pool_t *pool)
 {
-    uvm_lock_order_t order = UVM_LOCK_ORDER_CHANNEL;
+    uvm_lock_order_t order;

    if (g_uvm_global.conf_computing_enabled && uvm_channel_pool_is_wlc(pool))
        order = UVM_LOCK_ORDER_WLC_CHANNEL;
+    else
+        order = UVM_LOCK_ORDER_CHANNEL;

    if (uvm_channel_pool_uses_mutex(pool))
        uvm_mutex_init(&pool->mutex, order);
@@ -274,7 +308,9 @@ NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel)
    return available;
 }

-static bool try_claim_channel_locked(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
+static bool try_claim_channel_locked(uvm_channel_t *channel,
+                                     NvU32 num_gpfifo_entries,
+                                     uvm_channel_reserve_type_t reserve_type)
 {
    bool claimed = false;

@@ -283,6 +319,9 @@ static bool try_claim_channel_locked(uvm_channel_t *channel, NvU32 num_gpfifo_en

    uvm_channel_pool_assert_locked(channel->pool);

+    if (reserve_type == UVM_CHANNEL_RESERVE_WITH_P2P && channel->suspended_p2p)
+        return false;
+
    if (channel_get_available_gpfifo_entries(channel) >= num_gpfifo_entries) {
        channel->current_gpfifo_count += num_gpfifo_entries;
        claimed = true;
@@ -291,12 +330,14 @@ static bool try_claim_channel_locked(uvm_channel_t *channel, NvU32 num_gpfifo_en
    return claimed;
 }

-static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
+static bool try_claim_channel(uvm_channel_t *channel,
+                              NvU32 num_gpfifo_entries,
+                              uvm_channel_reserve_type_t reserve_type)
 {
    bool claimed;

    channel_pool_lock(channel->pool);
-    claimed = try_claim_channel_locked(channel, num_gpfifo_entries);
+    claimed = try_claim_channel_locked(channel, num_gpfifo_entries, reserve_type);
    channel_pool_unlock(channel->pool);

    return claimed;
@@ -349,7 +390,8 @@ static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo
    if (uvm_channel_is_locked_for_push(channel))
        return false;

-    if (try_claim_channel_locked(channel, num_gpfifo_entries)) {
+    // Confidential compute is not using p2p ops, reserve without p2p
+    if (try_claim_channel_locked(channel, num_gpfifo_entries, UVM_CHANNEL_RESERVE_NO_P2P)) {
        lock_channel_for_push(channel);
        return true;
    }
@@ -490,7 +532,9 @@ static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_

    for_each_clear_bit(index, pool->conf_computing.push_locks, pool->num_channels) {
        channel = &pool->channels[index];
-        if (try_claim_channel_locked(channel, 1)) {
+
+        // Confidential compute is not using p2p ops, reserve without p2p
+        if (try_claim_channel_locked(channel, 1, UVM_CHANNEL_RESERVE_NO_P2P)) {
            lock_channel_for_push(channel);
            goto done;
        }
@@ -529,7 +573,9 @@ done:
 }

 // Reserve a channel in the specified pool
-static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
+static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool,
+                                         uvm_channel_reserve_type_t reserve_type,
+                                         uvm_channel_t **channel_out)
 {
    uvm_channel_t *channel;
    uvm_spin_loop_t spin;
@@ -541,7 +587,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t

    uvm_for_each_channel_in_pool(channel, pool) {
        // TODO: Bug 1764953: Prefer idle/less busy channels
-        if (try_claim_channel(channel, 1)) {
+        if (try_claim_channel(channel, 1, reserve_type)) {
            *channel_out = channel;
            return NV_OK;
        }
@@ -554,7 +600,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t

            uvm_channel_update_progress(channel);

-            if (try_claim_channel(channel, 1)) {
+            if (try_claim_channel(channel, 1, reserve_type)) {
                *channel_out = channel;

                return NV_OK;
@@ -564,6 +610,9 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
            if (status != NV_OK)
                return status;

+            if (reserve_type == UVM_CHANNEL_RESERVE_WITH_P2P && channel->suspended_p2p)
+                return NV_ERR_BUSY_RETRY;
+
            UVM_SPIN_LOOP(&spin);
        }
    }
@@ -575,12 +624,18 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t

 NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
 {
+    uvm_channel_reserve_type_t reserve_type;
    uvm_channel_pool_t *pool = manager->pool_to_use.default_for_type[type];

    UVM_ASSERT(pool != NULL);
    UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);

-    return channel_reserve_in_pool(pool, channel_out);
+    if (type == UVM_CHANNEL_TYPE_GPU_TO_GPU)
+        reserve_type = UVM_CHANNEL_RESERVE_WITH_P2P;
+    else
+        reserve_type = UVM_CHANNEL_RESERVE_NO_P2P;
+
+    return channel_reserve_in_pool(pool, reserve_type, channel_out);
 }

 NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
@@ -596,7 +651,7 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,

    UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);

-    return channel_reserve_in_pool(pool, channel_out);
+    return channel_reserve_in_pool(pool, UVM_CHANNEL_RESERVE_WITH_P2P, channel_out);
 }

 NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
@@ -1441,7 +1496,6 @@ void uvm_channel_end_push(uvm_push_t *push)
    bool needs_sec2_work_submit = false;

    channel_pool_lock(channel->pool);
-
    encrypt_push(push);

    new_tracking_value = ++channel->tracking_sem.queued_value;
@@ -1523,6 +1577,7 @@ void uvm_channel_end_push(uvm_push_t *push)
    // push must be updated before that. Notably uvm_pushbuffer_end_push() has
    // to be called first.
    unlock_channel_for_push(channel);
+
    channel_pool_unlock(channel->pool);

    // This memory barrier is borrowed from CUDA, as it supposedly fixes perf
@@ -1805,13 +1860,14 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
    if (g_uvm_global.conf_computing_enabled)
        return channel_reserve_and_lock(channel, num_gpfifo_entries);

-    if (try_claim_channel(channel, num_gpfifo_entries))
+    // Direct channel reservations don't use p2p
+    if (try_claim_channel(channel, num_gpfifo_entries, UVM_CHANNEL_RESERVE_NO_P2P))
        return NV_OK;

    uvm_channel_update_progress(channel);

    uvm_spin_loop_init(&spin);
-    while (!try_claim_channel(channel, num_gpfifo_entries) && status == NV_OK) {
+    while (!try_claim_channel(channel, num_gpfifo_entries, UVM_CHANNEL_RESERVE_NO_P2P) && status == NV_OK) {
        UVM_SPIN_LOOP(&spin);
        status = uvm_channel_check_errors(channel);
        uvm_channel_update_progress(channel);
@@ -1825,9 +1881,11 @@ void uvm_channel_release(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
    channel_pool_lock(channel->pool);

    UVM_ASSERT(uvm_channel_is_locked_for_push(channel));
+
    unlock_channel_for_push(channel);

    UVM_ASSERT(channel->current_gpfifo_count >= num_gpfifo_entries);
+
    channel->current_gpfifo_count -= num_gpfifo_entries;
    channel_pool_unlock(channel->pool);
 }
@@ -1852,6 +1910,140 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
    return entry;
 }

+// Suspend p2p traffic on a single channel.
+//
+// All GPFIFO entries (minus the sentinel) are reserved first, and the
+// suspended_p2p flag is only set once that reservation has succeeded. The
+// entries are then released again so that non-p2p work can proceed; from then
+// on UVM_CHANNEL_RESERVE_WITH_P2P claims are refused until
+// channel_resume_p2p() is called.
+//
+// Returns the uvm_channel_reserve() error, leaving the flag clear and
+// releasing nothing, if the reservation fails.
+static NV_STATUS channel_suspend_p2p(uvm_channel_t *channel)
+{
+    NV_STATUS status;
+
+    UVM_ASSERT(channel);
+    UVM_ASSERT(!channel->suspended_p2p);
+
+    // Reserve all entries to block traffic.
+    // Each channel needs 1 entry as sentinel.
+    status = uvm_channel_reserve(channel, channel->num_gpfifo_entries - 1);
+
+    // Do not release after a failed reservation: uvm_channel_release() unlocks
+    // the channel for push and subtracts the entries from current_gpfifo_count,
+    // which would underflow the count if nothing was claimed.
+    // NOTE(review): assumes uvm_channel_reserve() holds no entries and no push
+    // lock when it fails, as its visible non-CC path suggests - confirm for the
+    // Confidential Computing path.
+    if (status != NV_OK)
+        return status;
+
+    // Prevent p2p traffic from reserving entries
+    channel->suspended_p2p = true;
+
+    // Release the entries reserved above to allow non-p2p traffic
+    uvm_channel_release(channel, channel->num_gpfifo_entries - 1);
+
+    return NV_OK;
+}
+
+// Re-enable p2p reservations on a channel that is currently suspended (the
+// suspended_p2p flag is asserted to be set). Only clears the flag; no GPFIFO
+// entries are reserved or released here.
+static void channel_resume_p2p(uvm_channel_t *channel)
+{
+    UVM_ASSERT(channel);
+    UVM_ASSERT(channel->suspended_p2p);
+
+    channel->suspended_p2p = false;
+}
+
+// Suspend p2p traffic on every channel of the pool, all-or-nothing: if any
+// channel fails to suspend, the channels suspended so far are resumed and the
+// failing status is returned. Asserts that the pool is neither WLC nor LCIC.
+static NV_STATUS channel_pool_suspend_p2p(uvm_channel_pool_t *pool)
+{
+    NV_STATUS status = NV_OK;
+    NvU32 num_suspended;
+    NvU32 index;
+
+    UVM_ASSERT(pool);
+    UVM_ASSERT(!uvm_channel_pool_is_wlc(pool));
+    UVM_ASSERT(!uvm_channel_pool_is_lcic(pool));
+
+    for (num_suspended = 0; num_suspended < pool->num_channels; ++num_suspended) {
+        status = channel_suspend_p2p(&pool->channels[num_suspended]);
+        if (status != NV_OK)
+            break;
+    }
+
+    // On failure, undo the suspensions in reverse order. The channel that
+    // failed did not get its flag set, so start from the one before it.
+    if (status != NV_OK) {
+        while (num_suspended > 0)
+            channel_resume_p2p(&pool->channels[--num_suspended]);
+    }
+
+    // Every channel must now agree with the overall outcome
+    for (index = 0; index < pool->num_channels; ++index)
+        UVM_ASSERT(pool->channels[index].suspended_p2p == (status == NV_OK));
+
+    return status;
+}
+
+// Resume p2p traffic on every channel of the pool, in channel index order.
+// Asserts that the pool is neither WLC nor LCIC.
+static void channel_pool_resume_p2p(uvm_channel_pool_t *pool)
+{
+    NvU32 index = 0;
+
+    UVM_ASSERT(pool);
+    UVM_ASSERT(!uvm_channel_pool_is_wlc(pool));
+    UVM_ASSERT(!uvm_channel_pool_is_lcic(pool));
+
+    while (index < pool->num_channels) {
+        channel_resume_p2p(&pool->channels[index]);
+        index++;
+    }
+}
+
+// Suspend p2p traffic on all pools this manager uses for GPU-to-GPU work: the
+// per-peer pools of every GPU set in peer_gpu_mask, plus the default pool for
+// UVM_CHANNEL_TYPE_GPU_TO_GPU. The operation is all-or-nothing: on failure the
+// pools already suspended are resumed and the error is returned. The global
+// lock is asserted to be held.
+NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager)
+{
+    uvm_channel_pool_t *pool;
+    NV_STATUS status = NV_OK;
+    uvm_gpu_t *gpu = channel_manager->gpu;
+    uvm_gpu_id_t gpu_id;
+
+    // NOTE(review): the bitmap holds UVM_COPY_ENGINE_COUNT_MAX bits, but it is
+    // zeroed, indexed and iterated using pool indices bounded by
+    // num_channel_pools. This assumes num_channel_pools never exceeds
+    // UVM_COPY_ENGINE_COUNT_MAX - confirm.
+    DECLARE_BITMAP(suspended_pools, UVM_COPY_ENGINE_COUNT_MAX);
+
+    // Pools can be assigned to multiple 'pool_to_use' locations
+    // Use bitmap to track which were suspended.
+    bitmap_zero(suspended_pools, channel_manager->num_channel_pools);
+
+    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
+
+    // Suspend each distinct per-peer pool once; stop at the first failure
+    for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
+        pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
+        if (pool && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
+            status = channel_pool_suspend_p2p(pool);
+            if (status != NV_OK)
+                break;
+
+            __set_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools);
+        }
+    }
+
+    // NOTE(review): unlike the per-peer pools above, the default pool is used
+    // without a NULL check - presumably it is always set; confirm.
+    pool = channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU];
+    if (status == NV_OK && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
+        status = channel_pool_suspend_p2p(pool);
+
+        // Do not set the suspended_pools bit here. If status is NV_OK it's not
+        // needed, otherwise it should not be set anyway.
+    }
+
+    // Resume suspended pools in case of error
+    if (status != NV_OK) {
+        unsigned i;
+
+        for_each_set_bit(i, suspended_pools, channel_manager->num_channel_pools)
+            channel_pool_resume_p2p(channel_manager->channel_pools + i);
+    }
+
+    return status;
+}
+
+// Resume p2p traffic on all pools this manager uses for GPU-to-GPU work: the
+// per-peer pools of every GPU set in peer_gpu_mask, plus the default pool for
+// UVM_CHANNEL_TYPE_GPU_TO_GPU. Each distinct pool is resumed exactly once.
+// channel_resume_p2p() asserts that each channel is currently suspended. The
+// global lock is asserted to be held.
+void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
+{
+    uvm_channel_pool_t *pool;
+    uvm_gpu_t *gpu = channel_manager->gpu;
+    uvm_gpu_id_t gpu_id;
+
+    // NOTE(review): the bitmap holds UVM_COPY_ENGINE_COUNT_MAX bits but is
+    // zeroed and indexed using pool indices bounded by num_channel_pools. This
+    // assumes num_channel_pools never exceeds UVM_COPY_ENGINE_COUNT_MAX -
+    // confirm.
+    DECLARE_BITMAP(resumed_pools, UVM_COPY_ENGINE_COUNT_MAX);
+
+    // Pools can be assigned to multiple 'pool_to_use' locations
+    // Use bitmap to track which were already resumed.
+    bitmap_zero(resumed_pools, channel_manager->num_channel_pools);
+
+    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
+
+    // test_and_set_bit() both checks and records the pool, so a pool shared by
+    // several peers is only resumed the first time it is seen
+    for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
+        pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
+        if (pool && !test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
+            channel_pool_resume_p2p(pool);
+    }
+
+    // The default pool may also be one of the per-peer pools resumed above
+    pool = channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU];
+    if (!test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
+        channel_pool_resume_p2p(pool);
+}
+
 NV_STATUS uvm_channel_get_status(uvm_channel_t *channel)
 {
    uvm_gpu_t *gpu;
@@ -1891,7 +2083,7 @@ NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel)
    NV_STATUS status = uvm_channel_get_status(channel);

    if (status == NV_OK)
-        return NV_OK;
+        return status;

    UVM_ERR_PRINT("Detected a channel error, channel %s GPU %s\n",
                  channel->name,
@@ -2134,6 +2326,7 @@ static uvmGpuTsgHandle channel_get_tsg(uvm_channel_t *channel)

        tsg_index = uvm_channel_index_in_pool(channel);
    }
+
    UVM_ASSERT(tsg_index < pool->num_tsgs);

    return pool->tsg_handles[tsg_index];
@@ -2251,6 +2444,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
     if (status != NV_OK)
         goto error;

+    channel->suspended_p2p = false;
    channel->num_gpfifo_entries = channel_pool_num_gpfifo_entries(pool);
    channel->gpfifo_entries = uvm_kvmalloc_zero(sizeof(*channel->gpfifo_entries) * channel->num_gpfifo_entries);
    if (channel->gpfifo_entries == NULL) {
@@ -2444,6 +2638,7 @@ static UVM_GPU_CHANNEL_ENGINE_TYPE pool_type_to_engine_type(uvm_channel_pool_typ
 {
    if (pool_type ==  UVM_CHANNEL_POOL_TYPE_SEC2)
        return UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2;
+
    return UVM_GPU_CHANNEL_ENGINE_TYPE_CE;
 }

@@ -2862,10 +3057,6 @@ static void pick_ces_for_channel_types(uvm_channel_manager_t *manager,
 {
    unsigned i;

-    // In Confidential Computing, do not mark all usable CEs, only the preferred
-    // ones, because non-preferred CE channels are guaranteed to not be used.
-    bool mark_all_usable_ces = !g_uvm_global.conf_computing_enabled;
-
    for (i = 0; i < num_channel_types; ++i) {
        unsigned ce;
        unsigned best_ce = UVM_COPY_ENGINE_COUNT_MAX;
@@ -2875,7 +3066,10 @@ static void pick_ces_for_channel_types(uvm_channel_manager_t *manager,
            if (!ce_is_usable(ce_caps + ce))
                continue;

-            if (mark_all_usable_ces)
+            // In Confidential Computing, do not mark all usable CEs, only the
+            // preferred ones, because non-preferred CE channels are guaranteed
+            // to not be used.
+            if (!g_uvm_global.conf_computing_enabled)
                __set_bit(ce, manager->ce_mask);

            if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
@@ -2919,6 +3113,7 @@ static void pick_ces_conf_computing(uvm_channel_manager_t *manager,
                                    unsigned *preferred_ce)
 {
    unsigned best_wlc_ce;
+    uvm_gpu_t *gpu = manager->gpu;

    // The WLC type must go last so an unused CE is chosen, if available
    uvm_channel_type_t types[] = {UVM_CHANNEL_TYPE_CPU_TO_GPU,
@@ -2937,9 +3132,10 @@ static void pick_ces_conf_computing(uvm_channel_manager_t *manager,

    best_wlc_ce = preferred_ce[UVM_CHANNEL_TYPE_WLC];

-    // TODO: Bug 4576908: in HCC, the WLC type should not share a CE with any
-    // channel type other than LCIC. The assertion should be a check instead.
-    UVM_ASSERT(ce_usage_count(best_wlc_ce, preferred_ce) == 0);
+    // The implementation of engine-wide key rotation depends on using a
+    // dedicated CE for the WLC and LCIC pools.
+    if (uvm_conf_computing_is_key_rotation_enabled(gpu) && !gpu->parent->conf_computing.per_channel_key_rotation)
+        UVM_ASSERT(ce_usage_count(best_wlc_ce, preferred_ce) == 0);
 }

 static NV_STATUS channel_manager_pick_ces(uvm_channel_manager_t *manager, unsigned *preferred_ce)
@@ -2967,6 +3163,7 @@ static NV_STATUS channel_manager_pick_ces(uvm_channel_manager_t *manager, unsign
        pick_ces_conf_computing(manager, ces_caps->copyEngineCaps, preferred_ce);
    else
        pick_ces(manager, ces_caps->copyEngineCaps, preferred_ce);
+
 out:
    uvm_kvfree(ces_caps);

@@ -3000,6 +3197,8 @@ void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *p

    UVM_ASSERT(manager->gpu != peer);
    UVM_ASSERT(optimal_ce < UVM_COPY_ENGINE_COUNT_MAX);
+    UVM_ASSERT(manager->gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_UNSUPPORTED);
+    UVM_ASSERT(peer->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_UNSUPPORTED);

    manager->pool_to_use.gpu_to_gpu[peer_gpu_index] = channel_manager_ce_pool(manager, optimal_ce);
 }
@@ -3056,14 +3255,14 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
        manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;

    if (manager->conf.num_gpfifo_entries != uvm_channel_num_gpfifo_entries) {
-        pr_info("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
-                uvm_channel_num_gpfifo_entries,
-                manager->conf.num_gpfifo_entries);
+        UVM_INFO_PRINT("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
+                       uvm_channel_num_gpfifo_entries,
+                       manager->conf.num_gpfifo_entries);
    }

    // 2- Allocation locations

-    if (uvm_conf_computing_mode_is_hcc(gpu)) {
+    if (g_uvm_global.conf_computing_enabled) {
        UVM_ASSERT(gpu->mem_info.size > 0);

        // When the Confidential Computing feature is enabled, the GPU is
@@ -3098,9 +3297,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
    pushbuffer_loc_value = uvm_channel_pushbuffer_loc;
    if (!is_string_valid_location(pushbuffer_loc_value)) {
        pushbuffer_loc_value = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;
-        pr_info("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
-                uvm_channel_pushbuffer_loc,
-                pushbuffer_loc_value);
+        UVM_INFO_PRINT("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
+                       uvm_channel_pushbuffer_loc,
+                       pushbuffer_loc_value);
    }

    // Override the default value if requested by the user
@@ -3110,8 +3309,8 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
        // so force the location to sys for now.
        // TODO: Bug 2904133: Remove the following "if" after the bug is fixed.
        if (NVCPU_IS_AARCH64) {
-            pr_info("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
-                    pushbuffer_loc_value);
+            UVM_INFO_PRINT("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
+                           pushbuffer_loc_value);
            manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
        }
        else {
@@ -3123,8 +3322,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
    // Only support the knobs for GPFIFO/GPPut on Volta+
    if (!gpu->parent->gpfifo_in_vidmem_supported) {
        if (manager->conf.gpput_loc == UVM_BUFFER_LOCATION_SYS) {
-            pr_info("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s instead\n",
-                    buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
+            UVM_INFO_PRINT("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s "
+                           "instead\n",
+                           buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
        }

        manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_DEFAULT;
@@ -3136,17 +3336,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
    gpfifo_loc_value = uvm_channel_gpfifo_loc;
    if (!is_string_valid_location(gpfifo_loc_value)) {
        gpfifo_loc_value = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;
-        pr_info("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
-                uvm_channel_gpfifo_loc,
-                gpfifo_loc_value);
+        UVM_INFO_PRINT("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
+                       uvm_channel_gpfifo_loc,
+                       gpfifo_loc_value);
    }

    gpput_loc_value = uvm_channel_gpput_loc;
    if (!is_string_valid_location(gpput_loc_value)) {
        gpput_loc_value = UVM_CHANNEL_GPPUT_LOC_DEFAULT;
-        pr_info("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
-                uvm_channel_gpput_loc,
-                gpput_loc_value);
+        UVM_INFO_PRINT("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
+                       uvm_channel_gpput_loc,
+                       gpput_loc_value);
    }

    // On coherent platforms where the GPU does not cache sysmem but the CPU
@@ -3290,7 +3490,7 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
    // WLC can only process one job at a time.
    // Prune any initialization entries and block all but one (+1 for sentinel)
    uvm_channel_update_progress(wlc);
-    if (!try_claim_channel(wlc, wlc->num_gpfifo_entries - 2)) {
+    if (!try_claim_channel(wlc, wlc->num_gpfifo_entries - 2, UVM_CHANNEL_RESERVE_NO_P2P)) {
        status = NV_ERR_INVALID_STATE;
        goto free_gpfifo_entries;
    }
@@ -3437,7 +3637,7 @@ static NV_STATUS setup_lcic_schedule(uvm_channel_t *paired_wlc, uvm_channel_t *l
    // Prune any initialization entries and
    // block all gpfifo entries (-1 for sentinel)
    uvm_channel_update_progress(lcic);
-    if (!try_claim_channel(lcic, lcic->num_gpfifo_entries - 1)) {
+    if (!try_claim_channel(lcic, lcic->num_gpfifo_entries - 1, UVM_CHANNEL_RESERVE_NO_P2P)) {
        status = NV_ERR_INVALID_STATE;
        goto free_gpfifo_entries;
    }
@@ -3700,6 +3900,7 @@ static void channel_manager_destroy_pools(uvm_channel_manager_t *manager)
 {
    uvm_rm_mem_free(manager->gpu->conf_computing.iv_rm_mem);
    manager->gpu->conf_computing.iv_rm_mem = NULL;
+
    while (manager->num_channel_pools > 0)
        channel_pool_destroy(manager->channel_pools + manager->num_channel_pools - 1);

@@ -3856,6 +4057,7 @@ const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type)

 const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type)
 {
+
    BUILD_BUG_ON(UVM_CHANNEL_POOL_TYPE_COUNT != 5);

    switch (channel_pool_type) {
@@ -3870,17 +4072,21 @@ const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool

 static const char *get_gpfifo_location_string(uvm_channel_t *channel)
 {
+
    // SEC2 channels override the channel manager location for GPFIFO.
    if (uvm_channel_is_sec2(channel))
        return buffer_location_to_string(UVM_BUFFER_LOCATION_SYS);
+
    return buffer_location_to_string(channel->pool->manager->conf.gpfifo_loc);
 }

 static const char *get_gpput_location_string(uvm_channel_t *channel)
 {
+
    // SEC2 channels override the channel manager location for GPPUT.
    if (uvm_channel_is_sec2(channel))
        return buffer_location_to_string(UVM_BUFFER_LOCATION_SYS);
+
    return buffer_location_to_string(channel->pool->manager->conf.gpput_loc);
 }

--- a/kernel-open/nvidia-uvm/uvm_channel.h
+++ b/kernel-open/nvidia-uvm/uvm_channel.h
@@ -200,6 +200,7 @@ typedef struct
    // num_tsgs is 1. Pre-Volta GPUs also have a single TSG object, but since HW
    // does not support TSG for CE engines, a HW TSG is not created, but a TSG
    // object is required to allocate channels.
+    //
    // When Confidential Computing mode is enabled, the WLC and LCIC channel
    // types require one TSG for each WLC/LCIC pair of channels. In this case,
    // we do not use a TSG per channel pool, but instead a TSG per WLC/LCIC
@@ -416,6 +417,8 @@ struct uvm_channel_struct
        struct list_head channel_list_node;
        NvU32 pending_event_count;
    } tools;
+
+    bool suspended_p2p;
 };

 struct uvm_channel_manager_struct
@@ -478,6 +481,12 @@ struct uvm_channel_manager_struct
    } conf_computing;
 };

+// Index of a channel pool within the manager
+static unsigned uvm_channel_pool_index_in_channel_manager(const uvm_channel_pool_t *pool)
+{
+    return pool - pool->manager->channel_pools;
+}
+
 // Create a channel manager for the GPU
 NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);

@@ -532,6 +541,8 @@ NvU64 uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(uvm_channel_t *channel

 char* uvm_channel_get_static_pb_unprotected_sysmem_cpu(uvm_channel_t *channel);

+bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool);
+
 static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
 {
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
@@ -549,6 +560,11 @@ static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
    return !uvm_channel_pool_is_sec2(pool);
 }

+static bool uvm_channel_is_p2p(uvm_channel_t *channel)
+{
+    return uvm_channel_pool_is_p2p(channel->pool);
+}
+
 static bool uvm_channel_is_ce(uvm_channel_t *channel)
 {
    return uvm_channel_pool_is_ce(channel->pool);
@@ -584,14 +600,25 @@ bool uvm_channel_is_privileged(uvm_channel_t *channel);
 // Destroy the channel manager
 void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager);

+// Suspend p2p traffic on channels used for p2p operations.
+// This is used in STO recovery sequence to quiet nvlink traffic before the
+// links can be restored.
+NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager);
+
+// Resume p2p traffic on channels used for p2p operations.
+// This is used at the end of the STO recovery sequence to resume suspended p2p
+// traffic on p2p channels.
+void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager);
+
 // Get the current status of the channel
-// Returns NV_OK if the channel is in a good state and NV_ERR_RC_ERROR
-// otherwise. Notably this never sets the global fatal error.
+// Returns NV_OK if the channel is in a good state,
+//         NV_ERR_RC_ERROR otherwise.
+// Notably this never sets the global fatal error.
 NV_STATUS uvm_channel_get_status(uvm_channel_t *channel);

 // Check for channel errors
-// Checks for channel errors by calling uvm_channel_get_status(). If an error
-// occurred, sets the global fatal error and prints errors.
+// Checks for channel errors by calling uvm_channel_get_status().
+// If a fatal error occurred, sets the global fatal error and prints errors.
 NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel);

 // Check errors on all channels in the channel manager
@@ -625,6 +652,7 @@ static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
 {
    return manager->conf_computing.wlc_ready;
 }
+
 // Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
 // associated with access_channel.
 //
--- a/kernel-open/nvidia-uvm/uvm_channel_test.c
+++ b/kernel-open/nvidia-uvm/uvm_channel_test.c
@@ -206,9 +206,10 @@ static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

-            // Skip LCIC channels as those can't accept any pushes
-            if (uvm_channel_pool_is_lcic(pool))
-                continue;
+        // Skip LCIC channels as those can't accept any pushes
+        if (uvm_channel_pool_is_lcic(pool))
+            continue;
+
        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            for (i = 0; i < 512; ++i) {
@@ -1292,6 +1293,7 @@ static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
            // after their schedule is set up
            if (uvm_channel_pool_is_wlc(pool))
                continue;
+
            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;

@@ -1331,6 +1333,7 @@ static NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
            // after their schedule is set up
            if (uvm_channel_pool_is_wlc(pool))
                continue;
+
            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;
                uvm_push_t push;
@@ -1473,6 +1476,7 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
                continue;
+
            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;
                uvm_push_t push;
--- a/kernel-open/nvidia-uvm/uvm_common.h
+++ b/kernel-open/nvidia-uvm/uvm_common.h
@@ -57,6 +57,7 @@ enum {
 // NULL.
 void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);

+// Long prefix - typically for debugging and tests.
 #define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
    func(prefix "%s:%u %s[pid:%d]" fmt,               \
         kbasename(__FILE__),                         \
@@ -65,10 +66,15 @@ void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
         current->pid,                                \
         ##__VA_ARGS__)

+// Short prefix - typically for information.
+#define UVM_PRINT_FUNC_SHORT_PREFIX(func, prefix, fmt, ...) \
+    func(prefix fmt, ##__VA_ARGS__)
+
+// No log-level prefix (location info is still added) - used by kernel panic messages.
 #define UVM_PRINT_FUNC(func, fmt, ...)  \
    UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)

-// Check whether UVM_{ERR,DBG,INFO)_PRINT* should be enabled
+// Check whether UVM_{ERR,DBG}_PRINT* should be enabled.
 bool uvm_debug_prints_enabled(void);

 // A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
@@ -80,10 +86,10 @@ bool uvm_debug_prints_enabled(void);
        }                                                               \
    } while (0)

-#define UVM_ASSERT_PRINT(fmt, ...) \
+#define UVM_ERR_PRINT_ALWAYS(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

-#define UVM_ASSERT_PRINT_RL(fmt, ...) \
+#define UVM_ERR_PRINT_ALWAYS_RL(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

 #define UVM_ERR_PRINT(fmt, ...) \
@@ -95,13 +101,16 @@ bool uvm_debug_prints_enabled(void);
 #define UVM_DBG_PRINT(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

-#define UVM_DBG_PRINT_RL(fmt, ...)                              \
+#define UVM_DBG_PRINT_RL(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

+// UVM_INFO_PRINT prints in all modes (including release mode). It is
+// used for relaying driver-level information, rather than detailed debugging
+// information; therefore, it does not add the "pretty long prefix".
 #define UVM_INFO_PRINT(fmt, ...) \
-    UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
+    UVM_PRINT_FUNC_SHORT_PREFIX(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

-#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...)                        \
+#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
    UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)

 #define UVM_PANIC()             UVM_PRINT_FUNC(panic, "\n")
@@ -134,13 +143,13 @@ void on_uvm_test_fail(void);
 // Unlike on_uvm_test_fail it provides 'panic' coverity semantics
 void on_uvm_assert(void);

-#define _UVM_ASSERT_MSG(expr, cond, fmt, ...)                                                   \
-    do {                                                                                        \
-        if (unlikely(!(expr))) {                                                                \
-            UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__);  \
-            dump_stack();                                                                       \
-            on_uvm_assert();                                                                    \
-        }                                                                                       \
+#define _UVM_ASSERT_MSG(expr, cond, fmt, ...)                                                       \
+    do {                                                                                            \
+        if (unlikely(!(expr))) {                                                                    \
+            UVM_ERR_PRINT_ALWAYS("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__);  \
+            dump_stack();                                                                           \
+            on_uvm_assert();                                                                        \
+        }                                                                                           \
    } while (0)

 // Prevent function calls in expr and the print argument list from being
@@ -151,7 +160,8 @@ void on_uvm_assert(void);
        UVM_NO_PRINT(fmt, ##__VA_ARGS__);       \
    } while (0)

-// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
+// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
+// builds.
 #if UVM_IS_DEBUG() || defined __COVERITY__
    #define UVM_ASSERT_MSG(expr, fmt, ...)  _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
    #define UVM_ASSERT(expr)                _UVM_ASSERT_MSG(expr, #expr, "\n")
@@ -174,16 +184,16 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
 // Given these are enabled for release builds, we need to be more cautious than
 // in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
 // param is enabled.
-#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...)                                                   \
-    do {                                                                                                \
-        if (uvm_release_asserts && unlikely(!(expr))) {                                                 \
-            UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__);       \
-            if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
-                uvm_global_set_fatal_error(NV_ERR_INVALID_STATE);                                       \
-            if (uvm_release_asserts_dump_stack)                                                         \
-                dump_stack();                                                                           \
-            on_uvm_assert();                                                                            \
-        }                                                                                               \
+#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...)                                                       \
+    do {                                                                                                    \
+        if (uvm_release_asserts && unlikely(!(expr))) {                                                     \
+            UVM_ERR_PRINT_ALWAYS_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__);       \
+            if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests)     \
+                uvm_global_set_fatal_error(NV_ERR_INVALID_STATE);                                           \
+            if (uvm_release_asserts_dump_stack)                                                             \
+                dump_stack();                                                                               \
+            on_uvm_assert();                                                                                \
+        }                                                                                                   \
    } while (0)

 #define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...)  _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
--- a/kernel-open/nvidia-uvm/uvm_conf_computing.c
+++ b/kernel-open/nvidia-uvm/uvm_conf_computing.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021-2023 NVIDIA Corporation
+    Copyright (c) 2021-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -65,32 +65,13 @@ static ulong uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_I

 module_param(uvm_conf_computing_channel_iv_rotation_limit, ulong, S_IRUGO);

-static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
-{
-    return parent->rm_info.gpuConfComputeCaps.mode;
-}
-
-bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
-{
-    return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
-}
-
 void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
 {
-    uvm_parent_gpu_t *other_parent;
-    UvmGpuConfComputeMode parent_mode = uvm_conf_computing_get_mode(parent);
-
    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

-    // The Confidential Computing state of the GPU should match that of the
-    // system.
-    UVM_ASSERT((parent_mode != UVM_GPU_CONF_COMPUTE_MODE_NONE) == g_uvm_global.conf_computing_enabled);
-
-    // All GPUs derive Confidential Computing status from their parent. By
-    // current policy all parent GPUs have identical Confidential Computing
-    // status.
-    for_each_parent_gpu(other_parent)
-        UVM_ASSERT(parent_mode == uvm_conf_computing_get_mode(other_parent));
+    // Confidential Computing enablement on the system should match enablement
+    // on the GPU.
+    UVM_ASSERT(parent->rm_info.gpuConfComputeCaps.bConfComputingEnabled == g_uvm_global.conf_computing_enabled);
 }

 static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
@@ -343,9 +324,6 @@ static NV_STATUS dummy_iv_mem_init(uvm_gpu_t *gpu)
 {
    NV_STATUS status;

-    if (!uvm_conf_computing_mode_is_hcc(gpu))
-        return NV_OK;
-
    status = uvm_mem_alloc_sysmem_dma(sizeof(UvmCslIv), gpu, NULL, &gpu->conf_computing.iv_mem);
    if (status != NV_OK)
        return status;
@@ -554,7 +532,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
 {
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
-    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
+    UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;

    // There is no dedicated lock for the CSL context associated with replayable
    // faults. The mutual exclusion required by the RM CSL API is enforced by
@@ -593,7 +571,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
 {
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
-    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
+    UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;

    // See comment in uvm_conf_computing_fault_decrypt
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -730,7 +708,12 @@ void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)

 bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
 {
-    return gpu->channel_manager->conf_computing.key_rotation_enabled;
+    UVM_ASSERT(gpu);
+
+    // If the channel_manager is not set, we're in channel manager destroy
+    // path after the pointer was NULL-ed. Chances are that other key rotation
+    // infrastructure is not available either. Disallow the key rotation.
+    return gpu->channel_manager && gpu->channel_manager->conf_computing.key_rotation_enabled;
 }

 bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
--- a/kernel-open/nvidia-uvm/uvm_conf_computing.h
+++ b/kernel-open/nvidia-uvm/uvm_conf_computing.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021-2023 NVIDIA Corporation
+    Copyright (c) 2021-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -62,8 +62,6 @@

 void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);

-bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);
-
 typedef struct
 {
    // List of free DMA buffers (uvm_conf_computing_dma_buffer_t).
--- a/kernel-open/nvidia-uvm/uvm_forward_decl.h
+++ b/kernel-open/nvidia-uvm/uvm_forward_decl.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -93,11 +93,11 @@ typedef struct uvm_service_block_context_struct uvm_service_block_context_t;

 typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;

-typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
-typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
+typedef struct uvm_replayable_fault_buffer_struct uvm_replayable_fault_buffer_t;
+typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_buffer_t;
 typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
+typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
 typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
-typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;

 typedef struct uvm_reverse_map_struct uvm_reverse_map_t;

--- a/kernel-open/nvidia-uvm/uvm_global.c
+++ b/kernel-open/nvidia-uvm/uvm_global.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -35,6 +35,7 @@
 #include "uvm_mmu.h"
 #include "uvm_perf_heuristics.h"
 #include "uvm_pmm_sysmem.h"
+#include "uvm_pmm_gpu.h"
 #include "uvm_migrate.h"
 #include "uvm_gpu_access_counters.h"
 #include "uvm_va_space_mm.h"
@@ -54,6 +55,9 @@ static NV_STATUS uvm_register_callbacks(void)
    g_exported_uvm_ops.stopDevice  = NULL;
    g_exported_uvm_ops.isrTopHalf  = uvm_isr_top_half_entry;

+    g_exported_uvm_ops.drainP2P = uvm_suspend_and_drainP2P_entry;
+    g_exported_uvm_ops.resumeP2P = uvm_resumeP2P_entry;
+
    // Register the UVM callbacks with the main GPU driver:
    status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
    if (status != NV_OK)
@@ -87,6 +91,8 @@ NV_STATUS uvm_global_init(void)
    uvm_spin_lock_irqsave_init(&g_uvm_global.gpu_table_lock, UVM_LOCK_ORDER_LEAF);
    uvm_mutex_init(&g_uvm_global.va_spaces.lock, UVM_LOCK_ORDER_VA_SPACES_LIST);
    INIT_LIST_HEAD(&g_uvm_global.va_spaces.list);
+    uvm_mutex_init(&g_uvm_global.devmem_ranges.lock, UVM_LOCK_ORDER_LEAF);
+    INIT_LIST_HEAD(&g_uvm_global.devmem_ranges.list);

    status = uvm_kvmalloc_init();
    if (status != NV_OK) {
@@ -100,12 +106,6 @@ NV_STATUS uvm_global_init(void)
        goto error;
    }

-    status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
-    if (status != NV_OK) {
-        UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
-        goto error;
-    }
-
    status = uvm_procfs_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
@@ -194,15 +194,28 @@ NV_STATUS uvm_global_init(void)
        goto error;
    }

-    // This sets up the ISR (interrupt service routine), by hooking into RM's top-half ISR callback. As soon as this
-    // call completes, GPU interrupts will start arriving, so it's important to be prepared to receive interrupts before
-    // this point:
+    status = uvm_access_counters_init();
+    if (status != NV_OK) {
+        UVM_ERR_PRINT("uvm_access_counters_init failed: %s\n", nvstatusToString(status));
+        goto error;
+    }
+
+    // This sets up the ISR (interrupt service routine), by hooking into RM's
+    // top-half ISR callback. As soon as this call completes, GPU interrupts
+    // will start arriving, so it's important to be prepared to receive
+    // interrupts before this point.
    status = uvm_register_callbacks();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
        goto error;
    }

+    status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
+    if (status != NV_OK) {
+        UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
+        goto error;
+    }
+
    return NV_OK;

 error:
@@ -214,13 +227,11 @@ void uvm_global_exit(void)
 {
    uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);

-    // Guarantee completion of any release callbacks scheduled after the flush
-    // in uvm_resume().
-    nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
+    nv_kthread_q_stop(&g_uvm_global.deferred_release_q);

    uvm_unregister_callbacks();
+    uvm_access_counters_exit();
    uvm_service_block_context_exit();
-
    uvm_perf_heuristics_exit();
    uvm_perf_events_exit();
    uvm_migrate_exit();
@@ -229,6 +240,7 @@ void uvm_global_exit(void)
    uvm_va_policy_exit();
    uvm_mem_global_exit();
    uvm_pmm_sysmem_exit();
+    uvm_pmm_devmem_exit();
    uvm_gpu_exit();
    uvm_processor_mask_cache_exit();

@@ -237,7 +249,6 @@ void uvm_global_exit(void)

    uvm_procfs_exit();

-    nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
    nv_kthread_q_stop(&g_uvm_global.global_q);

    uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
@@ -282,7 +293,7 @@ static NV_STATUS uvm_suspend(void)
    //   * Flush relevant kthread queues (bottom half, etc.)

    // Some locks acquired by this function, such as pm.lock, are released
-    // by uvm_resume().  This is contrary to the lock tracking code's
+    // by uvm_resume(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

@@ -299,7 +310,7 @@ static NV_STATUS uvm_suspend(void)
        gpu = uvm_gpu_get(gpu_id);

        // Since fault buffer state may be lost across sleep cycles, UVM must
-        // ensure any outstanding replayable faults are dismissed.  The RM
+        // ensure any outstanding replayable faults are dismissed. The RM
        // guarantees that all user channels have been preempted before
        // uvm_suspend() is called, which implies that no user channels can be
        // stalled on faults when this point is reached.
@@ -325,7 +336,7 @@ static NV_STATUS uvm_suspend(void)
    }

    // Acquire each VA space's lock in write mode to lock out VMA open and
-    // release callbacks.  These entry points do not have feasible early exit
+    // release callbacks. These entry points do not have feasible early exit
    // options, and so aren't suitable for synchronization with pm.lock.
    uvm_mutex_lock(&g_uvm_global.va_spaces.lock);

@@ -355,7 +366,7 @@ static NV_STATUS uvm_resume(void)
    g_uvm_global.pm.is_suspended = false;

    // Some locks released by this function, such as pm.lock, were acquired
-    // by uvm_suspend().  This is contrary to the lock tracking code's
+    // by uvm_suspend(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

@@ -387,7 +398,7 @@ static NV_STATUS uvm_resume(void)
    uvm_thread_context_lock_enable_tracking();

    // Force completion of any release callbacks successfully queued for
-    // deferred completion while suspended.  The deferred release
+    // deferred completion while suspended. The deferred release
    // queue is not guaranteed to remain empty following this flush since
    // some threads that failed to acquire pm.lock in uvm_release() may
    // not have scheduled their handlers yet.
@@ -419,7 +430,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
    }
    else {
        UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
-                nvstatusToString(error), nvstatusToString(previous_error));
+                nvstatusToString(error),
+                nvstatusToString(previous_error));
    }

    nvUvmInterfaceReportFatalError(error);
@@ -472,3 +484,68 @@ NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)

    return NV_OK;
 }
+
+static NV_STATUS suspend_and_drainP2P(const NvProcessorUuid *parent_uuid)
+{
+    NV_STATUS status = NV_OK;
+    uvm_parent_gpu_t *parent_gpu;
+
+    uvm_mutex_lock(&g_uvm_global.global_lock);
+
+    // NVLINK STO recovery is not supported in combination with MIG
+    parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
+    if (!parent_gpu || parent_gpu->smc.enabled) {
+        status = NV_ERR_INVALID_DEVICE;
+        goto unlock;
+    }
+
+    status = uvm_channel_manager_suspend_p2p(parent_gpu->gpus[0]->channel_manager);
+
+unlock:
+    uvm_mutex_unlock(&g_uvm_global.global_lock);
+    return status;
+}
+
+static NV_STATUS resumeP2P(const NvProcessorUuid *parent_uuid)
+{
+    NV_STATUS status = NV_OK;
+    uvm_parent_gpu_t *parent_gpu;
+
+    uvm_mutex_lock(&g_uvm_global.global_lock);
+
+    // NVLINK STO recovery is not supported in combination with MIG
+    parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
+    if (!parent_gpu || parent_gpu->smc.enabled) {
+        status = NV_ERR_INVALID_DEVICE;
+        goto unlock;
+    }
+
+    uvm_channel_manager_resume_p2p(parent_gpu->gpus[0]->channel_manager);
+
+unlock:
+    uvm_mutex_unlock(&g_uvm_global.global_lock);
+    return status;
+}
+
+NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid)
+{
+    UVM_ENTRY_RET(suspend_and_drainP2P(uuid));
+}
+
+NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid)
+{
+    UVM_ENTRY_RET(resumeP2P(uuid));
+}
+
+NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus)
+{
+    uvm_gpu_t *gpu;
+
+    for_each_gpu_in_mask(gpu, gpus) {
+        NV_STATUS status = uvm_gpu_check_nvlink_error(gpu);
+        if (status != NV_OK)
+            return status;
+    }
+
+    return NV_OK;
+}
--- a/kernel-open/nvidia-uvm/uvm_global.h
+++ b/kernel-open/nvidia-uvm/uvm_global.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -157,6 +157,12 @@ struct uvm_global_struct
    // This field is set once during global initialization (uvm_global_init),
    // and can be read afterwards without acquiring any locks.
    bool conf_computing_enabled;
+
+    // List of all devmem ranges allocated on this GPU
+    struct {
+        uvm_mutex_t lock;
+        struct list_head list;
+    } devmem_ranges;
 };

 // Initialize global uvm state
@@ -171,6 +177,12 @@ NV_STATUS uvm_suspend_entry(void);
 // Recover after exit from a system sleep state
 NV_STATUS uvm_resume_entry(void);

+// Block all P2P traffic on the GPU's channels
+NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid);
+
+// Resume P2P traffic on the GPU's channels
+NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid);
+
 // Add parent GPU to the global table.
 //
 // LOCKING: requires that you hold the global lock and gpu_table_lock
@@ -300,7 +312,7 @@ static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *
    return gpu;
 }

-static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
+static uvm_gpu_t *uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
 {
    uvm_gpu_id_t gpu_id;

@@ -322,7 +334,45 @@ static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t
 #define for_each_gpu_in_mask(gpu, mask)                         \
    for (gpu = uvm_processor_mask_find_first_gpu(mask);         \
         gpu != NULL;                                           \
-         gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))
+         gpu = uvm_processor_mask_find_next_gpu(mask, gpu))
+
+static uvm_parent_gpu_t *uvm_parent_processor_mask_find_first_gpu(const uvm_parent_processor_mask_t *mask)
+{
+    uvm_parent_gpu_t *parent_gpu;
+    uvm_parent_gpu_id_t parent_id = uvm_parent_processor_mask_find_first_gpu_id(mask);
+
+    if (UVM_PARENT_ID_IS_INVALID(parent_id))
+        return NULL;
+
+    parent_gpu = uvm_parent_gpu_get(parent_id);
+
+    // See comment in uvm_processor_mask_find_first_gpu().
+    UVM_ASSERT_MSG(parent_gpu, "parent_id %u\n", uvm_parent_id_value(parent_id));
+    return parent_gpu;
+}
+
+static uvm_parent_gpu_t *uvm_parent_processor_mask_find_next_gpu(const uvm_parent_processor_mask_t *mask,
+                                                                 uvm_parent_gpu_t *parent_gpu)
+{
+    uvm_parent_gpu_id_t parent_id;
+
+    UVM_ASSERT(parent_gpu);
+    parent_id = uvm_parent_processor_mask_find_next_gpu_id(mask, uvm_parent_id_next(parent_gpu->id));
+    if (UVM_PARENT_ID_IS_INVALID(parent_id))
+        return NULL;
+
+    parent_gpu = uvm_parent_gpu_get(parent_id);
+
+    // See comment in uvm_processor_mask_find_first_gpu().
+    UVM_ASSERT_MSG(parent_gpu, "parent_id %u\n", uvm_parent_id_value(parent_id));
+    return parent_gpu;
+}
+
+// Helper to iterate over all parent GPUs in the input mask
+#define for_each_parent_gpu_in_mask(parent_gpu, mask)                   \
+    for ((parent_gpu) = uvm_parent_processor_mask_find_first_gpu((mask)); \
+         (parent_gpu);                                                  \
+         (parent_gpu) = uvm_parent_processor_mask_find_next_gpu((mask), (parent_gpu)))

 // Helper to iterate over all GPUs retained by the UVM driver
 // (across all va spaces).
@@ -330,7 +380,7 @@ static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                         \
           gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);});    \
           gpu != NULL;                                                                \
-           gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
+           gpu = uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))

 // LOCKING: Must hold either the global_lock or the gpu_table_lock
 static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -407,6 +457,10 @@ void uvm_global_gpu_release(const uvm_processor_mask_t *mask);
 // Notably this check cannot be performed where it's not safe to call into RM.
 NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);

+// Check for nvlink errors for all GPUs in a mask
+// Notably this check cannot be performed where it's not safe to call into RM.
+NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus);
+
 // Pre-allocate fault service contexts.
 NV_STATUS uvm_service_block_context_init(void);

--- a/kernel-open/nvidia-uvm/uvm_gpu.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu.c
--- a/kernel-open/nvidia-uvm/uvm_gpu.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -97,6 +97,11 @@ struct uvm_service_block_context_struct
    // been serviced
    uvm_processor_mask_t resident_processors;

+    // A mask of GPUs that need to be checked for NVLINK errors before the
+    // handler returns, but after the VA space lock has been unlocked
+    // to avoid RM/UVM VA space lock deadlocks.
+    uvm_processor_mask_t gpus_to_check_for_nvlink_errors;
+
    // VA block region that contains all the pages affected by the operation
    uvm_va_block_region_t region;

@@ -184,6 +189,9 @@ struct uvm_service_block_context_struct

    // Prefetch temporary state.
    uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
+
+    // Access counters notification buffer index.
+    NvU32 access_counters_buffer_index;
 };

 typedef struct
@@ -192,6 +200,10 @@ typedef struct
    {
        struct
        {
+            // Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
+            // region of a SAM VMA. Used for batching ATS faults in a vma.
+            uvm_page_mask_t prefetch_only_fault_mask;
+
            // Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
            // of a SAM VMA. Used for batching ATS faults in a vma.
            uvm_page_mask_t read_fault_mask;
@@ -202,7 +214,7 @@ typedef struct

            // Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region
            // of a SAM VMA. This is a logical or of read_fault_mask and
-            // write_mask.
+            // write_mask and prefetch_only_fault_mask.
            uvm_page_mask_t accessed_mask;

            // Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE
@@ -269,7 +281,6 @@ typedef struct
        // Prefetch temporary state.
        uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
    } prefetch_state;
-
 } uvm_ats_fault_context_t;

 struct uvm_fault_service_batch_context_struct
@@ -342,7 +353,7 @@ typedef struct
    // entries from the GPU buffer
    NvU32 max_batch_size;

-    struct uvm_replayable_fault_buffer_info_struct
+    struct uvm_replayable_fault_buffer_struct
    {
        // Maximum number of faults entries that can be stored in the buffer
        NvU32 max_faults;
@@ -406,7 +417,7 @@ typedef struct
        uvm_ats_fault_invalidate_t ats_invalidate;
    } replayable;

-    struct uvm_non_replayable_fault_buffer_info_struct
+    struct uvm_non_replayable_fault_buffer_struct
    {
        // Maximum number of faults entries that can be stored in the buffer
        NvU32 max_faults;
@@ -460,7 +471,7 @@ typedef struct

    // Timestamp when prefetch faults where disabled last time
    NvU64 disable_prefetch_faults_timestamp;
-} uvm_fault_buffer_info_t;
+} uvm_fault_buffer_t;

 struct uvm_access_counter_service_batch_context_struct
 {
@@ -468,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct

    NvU32 num_cached_notifications;

-    struct
-    {
-        uvm_access_counter_buffer_entry_t   **notifications;
+    uvm_access_counter_buffer_entry_t **notifications;

-        NvU32                             num_notifications;
+    NvU32 num_notifications;

-        // Boolean used to avoid sorting the fault batch by instance_ptr if we
-        // determine at fetch time that all the access counter notifications in
-        // the batch report the same instance_ptr
-        bool is_single_instance_ptr;
-    } virt;
-
-    struct
-    {
-        uvm_access_counter_buffer_entry_t    **notifications;
-        uvm_reverse_map_t                      *translations;
-
-        NvU32                              num_notifications;
-
-        // Boolean used to avoid sorting the fault batch by aperture if we
-        // determine at fetch time that all the access counter notifications in
-        // the batch report the same aperture
-        bool                              is_single_aperture;
-    } phys;
+    // Boolean used to avoid sorting the fault batch by instance_ptr if we
+    // determine at fetch time that all the access counter notifications in
+    // the batch report the same instance_ptr
+    bool is_single_instance_ptr;

    // Helper page mask to compute the accessed pages within a VA block
    uvm_page_mask_t accessed_pages;
@@ -506,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
    NvU32 batch_id;
 };

-typedef struct
+struct uvm_access_counter_buffer_struct
 {
-    // Values used to configure access counters in RM
-    struct
-    {
-        UVM_ACCESS_COUNTER_GRANULARITY  granularity;
-        UVM_ACCESS_COUNTER_USE_LIMIT    use_limit;
-    } rm;
+    uvm_parent_gpu_t *parent_gpu;

-    // The following values are precomputed by the access counter notification
-    // handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
-    // uvm_gpu_access_counters.c for more details.
-    NvU64 translation_size;
-
-    NvU64 translations_per_counter;
-
-    NvU64 sub_granularity_region_size;
-
-    NvU64 sub_granularity_regions_per_translation;
-} uvm_gpu_access_counter_type_config_t;
-
-typedef struct
-{
    UvmGpuAccessCntrInfo rm_info;

+    // Access counters may have multiple notification buffers.
+    NvU32 index;
+
    NvU32 max_notifications;

    NvU32 max_batch_size;
@@ -552,10 +531,22 @@ typedef struct
    // may override it to try different configuration values.
    struct
    {
-        uvm_gpu_access_counter_type_config_t mimc;
-        uvm_gpu_access_counter_type_config_t momc;
+        // Values used to configure access counters in RM
+        struct
+        {
+            UVM_ACCESS_COUNTER_GRANULARITY granularity;
+        } rm;

-        NvU32                                threshold;
+        // The following values are precomputed by the access counter
+        // notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
+        // in uvm_gpu_access_counters.c for more details.
+        NvU64 translation_size;
+
+        NvU64 sub_granularity_region_size;
+
+        NvU64 sub_granularity_regions_per_translation;
+
+        NvU32 threshold;
    } current_config;

    // Access counter statistics
@@ -567,7 +558,7 @@ typedef struct
    } stats;

    // Ignoring access counters means that notifications are left in the HW
-    // buffer without being serviced.  Requests to ignore access counters
+    // buffer without being serviced. Requests to ignore access counters
    // are counted since the suspend path inhibits access counter interrupts,
    // and the resume path needs to know whether to reenable them.
    NvU32 notifications_ignored_count;
@@ -575,13 +566,25 @@ typedef struct
    // Context structure used to service a GPU access counter batch
    uvm_access_counter_service_batch_context_t batch_service_context;

-    // VA space that reconfigured the access counters configuration, if any.
-    // Used in builtin tests only, to avoid reconfigurations from different
-    // processes
-    //
-    // Locking: both readers and writers must hold the access counters ISR lock
-    uvm_va_space_t *reconfiguration_owner;
-} uvm_access_counter_buffer_info_t;
+    struct
+    {
+        // VA space that reconfigured the access counters configuration, if any.
+        // Used in builtin tests only, to avoid reconfigurations from different
+        // processes.
+        //
+        // Locking: both readers and writers must hold the access counters ISR
+        // lock.
+        uvm_va_space_t *reconfiguration_owner;
+
+        // The service access counters loop breaks after processing the first
+        // batch. It will be retriggered if there are pending notifications, but
+        // it releases the ISR service lock to check certain races that would be
+        // difficult to hit otherwise.
+        bool one_iteration_per_batch;
+        NvU32 sleep_per_iteration_us;
+    } test;
+
+};

 typedef struct
 {
@@ -688,6 +691,11 @@ struct uvm_gpu_struct
        // ZeroFB testing mode, this will be 0.
        NvU64 size;

+        // Physical start of the heap. For SMC-enabled GPUs, this is useful to
+        // partition PMM; it is used by HMM to figure out the right translation
+        // between HMM ranges and PMM offsets.
+        NvU64 phys_start;
+
        // Max (inclusive) physical address of this GPU's memory that the driver
        // can allocate through PMM (PMA).
        NvU64 max_allocatable_address;
@@ -701,7 +709,7 @@ struct uvm_gpu_struct
            // True if the platform supports HW coherence and the GPU's memory
            // is exposed as a NUMA node to the kernel.
            bool enabled;
-            unsigned int node_id;
+            int node_id;
        } numa;

        // Physical address of the start of statically mapped fb memory in BAR1
@@ -732,15 +740,11 @@ struct uvm_gpu_struct

    struct
    {
-        // Mask of peer_gpus set
+        // Mask of peer_gpus set.
        uvm_processor_mask_t peer_gpu_mask;

-        // lazily-populated array of peer GPUs, indexed by the peer's GPU index
-        uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
-
-        // Leaf spinlock used to synchronize access to the peer_gpus table so
-        // that it can be safely accessed from the access counters bottom half
-        uvm_spinlock_t peer_gpus_lock;
+        // Leaf spinlock used to synchronize access to peer_gpu_mask.
+        uvm_spinlock_t peer_gpu_lock;
    } peer_info;

    // Maximum number of subcontexts supported
@@ -815,14 +819,6 @@ struct uvm_gpu_struct
        uvm_bit_locks_t bitlocks;
    } sysmem_mappings;

-    // Reverse lookup table used to query the user mapping associated with a
-    // sysmem (DMA) physical address.
-    //
-    // The system memory mapping information referred to by this field is
-    // different from that of sysmem_mappings, because it relates to user
-    // mappings (instead of kernel), and it is used in most configurations.
-    uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
-
    struct
    {
        uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
@@ -871,6 +867,38 @@ struct uvm_gpu_struct
        NvBool *error_notifier;
    } ecc;

+    // NVLINK STO recovery handling
+    // In order to trap STO errors as soon as possible the driver has the hw
+    // interrupt register mapped directly. If an STO interrupt is ever noticed
+    // to be pending, then the UVM driver needs to:
+    //
+    //   1) ask RM to service interrupts, and then
+    //   2) inspect the NVLINK error notifier state.
+    //
+    // Notably, checking for channel errors is not enough, because STO errors
+    // can be pending, even after a channel has become idle.
+    //
+    // See more details in uvm_gpu_check_nvlink_error().
+    struct
+    {
+        // Does the GPU have NVLINK STO recovery enabled?
+        bool enabled;
+
+        // Artificially injected error for testing
+        atomic_t injected_error;
+
+        // Direct mapping of the 32-bit part of the hw interrupt tree that has
+        // the NVLINK error bits.
+        volatile NvU32 *hw_interrupt_tree_location;
+
+        // Mask to get the NVLINK error interrupt bits from the 32-bits above.
+        NvU32 mask;
+
+        // Set to true by RM when a fatal NVLINK error is encountered (requires
+        // asking RM to service pending interrupts to be current).
+        NvBool *error_notifier;
+    } nvlink_status;
+
    struct
    {
        NvU32 swizz_id;
@@ -912,6 +940,16 @@ struct uvm_gpu_struct
    uvm_mutex_t device_p2p_lock;
 };

+typedef struct
+{
+    bool access_counters_alloc_buffer;
+    bool access_counters_alloc_block_context;
+    bool isr_access_counters_alloc;
+    bool isr_access_counters_alloc_stats_cpu;
+    bool access_counters_batch_context_notifications;
+    bool access_counters_batch_context_notification_cache;
+} uvm_test_parent_gpu_inject_error_t;
+
 // In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
 // parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
 // (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
@@ -920,8 +958,8 @@ struct uvm_gpu_struct
 struct uvm_parent_gpu_struct
 {
    // Reference count for how many places are holding on to a parent GPU
-    // (internal to the UVM driver).  This includes any GPUs we know about, not
-    // just GPUs that are registered with a VA space.  Most GPUs end up being
+    // (internal to the UVM driver). This includes any GPUs we know about, not
+    // just GPUs that are registered with a VA space. Most GPUs end up being
    // registered, but there are brief periods when they are not registered,
    // such as during interrupt handling, and in add_gpu() or remove_gpu().
    nv_kref_t gpu_kref;
@@ -931,7 +969,7 @@ struct uvm_parent_gpu_struct

    uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];

-    // Bitmap of valid child entries in the gpus[] table.  Used to retrieve a
+    // Bitmap of valid child entries in the gpus[] table. Used to retrieve a
    // usable child GPU in bottom-halves.
    DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);

@@ -975,6 +1013,13 @@ struct uvm_parent_gpu_struct
    // Do not read this field directly, use uvm_gpu_device_handle instead.
    uvmGpuDeviceHandle rm_device;

+    // Total amount of physical memory available on the parent GPU.
+    NvU64 max_allocatable_address;
+
+#if UVM_IS_CONFIG_HMM()
+    uvm_pmm_gpu_devmem_t *devmem;
+#endif
+
    // The physical address range addressable by the GPU
    //
    // The GPU has its NV_PFB_XV_UPPER_ADDR register set by RM to
@@ -1001,6 +1046,8 @@ struct uvm_parent_gpu_struct
    // Whether CE supports physical addressing mode for writes to vidmem
    bool ce_phys_vidmem_write_supported;

+    // Addressing mode(s) supported for CE transfers between this GPU and its
+    // peers: none, physical only, physical and virtual, etc.
    uvm_gpu_peer_copy_mode_t peer_copy_mode;

    // Virtualization mode of the GPU.
@@ -1025,11 +1072,6 @@ struct uvm_parent_gpu_struct

    bool access_counters_supported;

-    // If this is true, physical address based access counter notifications are
-    // potentially generated. If false, only virtual address based notifications
-    // are generated (assuming access_counters_supported is true too).
-    bool access_counters_can_use_physical_addresses;
-
    bool fault_cancel_va_supported;

    // True if the GPU has hardware support for scoped atomics
@@ -1090,6 +1132,15 @@ struct uvm_parent_gpu_struct
    // Indicates whether the GPU can map sysmem with pages larger than 4k
    bool can_map_sysmem_with_large_pages;

+    struct
+    {
+        // If true, the granularity of key rotation is a single channel. If
+        // false, the key replacement affects all channels on the engine. The
+        // supported granularity is dependent on the number of key slots
+        // available in HW.
+        bool per_channel_key_rotation;
+    } conf_computing;
+
    // VA base and size of the RM managed part of the internal UVM VA space.
    //
    // The internal UVM VA is shared with RM by RM controlling some of the top
@@ -1102,6 +1153,11 @@ struct uvm_parent_gpu_struct
    NvU64 rm_va_base;
    NvU64 rm_va_size;

+    // Base and size of the GPU VA space used for peer identity mappings.
+    // Only used if peer_copy_mode is UVM_GPU_PEER_COPY_MODE_VIRTUAL.
+    NvU64 peer_va_base;
+    NvU64 peer_va_size;
+
    // Base and size of the GPU VA used for uvm_mem_t allocations mapped in the
    // internal address_space_tree.
    NvU64 uvm_mem_va_base;
@@ -1137,17 +1193,17 @@ struct uvm_parent_gpu_struct
    // Interrupt handling state and locks
    uvm_isr_info_t isr;

-    // Fault buffer info. This is only valid if supports_replayable_faults is
-    // set to true.
-    uvm_fault_buffer_info_t fault_buffer_info;
+    // This is only valid if supports_replayable_faults is set to true.
+    uvm_fault_buffer_t fault_buffer;

    // PMM lazy free processing queue.
    // TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
    nv_kthread_q_t lazy_free_q;

-    // Access counter buffer info. This is only valid if
-    // supports_access_counters is set to true.
-    uvm_access_counter_buffer_info_t access_counter_buffer_info;
+    // This is only valid if supports_access_counters is set to true. This array
+    // has rm_info.accessCntrBufferCount entries.
+    uvm_access_counter_buffer_t *access_counter_buffer;
+    uvm_mutex_t access_counters_enablement_lock;

    // Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
    NvU32 utlb_per_gpc_count;
@@ -1232,6 +1288,10 @@ struct uvm_parent_gpu_struct
        // 47-bit fabric memory physical offset that peer gpus need to access
        // to read a peer's memory
        NvU64 fabric_memory_window_start;
+
+        // 47-bit fabric memory physical offset that peer gpus need to access
+        // to read remote EGM memory.
+        NvU64 egm_fabric_memory_window_start;
    } nvswitch_info;

    struct
@@ -1260,6 +1320,24 @@ struct uvm_parent_gpu_struct
        unsigned long smmu_prod;
        unsigned long smmu_cons;
    } smmu_war;
+
+    struct
+    {
+        // Is EGM support enabled on this GPU.
+        bool enabled;
+
+        // Local EGM peer ID. This ID is used to route EGM memory accesses to
+        // the local CPU socket.
+        NvU8 local_peer_id;
+
+        // EGM base address of the EGM carveout for remote EGM accesses.
+        // The base address is used when computing PTE PA address values for
+        // accesses to the local CPU socket's EGM memory from other peer
+        // GPUs.
+        NvU64 base_address;
+    } egm;
+
+    uvm_test_parent_gpu_inject_error_t test;
 };

 static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
@@ -1307,10 +1385,10 @@ typedef struct
    //   detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
    //   called.
    //
-    // - The peer_gpus_lock is held on one of the GPUs. In this case, the other
-    //   GPU must be read from the original GPU's peer_gpus table. The fields
-    //   will not change while the lock is held, but they may no longer be valid
-    //   because the other GPU might be in teardown.
+    // - The peer_gpu_lock is held on one of the GPUs. In this case, the other
+    //   GPU must be referred from the original GPU's peer_gpu_mask reference.
+    //   The fields will not change while the lock is held, but they may no
+    //   longer be valid because the other GPU might be in teardown.

    // This field is used to determine when this struct has been initialized
    // (ref_count != 0). NVLink peers are initialized at GPU registration time.
@@ -1330,6 +1408,18 @@ typedef struct
    // peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
    NvU8 peer_ids[2];

+    // EGM peer Id associated with this device w.r.t. a peer GPU.
+    // Note: egmPeerId (A -> B) != egmPeerId (B -> A)
+    // egm_peer_id[0] from min(gpu_id_1, gpu_id_2) -> max(gpu_id_1, gpu_id_2)
+    // egm_peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
+    //
+    // Unlike VIDMEM peers, EGM peers are not symmetric. This means that if
+    // one of the GPUs is EGM-enabled, it does not automatically mean that
+    // the other is also EGM-enabled. Therefore, EGM peer Ids are only
+    // valid if the peer GPU is EGM-enabled, i.e. egm_peer_id[0] is valid
+    // iff max(gpu_id_1, gpu_id_2) is EGM-enabled.
+    NvU8 egm_peer_ids[2];
+
    // The link type between the peer parent GPUs, currently either PCIe or
    // NVLINK.
    uvm_gpu_link_type_t link_type;
@@ -1372,7 +1462,9 @@ void uvm_gpu_exit_va_space(uvm_va_space_t *va_space);

 static unsigned int uvm_gpu_numa_node(uvm_gpu_t *gpu)
 {
-    UVM_ASSERT(gpu->mem_info.numa.enabled);
+    if (!gpu->mem_info.numa.enabled)
+        UVM_ASSERT(gpu->mem_info.numa.node_id == NUMA_NO_NODE);
+
    return gpu->mem_info.numa.node_id;
 }

@@ -1381,6 +1473,7 @@ static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struc
    unsigned long sys_addr = page_to_pfn(page) << PAGE_SHIFT;
    unsigned long gpu_offset = sys_addr - gpu->parent->system_bus.memory_window_start;

+    UVM_ASSERT(gpu->mem_info.numa.enabled);
    UVM_ASSERT(page_to_nid(page) == uvm_gpu_numa_node(gpu));
    UVM_ASSERT(sys_addr >= gpu->parent->system_bus.memory_window_start);
    UVM_ASSERT(sys_addr + PAGE_SIZE - 1 <= gpu->parent->system_bus.memory_window_end);
@@ -1407,7 +1500,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
 uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);

 // Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
-// that the caller is holding the global_lock.  This is a narrower-purpose
+// that the caller is holding the global_lock. This is a narrower-purpose
 // function, and is only intended for use by the top-half ISR, or other very
 // limited cases.
 uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
@@ -1418,6 +1511,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
 // LOCKING: Takes and releases the global lock for the caller.
 NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
                                 const uvm_rm_user_object_t *user_rm_device,
+                                 const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
                                 uvm_gpu_t **gpu_out);

 // Retain a gpu which is known to already be retained. Does NOT require the
@@ -1459,13 +1553,28 @@ uvm_gpu_link_type_t uvm_parent_gpu_peer_link_type(uvm_parent_gpu_t *parent_gpu0,
 // They must not be the same gpu.
 uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);

+// Returns the physical address for use by accessing_gpu of a vidmem allocation
+// on the peer owning_gpu. This address can be used for making PTEs on
+// accessing_gpu, but not for copying between the two GPUs. For that, use
+// uvm_gpu_peer_copy_address.
+uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu);
+
+// Returns the physical or virtual address for use by accessing_gpu to copy to/
+// from a vidmem allocation on the peer owning_gpu. This may be different from
+// uvm_gpu_peer_phys_address to handle CE limitations in addressing peer
+// physical memory directly.
+uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu);
+
 // Return the reference count for the P2P state between the given GPUs.
 // The two GPUs must have different parents.
 NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);

-// Get the processor id accessible by the given GPU for the given physical
-// address.
-uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
+// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
+// NUMA node that remote_gpu is attached to.
+// Note that local_gpu can be equal to remote_gpu when memory is resident in
+// CPU NUMA node local to local_gpu. In this case, the local EGM peer ID will
+// be used.
+uvm_aperture_t uvm_gpu_egm_peer_aperture(uvm_parent_gpu_t *local_gpu, uvm_parent_gpu_t *remote_gpu);

 bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);

@@ -1508,8 +1617,8 @@ static uvm_gpu_address_t uvm_parent_gpu_address_virtual_from_sysmem_phys(uvm_par
    return uvm_gpu_address_virtual(parent_gpu->flat_sysmem_va_base + pa);
 }

-// Given a GPU or CPU physical address (not peer), retrieve an address suitable
-// for CE access.
+// Given a GPU, CPU, or EGM PEER physical address (not VIDMEM peer), retrieve an
+// address suitable for CE access.
 static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phys_addr)
 {
    UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);
@@ -1531,6 +1640,13 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
    return &gpu->peer_mappings[uvm_id_gpu_index(peer_id)];
 }

+// Check whether the provided address points to peer memory:
+// * Physical address using one of the PEER apertures
+// * Physical address using SYS aperture that belongs to an exposed coherent
+//   memory
+// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
+bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);
+
 // Check for ECC errors
 //
 // Notably this check cannot be performed where it's not safe to call into RM.
@@ -1543,6 +1659,23 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);
 // and it's required to call uvm_gpu_check_ecc_error() to be sure.
 NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);

+// NVLINK error injection and checking
+//
+// Inject an NVLINK error of the given type
+NV_STATUS uvm_gpu_inject_nvlink_error(uvm_gpu_t *gpu, UVM_TEST_NVLINK_ERROR_TYPE error_type);
+
+NV_STATUS uvm_gpu_get_injected_nvlink_error(uvm_gpu_t *gpu);
+
+// Check for NVLINK errors. Cannot be performed where it's not safe to call into RM.
+NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
+
+// Check for NVLINK errors without calling into RM
+//
+// Calling into RM is problematic in many places, this check is always safe to
+// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
+// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
+NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
+
 // Map size bytes of contiguous sysmem on the GPU for physical access
 //
 // size has to be aligned to PAGE_SIZE.
--- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
--- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2017-2024 NVIDIA Corporation
+    Copyright (c) 2017-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -27,11 +27,11 @@
 #include "uvm_forward_decl.h"
 #include "uvm_test_ioctl.h"

-NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
-void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
-bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
+NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
+void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
+bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index);

-void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
+void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters);

 void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);

@@ -46,17 +46,23 @@ void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
 //
 // When uningoring, the interrupt conditions will be re-evaluated to trigger
 // processing of buffered notifications, if any exist.
+//
+// All of parent_gpu's notification buffers are affected.
 void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);

 // Return whether the VA space has access counter migrations enabled. The
 // caller must ensure that the VA space cannot go away.
 bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);

-// Global perf initialization/cleanup functions
+// Global access counters initialization/cleanup functions.
+NV_STATUS uvm_access_counters_init(void);
+void uvm_access_counters_exit(void);
+
+// Global perf initialization/cleanup functions.
 NV_STATUS uvm_perf_access_counters_init(void);
 void uvm_perf_access_counters_exit(void);

-// VA space Initialization/cleanup functions. See comments in
+// VA space initialization/cleanup functions. See comments in
 // uvm_perf_heuristics.h
 NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
 void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
@@ -72,17 +78,18 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
 // counters are currently enabled. The hardware notifications and interrupts on
 // the GPU are enabled the first time any VA space invokes
 // uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
-// uvm_parent_gpu_access_counters_disable().
+// uvm_gpu_access_counters_disable().
 //
 // Locking: the VA space lock must not be held by the caller since these
 // functions may take the access counters ISR lock.
 NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
-void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
+void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);

 NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
                                                      struct file *filp);
 NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
 NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
 NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
+NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp);

 #endif // __UVM_GPU_ACCESS_COUNTERS_H__
--- a/kernel-open/nvidia-uvm/uvm_gpu_isr.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_isr.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -154,62 +154,73 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
    return 1;
 }

-static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
+static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
 {
    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
+    UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
+
+    // On Volta, accessCntrBufferCount is > 0, but we don't support access
+    // counters in UVM (access_counters_supported is cleared during HAL
+    // initialization.) This check prevents the top-half from accessing
+    // unallocated memory.
+    if (!parent_gpu->access_counters_supported)
+        return 0;

    if (parent_gpu->isr.is_suspended)
        return 0;

-    if (!parent_gpu->isr.access_counters.handling_ref_count)
+    if (!parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count)
        return 0;

-    if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
+    if (down_trylock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem) != 0)
        return 0;

-    if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
-        up(&parent_gpu->isr.access_counters.service_lock.sem);
+    if (!uvm_parent_gpu_access_counters_pending(parent_gpu, notif_buf_index)) {
+        up(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem);
        return 0;
    }

    nv_kref_get(&parent_gpu->gpu_kref);

    // Interrupts need to be disabled to avoid an interrupt storm
-    uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
+    uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);

    nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
-                                 &parent_gpu->isr.access_counters.bottom_half_q_item);
+                                 &parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);

    return 1;
 }

-// This is called from RM's top-half ISR (see: the nvidia_isr() function), and UVM is given a
-// chance to handle the interrupt, before most of the RM processing. UVM communicates what it
-// did, back to RM, via the return code:
+// This is called from RM's top-half ISR (see: the nvidia_isr() function), and
+// UVM is given a chance to handle the interrupt, before most of the RM
+// processing. UVM communicates what it did, back to RM, via the return code:
 //
 //     NV_OK:
 //         UVM handled an interrupt.
 //
 //     NV_WARN_MORE_PROCESSING_REQUIRED:
-//         UVM did not schedule a bottom half, because it was unable to get the locks it
-//         needed, but there is still UVM work to be done. RM will return "not handled" to the
-//         Linux kernel, *unless* RM handled other faults in its top half. In that case, the
-//         fact that UVM did not handle its interrupt is lost. However, life and interrupt
-//         processing continues anyway: the GPU will soon raise another interrupt, because
-//         that's what it does when there are replayable page faults remaining (GET != PUT in
-//         the fault buffer).
+//         UVM did not schedule a bottom half, because it was unable to get the
+//         locks it needed, but there is still UVM work to be done. RM will
+//         return "not handled" to the Linux kernel, *unless* RM handled other
+//         faults in its top half. In that case, the fact that UVM did not
+//         handle its interrupt is lost. However, life and interrupt processing
+//         continues anyway: the GPU will soon raise another interrupt, because
+//         that's what it does when there are replayable page faults remaining
+//         (GET != PUT in the fault buffer).
 //
 //     NV_ERR_NO_INTR_PENDING:
-//         UVM did not find any work to do. Currently this is handled in RM in exactly the same
-//         way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
-//         available for the future. RM's interrupt handling tends to evolve as new chips and
-//         new interrupts get created.
+//         UVM did not find any work to do. Currently this is handled in RM in
+//         exactly the same way as NV_WARN_MORE_PROCESSING_REQUIRED is handled.
+//         However, the extra precision is available for the future. RM's
+//         interrupt handling tends to evolve as new chips and new interrupts
+//         get created.

 static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
 {
    uvm_parent_gpu_t *parent_gpu;
    unsigned num_handlers_scheduled = 0;
    NV_STATUS status = NV_OK;
+    NvU32 i;

    if (!in_interrupt() && in_atomic()) {
        // Early-out if we're not in interrupt context, but memory allocations
@@ -243,14 +254,16 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
    nv_kref_get(&parent_gpu->gpu_kref);
    uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);

-    // Now that we got a GPU object, lock it so that it can't be removed without us noticing.
+    // Now that we got a GPU object, lock it so that it can't be removed without
+    // us noticing.
    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

    ++parent_gpu->isr.interrupt_count;

    num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
    num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
-    num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
+    for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++)
+        num_handlers_scheduled += schedule_access_counters_handler(parent_gpu, i);

    if (num_handlers_scheduled == 0) {
        if (parent_gpu->isr.is_suspended)
@@ -288,6 +301,55 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
    return errno_to_nv_status(nv_kthread_q_init(queue, name));
 }

+static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
+{
+    NV_STATUS status = NV_OK;
+    uvm_va_block_context_t *block_context;
+
+    UVM_ASSERT(parent_gpu->access_counters_supported);
+    UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
+
+    uvm_sema_init(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, 1, UVM_LOCK_ORDER_ISR);
+
+    status = uvm_parent_gpu_init_access_counters(parent_gpu, notif_buf_index);
+    if (status != NV_OK) {
+        UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s, notif buf index: %u\n",
+                      nvstatusToString(status),
+                      uvm_parent_gpu_name(parent_gpu),
+                      notif_buf_index);
+        return status;
+    }
+
+    if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_block_context)
+        return NV_ERR_NO_MEMORY;
+
+    block_context = uvm_va_block_context_alloc(NULL);
+    if (!block_context)
+        return NV_ERR_NO_MEMORY;
+
+    parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
+        block_context;
+
+    nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
+                           access_counters_isr_bottom_half_entry,
+                           &parent_gpu->access_counter_buffer[notif_buf_index]);
+
+    // Access counters interrupts are initially disabled. They are
+    // dynamically enabled when the GPU is registered on a VA space.
+    parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count = 0;
+
+    if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc_stats_cpu)
+        return NV_ERR_NO_MEMORY;
+
+    parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count =
+        uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count) *
+                          num_possible_cpus());
+    if (!parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count)
+        return NV_ERR_NO_MEMORY;
+
+    return NV_OK;
+}
+
 NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
 {
    NV_STATUS status = NV_OK;
@@ -316,7 +378,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
        if (!block_context)
            return NV_ERR_NO_MEMORY;

-        parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;
+        parent_gpu->fault_buffer.replayable.block_service_context.block_context = block_context;

        parent_gpu->isr.replayable_faults.handling = true;

@@ -344,7 +406,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
            if (!block_context)
                return NV_ERR_NO_MEMORY;

-            parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;
+            parent_gpu->fault_buffer.non_replayable.block_service_context.block_context = block_context;

            parent_gpu->isr.non_replayable_faults.handling = true;

@@ -361,32 +423,31 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
        }

        if (parent_gpu->access_counters_supported) {
-            status = uvm_parent_gpu_init_access_counters(parent_gpu);
-            if (status != NV_OK) {
-                UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
-                              nvstatusToString(status),
-                              uvm_parent_gpu_name(parent_gpu));
-                return status;
+            NvU32 index_count = parent_gpu->rm_info.accessCntrBufferCount;
+            NvU32 notif_buf_index;
+
+            UVM_ASSERT(index_count > 0);
+
+            if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
+                return NV_ERR_NO_MEMORY;
+
+            parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
+                                                                  index_count);
+            if (!parent_gpu->access_counter_buffer)
+                return NV_ERR_NO_MEMORY;
+
+            if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
+                return NV_ERR_NO_MEMORY;
+
+            parent_gpu->isr.access_counters = uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters) * index_count);
+            if (!parent_gpu->isr.access_counters)
+                return NV_ERR_NO_MEMORY;
+
+            for (notif_buf_index = 0; notif_buf_index < index_count; notif_buf_index++) {
+                status = uvm_isr_init_access_counters(parent_gpu, notif_buf_index);
+                if (status != NV_OK)
+                    return status;
            }
-
-            block_context = uvm_va_block_context_alloc(NULL);
-            if (!block_context)
-                return NV_ERR_NO_MEMORY;
-
-            parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
-                block_context;
-
-            nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
-                                   access_counters_isr_bottom_half_entry,
-                                   parent_gpu);
-
-            // Access counters interrupts are initially disabled. They are
-            // dynamically enabled when the GPU is registered on a VA space.
-            parent_gpu->isr.access_counters.handling_ref_count = 0;
-            parent_gpu->isr.access_counters.stats.cpu_exec_count =
-                uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
-            if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
-                return NV_ERR_NO_MEMORY;
        }
    }

@@ -401,7 +462,15 @@ void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)

 void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
 {
-    UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
+    NvU32 notif_buf_index;
+
+    if (parent_gpu->isr.access_counters) {
+        for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
+            UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
+                           "notif buf index: %u\n",
+                           notif_buf_index);
+        }
+    }

    // Now that the GPU is safely out of the global table, lock the GPU and mark
    // it as no longer handling interrupts so the top half knows not to schedule
@@ -459,24 +528,38 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
    }

    if (parent_gpu->access_counters_supported) {
-        // It is safe to deinitialize access counters even if they have not been
-        // successfully initialized.
-        uvm_parent_gpu_deinit_access_counters(parent_gpu);
-        block_context =
-            parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
-        uvm_va_block_context_free(block_context);
+        NvU32 notif_buf_index;
+
+        for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
+            // It is safe to deinitialize access counters even if they have not
+            // been successfully initialized.
+            uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
+
+            if (parent_gpu->access_counter_buffer) {
+                uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
+                block_context = access_counter->batch_service_context.block_service_context.block_context;
+                uvm_va_block_context_free(block_context);
+            }
+
+            if (parent_gpu->isr.access_counters)
+                uvm_kvfree(parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count);
+        }
+
+        uvm_kvfree(parent_gpu->isr.access_counters);
+        uvm_kvfree(parent_gpu->access_counter_buffer);
    }

    if (parent_gpu->non_replayable_faults_supported) {
-        block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
+        block_context = parent_gpu->fault_buffer.non_replayable.block_service_context.block_context;
        uvm_va_block_context_free(block_context);
+
+        uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
    }

-    block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
+    block_context = parent_gpu->fault_buffer.replayable.block_service_context.block_context;
    uvm_va_block_context_free(block_context);
+
    uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
-    uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
-    uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
 }

 uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -584,25 +667,29 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)

 static void access_counters_isr_bottom_half(void *args)
 {
-    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
+    uvm_access_counter_buffer_t *access_counters = (uvm_access_counter_buffer_t *)args;
+    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
+    NvU32 notif_buf_index = access_counters->index;
    unsigned int cpu;

    UVM_ASSERT(parent_gpu->access_counters_supported);
+    UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);

-    uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
+    uvm_record_lock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, UVM_LOCK_FLAGS_MODE_SHARED);

    // Multiple bottom halves for counter notifications can be running
-    // concurrently, but only one can be running this function for a given GPU
-    // since we enter with the access_counters_isr_lock held.
+    // concurrently, but only one per-notification-buffer (i.e.,
+    // notif_buf_index) can be running this function for a given GPU since we
+    // enter with the per-notification-buffer access_counters_isr_lock held.
    cpu = get_cpu();
-    ++parent_gpu->isr.access_counters.stats.bottom_half_count;
-    cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
-    ++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
+    ++parent_gpu->isr.access_counters[notif_buf_index].stats.bottom_half_count;
+    cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters[notif_buf_index].stats.cpus_used_mask);
+    ++parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count[cpu];
    put_cpu();

-    uvm_parent_gpu_service_access_counters(parent_gpu);
+    uvm_service_access_counters(access_counters);

-    uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
+    uvm_access_counters_isr_unlock(access_counters);

    uvm_parent_gpu_kref_put(parent_gpu);
 }
@@ -612,40 +699,35 @@ static void access_counters_isr_bottom_half_entry(void *args)
   UVM_ENTRY_VOID(access_counters_isr_bottom_half(args));
 }

+// When Confidential Computing is enabled, UVM does not (indirectly) trigger
+// the replayable fault interrupt by updating GET. This is because, in this
+// configuration, GET is a dummy register used to inform GSP-RM (the owner
+// of the HW replayable fault buffer) of the latest entry consumed by the
+// UVM driver. The real GET register is owned by GSP-RM.
+//
+// The retriggering of a replayable faults bottom half happens then
+// manually, by scheduling a bottom half for later if there is any pending
+// work in the fault buffer accessible by UVM. The retriggering addresses
+// two problematic scenarios caused by GET updates not setting any
+// interrupt:
+//
+//   (1) UVM didn't process all the entries up to cached PUT
+//
+//   (2) UVM did process all the entries up to cached PUT, but GSP-RM
+//       added new entries such that cached PUT is out-of-date
+//
+// In both cases, re-enablement of interrupts would have caused the
+// replayable fault to be triggered in a non-CC setup, because the updated
+// value of GET is different from PUT. But this is not the case in Confidential
+// Computing, so a bottom half needs to be manually scheduled in order to
+// ensure that all faults are serviced.
+//
+// While in the typical case the retriggering happens within a replayable
+// fault bottom half, it can also happen within a non-interrupt path such as
+// uvm_gpu_fault_buffer_flush.
 static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu)
 {
-    bool retrigger = false;
-
-    // When Confidential Computing is enabled, UVM does not (indirectly) trigger
-    // the replayable fault interrupt by updating GET. This is because, in this
-    // configuration, GET is a dummy register used to inform GSP-RM (the owner
-    // of the HW replayable fault buffer) of the latest entry consumed by the
-    // UVM driver. The real GET register is owned by GSP-RM.
-    //
-    // The retriggering of a replayable faults bottom half happens then
-    // manually, by scheduling a bottom half for later if there is any pending
-    // work in the fault buffer accessible by UVM. The retriggering adddresses
-    // two problematic scenarios caused by GET updates not setting any
-    // interrupt:
-    //
-    //   (1) UVM didn't process all the entries up to cached PUT
-    //
-    //   (2) UVM did process all the entries up to cached PUT, but GSP-RM
-    //       added new entries such that cached PUT is out-of-date
-    //
-    // In both cases, re-enablement of interrupts would have caused the
-    // replayable fault to be triggered in a non-CC setup, because the updated
-    // value of GET is different from PUT. But this not the case in Confidential
-    // Computing, so a bottom half needs to be manually scheduled in order to
-    // ensure that all faults are serviced.
-    //
-    // While in the typical case the retriggering happens within a replayable
-    // fault bottom half, it can also happen within a non-interrupt path such as
-    // uvm_gpu_fault_buffer_flush.
-    if (g_uvm_global.conf_computing_enabled)
-        retrigger = true;
-
-    if (!retrigger)
+    if (!g_uvm_global.conf_computing_enabled)
        return;

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
@@ -730,7 +812,7 @@ void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
        // clear_replayable_faults is a no-op for architectures that don't
        // support pulse-based interrupts.
        parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
-                                                              parent_gpu->fault_buffer_info.replayable.cached_get);
+                                                              parent_gpu->fault_buffer.replayable.cached_get);
    }

    // This unlock call has to be out-of-order unlock due to interrupts_lock
@@ -756,37 +838,41 @@ void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gp
    uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
 }

-void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
+void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters)
 {
    // See comments in uvm_parent_gpu_replayable_faults_isr_lock
+    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
+    NvU32 notif_buf_index = access_counters->index;

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

-    uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
+    uvm_access_counters_intr_disable(access_counters);

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);

-    uvm_down(&parent_gpu->isr.access_counters.service_lock);
+    uvm_down(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
 }

-void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
+void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters)
 {
+    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
+    NvU32 notif_buf_index = access_counters->index;
+    uvm_access_counter_buffer_hal_t *ac_hal = parent_gpu->access_counter_buffer_hal;
+
    UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);

    // See comments in uvm_parent_gpu_replayable_faults_isr_unlock

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

-    uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
+    uvm_access_counters_intr_enable(access_counters);

-    if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
-        parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
-                                                                                  parent_gpu->access_counter_buffer_info.cached_get);
-    }
+    if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
+        ac_hal->clear_access_counter_notifications(access_counters, access_counters->cached_get);

    // This unlock call has to be out-of-order unlock due to interrupts_lock
    // still being held. Otherwise, it would result in a lock order violation.
-    uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
+    uvm_up_out_of_order(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
 }
@@ -811,8 +897,11 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *paren
        parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
 }

-void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
+void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters)
 {
+    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
+    NvU32 notif_buf_index = access_counters->index;
+
    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);

    // The read of handling_ref_count could race with a write from
@@ -820,24 +909,27 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
    // ISR lock. But those functions are invoked with the interrupt disabled
    // (disable_intr_ref_count > 0), so the check always returns false when the
    // race occurs
-    if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
-        parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
-        parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
+    if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
+        parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
+        parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters);
    }

-    ++parent_gpu->isr.access_counters.disable_intr_ref_count;
+    ++parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
 }

-void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
+void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters)
 {
+    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
+    NvU32 notif_buf_index = access_counters->index;
+
    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
-    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
-    UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
+    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
+    UVM_ASSERT(parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count > 0);

-    --parent_gpu->isr.access_counters.disable_intr_ref_count;
+    --parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;

-    if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
-        parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
-        parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
+    if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
+        parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
+        parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters);
    }
 }
--- a/kernel-open/nvidia-uvm/uvm_gpu_isr.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_isr.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -70,8 +70,8 @@ typedef struct

    struct
    {
-        // Number of the bottom-half invocations for this interrupt on a GPU over
-        // its lifetime
+        // Number of the bottom-half invocations for this interrupt on a GPU
+        // over its lifetime.
        NvU64 bottom_half_count;

        // A bitmask of the CPUs on which the bottom half has executed. The
@@ -110,20 +110,20 @@ typedef struct
    // bottom-half per interrupt type.
    nv_kthread_q_t bottom_half_q;

-    // Protects the state of interrupts (enabled/disabled) and whether the GPU is
-    // currently handling them. Taken in both interrupt and process context.
+    // Protects the state of interrupts (enabled/disabled) and whether the GPU
+    // is currently handling them. Taken in both interrupt and process context.
    uvm_spinlock_irqsave_t interrupts_lock;

    uvm_intr_handler_t replayable_faults;
    uvm_intr_handler_t non_replayable_faults;
-    uvm_intr_handler_t access_counters;
+    uvm_intr_handler_t *access_counters;

    // Kernel thread used to kill channels on fatal non-replayable faults.
    // This is needed because we cannot call into RM from the bottom-half to
    // avoid deadlocks.
    nv_kthread_q_t kill_channel_q;

-    // Number of top-half ISRs called for this GPU over its lifetime
+    // Number of top-half ISRs called for this GPU over its lifetime.
    NvU64 interrupt_count;
 } uvm_isr_info_t;

@@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
 // Initialize ISR handling state
 NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);

-// Flush any currently scheduled bottom halves.  This is called during GPU
+// Flush any currently scheduled bottom halves. This is called during GPU
 // removal.
 void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);

@@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
 void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);

 // Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
-// half thread.  This will also disable replayable page fault interrupts (if
+// half thread. This will also disable replayable page fault interrupts (if
 // supported by the GPU) because the top half attempts to take this lock, and we
 // would cause an interrupt storm if we didn't disable them first.
 //
@@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
 void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);

 // Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
-// re-enable replayable page fault interrupts.  Unlike
-// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
+// re-enable replayable page fault interrupts. Unlike
+// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
 // non-top/bottom half threads, this can be called by any thread.
 void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);

 // Lock/unlock routines for non-replayable faults. These do not need to prevent
 // interrupt storms since the GPU fault buffers for non-replayable faults are
-// managed by RM.  Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
+// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
 // under the parent need to have been previously retained.
 void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
 void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);

 // See uvm_parent_gpu_replayable_faults_isr_lock/unlock
-void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
-void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
+void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
+void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);

 // Increments the reference count tracking whether access counter interrupts
 // should be disabled. The caller is guaranteed that access counter interrupts
 // are disabled upon return. Interrupts might already be disabled prior to
 // making this call. Each call is ref-counted, so this must be paired with a
-// call to uvm_parent_gpu_access_counters_intr_enable().
+// call to uvm_access_counters_intr_enable().
 //
 // parent_gpu->isr.interrupts_lock must be held to call this function.
-void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
+void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);

 // Decrements the reference count tracking whether access counter interrupts
 // should be disabled. Only once the count reaches 0 are the HW interrupts
 // actually enabled, so this call does not guarantee that the interrupts have
 // been re-enabled upon return.
 //
-// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
-// calling this function.
+// uvm_access_counters_intr_disable() must have been called prior to calling
+// this function.
 //
 // NOTE: For pulse-based interrupts, the caller is responsible for re-arming
 // the interrupt.
 //
 // parent_gpu->isr.interrupts_lock must be held to call this function.
-void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
+void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);

 // Return the first valid GPU given the parent GPU or NULL if no MIG instances
 // are registered. This should only be called from bottom halves or if the
 // g_uvm_global.global_lock is held so that the returned pointer remains valid.
-//
 uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);

 #endif // __UVM_GPU_ISR_H__
--- a/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2017-2024 NVIDIA Corporation
+    Copyright (c) 2017-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -119,18 +119,18 @@
 // calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
 NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
 {
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
+    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;

    UVM_ASSERT(parent_gpu->non_replayable_faults_supported);

    non_replayable_faults->shadow_buffer_copy = NULL;
    non_replayable_faults->fault_cache        = NULL;

-    non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
+    non_replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize /
                                        parent_gpu->fault_buffer_hal->entry_size(parent_gpu);

    non_replayable_faults->shadow_buffer_copy =
-        uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
+        uvm_kvmalloc_zero(parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize);
    if (!non_replayable_faults->shadow_buffer_copy)
        return NV_ERR_NO_MEMORY;

@@ -147,7 +147,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_

 void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
 {
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
+    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;

    if (non_replayable_faults->fault_cache) {
        UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
@@ -170,7 +170,7 @@ bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)

    UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);

-    status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
+    status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
                                                         &has_pending_faults);
    UVM_ASSERT(status == NV_OK);

@@ -182,14 +182,14 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
    NV_STATUS status;
    NvU32 i;
    NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
+    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
    char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
    uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
    UVM_ASSERT(parent_gpu->non_replayable_faults_supported);

-    status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
+    status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
                                                  current_hw_entry,
                                                  cached_faults);

@@ -267,7 +267,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
    uvm_gpu_t *gpu = user_channel->gpu;
    NV_STATUS status;
    uvm_push_t push;
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
+    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;

    UVM_ASSERT(!fault_entry->is_fatal);

@@ -355,7 +355,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
    uvm_processor_id_t new_residency;
    bool read_duplicate;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
+    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
    const uvm_va_policy_t *policy;

    UVM_ASSERT(!fault_entry->is_fatal);
@@ -450,7 +450,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
    NV_STATUS status, tracker_status;
    uvm_va_block_retry_t va_block_retry;
    uvm_gpu_t *gpu = fault_entry->gpu;
-    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
+    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.non_replayable.block_service_context;

    service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
    service_context->num_retries = 0;
@@ -467,7 +467,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
                                                                             service_context,
                                                                             hmm_migratable));

-    tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
+    tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer.non_replayable.fault_service_tracker,
                                                  &va_block->tracker);

    uvm_mutex_unlock(&va_block->lock);
@@ -507,7 +507,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
 {
    uvm_va_space_t *va_space = fault_entry->va_space;
    uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
+    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
    void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
                   (fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));

@@ -551,7 +551,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
 {
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
+    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
    uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
    NV_STATUS status = lookup_status;
    NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
@@ -579,6 +579,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
        uvm_fault_access_type_t fault_access_type = fault_entry->fault_access_type;
        uvm_ats_fault_context_t *ats_context = &non_replayable_faults->ats_context;

+        uvm_page_mask_zero(&ats_context->faults.prefetch_only_fault_mask);
        uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
        uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
        uvm_page_mask_zero(&ats_context->faults.accessed_mask);
@@ -648,7 +649,7 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
    struct mm_struct *mm;
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_gpu_t *gpu;
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
+    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
    uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;

    status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
@@ -756,7 +757,7 @@ exit_no_channel:
 static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
 {
    uvm_service_block_context_t *service_context =
-        &parent_gpu->fault_buffer_info.non_replayable.block_service_context;
+        &parent_gpu->fault_buffer.non_replayable.block_service_context;
    NV_STATUS status;
    bool hmm_migratable = true;

@@ -793,7 +794,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent
        // non-replayable faults since getting multiple faults on the same
        // memory region is not very likely
        for (i = 0; i < cached_faults; ++i) {
-            status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
+            status = service_fault(parent_gpu, &parent_gpu->fault_buffer.non_replayable.fault_cache[i]);
            if (status != NV_OK)
                return;
        }
--- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -119,7 +119,7 @@ module_param(uvm_perf_fault_coalesce, uint, S_IRUGO);
 // the power management resume path.
 static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
 {
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;

    // Read the current get/put pointers, as this might not be the first time
    // we take control of the fault buffer since the GPU was initialized,
@@ -129,7 +129,7 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
    replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);

    // (Re-)enable fault prefetching
-    if (parent_gpu->fault_buffer_info.prefetch_faults_enabled)
+    if (parent_gpu->fault_buffer.prefetch_faults_enabled)
        parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
    else
        parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
@@ -140,28 +140,28 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
 static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
 {
    NV_STATUS status = NV_OK;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

-    UVM_ASSERT(parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize %
+    UVM_ASSERT(parent_gpu->fault_buffer.rm_info.replayable.bufferSize %
               parent_gpu->fault_buffer_hal->entry_size(parent_gpu) == 0);

-    replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize /
+    replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.replayable.bufferSize /
                                    parent_gpu->fault_buffer_hal->entry_size(parent_gpu);

    // Check provided module parameter value
-    parent_gpu->fault_buffer_info.max_batch_size = max(uvm_perf_fault_batch_count,
-                                                       (NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
-    parent_gpu->fault_buffer_info.max_batch_size = min(parent_gpu->fault_buffer_info.max_batch_size,
-                                                       replayable_faults->max_faults);
+    parent_gpu->fault_buffer.max_batch_size = max(uvm_perf_fault_batch_count,
+                                                  (NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
+    parent_gpu->fault_buffer.max_batch_size = min(parent_gpu->fault_buffer.max_batch_size,
+                                                  replayable_faults->max_faults);

-    if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
-        pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
-                uvm_parent_gpu_name(parent_gpu),
-                uvm_perf_fault_batch_count,
-                UVM_PERF_FAULT_BATCH_COUNT_MIN,
-                replayable_faults->max_faults,
-                parent_gpu->fault_buffer_info.max_batch_size);
+    if (parent_gpu->fault_buffer.max_batch_size != uvm_perf_fault_batch_count) {
+        UVM_INFO_PRINT("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
+                       uvm_parent_gpu_name(parent_gpu),
+                       uvm_perf_fault_batch_count,
+                       UVM_PERF_FAULT_BATCH_COUNT_MIN,
+                       replayable_faults->max_faults,
+                       parent_gpu->fault_buffer.max_batch_size);
    }

    batch_context->fault_cache = uvm_kvmalloc_zero(replayable_faults->max_faults * sizeof(*batch_context->fault_cache));
@@ -198,22 +198,22 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
                                           UVM_PERF_FAULT_REPLAY_POLICY_DEFAULT;

    if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
-        pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
-                uvm_parent_gpu_name(parent_gpu),
-                uvm_perf_fault_replay_policy,
-                replayable_faults->replay_policy);
+        UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
+                       uvm_parent_gpu_name(parent_gpu),
+                       uvm_perf_fault_replay_policy,
+                       replayable_faults->replay_policy);
    }

    replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
    if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
-        pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
-                uvm_parent_gpu_name(parent_gpu),
-                uvm_perf_fault_replay_update_put_ratio,
-                replayable_faults->replay_update_put_ratio);
+        UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
+                       uvm_parent_gpu_name(parent_gpu),
+                       uvm_perf_fault_replay_update_put_ratio,
+                       replayable_faults->replay_update_put_ratio);
    }

    // Re-enable fault prefetching just in case it was disabled in a previous run
-    parent_gpu->fault_buffer_info.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
+    parent_gpu->fault_buffer.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;

    fault_buffer_reinit_replayable_faults(parent_gpu);

@@ -222,7 +222,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp

 static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
 {
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

    if (batch_context->fault_cache) {
@@ -230,9 +230,9 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
        uvm_tracker_deinit(&replayable_faults->replay_tracker);
    }

-    if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
+    if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
        // Re-enable prefetch faults in case we disabled them
-        if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer_info.prefetch_faults_enabled)
+        if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer.prefetch_faults_enabled)
            parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
    }

@@ -252,7 +252,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->replayable_faults_supported);

    status = uvm_rm_locked_call(nvUvmInterfaceInitFaultInfo(parent_gpu->rm_device,
-                                                            &parent_gpu->fault_buffer_info.rm_info));
+                                                            &parent_gpu->fault_buffer.rm_info));
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
                      nvstatusToString(status),
@@ -262,7 +262,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
        // when it returns an error. Set the buffer handle to zero as it is
        // used by the deinitialization logic to determine if it was correctly
        // initialized.
-        parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
+        parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
        goto fail;
    }

@@ -304,24 +304,25 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)

    fault_buffer_deinit_replayable_faults(parent_gpu);

-    if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
+    if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
        status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE));
        UVM_ASSERT(status == NV_OK);

        uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
-                                                               &parent_gpu->fault_buffer_info.rm_info));
+                                                               &parent_gpu->fault_buffer.rm_info));

-        parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
+        parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
    }
 }

 bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
 {
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;

    UVM_ASSERT(parent_gpu->replayable_faults_supported);

-    // Fast path 1: we left some faults unserviced in the buffer in the last pass
+    // Fast path 1: we left some faults unserviced in the buffer in the last
+    // pass
    if (replayable_faults->cached_get != replayable_faults->cached_put)
        return true;

@@ -357,7 +358,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
 {
    NV_STATUS status;
    uvm_push_t push;
-    uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
+    uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer.replayable.replay_tracker;

    UVM_ASSERT(tracker != NULL);

@@ -443,7 +444,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
    uvm_gpu_t *gpu = fault_entry->gpu;
    uvm_gpu_phys_address_t pdb;
    uvm_push_t push;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    NvU64 offset;

    UVM_ASSERT(gpu->parent->replayable_faults_supported);
@@ -505,7 +506,7 @@ static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
 {
    NV_STATUS status;
    uvm_push_t push;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_tracker_t *tracker = NULL;

    if (batch_context)
@@ -556,7 +557,7 @@ static NV_STATUS push_replay_on_parent_gpu(uvm_parent_gpu_t *parent_gpu,

 static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
 {
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

@@ -589,7 +590,7 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_f
        return NV_OK;

    is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
-    status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
+    status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer.rm_info, is_flush_mode_move);

    UVM_ASSERT(status == NV_OK);

@@ -618,7 +619,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
    NvU32 get;
    NvU32 put;
    uvm_spin_loop_t spin;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    NV_STATUS status;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -636,6 +637,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
        status = hw_fault_buffer_flush_locked(parent_gpu, HW_FAULT_BUFFER_FLUSH_MODE_DISCARD);
        if (status != NV_OK)
            return status;
+
        replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
    }

@@ -851,7 +853,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
    uvm_fault_buffer_entry_t *fault_cache;
    uvm_spin_loop_t spin;
    NV_STATUS status = NV_OK;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    const bool in_pascal_cancel_path = (!parent_gpu->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
    const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;

@@ -886,7 +888,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,

    // Parse until get != put and have enough space to cache.
    while ((get != put) &&
-           (fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer_info.max_batch_size)) {
+           (fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer.max_batch_size)) {
        bool is_same_instance_ptr = true;
        uvm_fault_buffer_entry_t *current_entry = &fault_cache[fault_index];
        uvm_fault_utlb_info_t *current_tlb;
@@ -898,7 +900,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
            // We have some entry to work on. Let's do the rest later.
            if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
                goto done;
-            
+
            status = uvm_global_get_status();
            if (status != NV_OK)
                goto done;
@@ -1384,7 +1386,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
    uvm_page_index_t last_page_index;
    NvU32 page_fault_count = 0;
    uvm_range_group_range_iter_t iter;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
    uvm_fault_buffer_entry_t *first_fault_entry = ordered_fault_cache[first_fault_index];
    uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
@@ -1611,7 +1613,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
    NV_STATUS status;
    uvm_va_block_retry_t va_block_retry;
    NV_STATUS tracker_status;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_service_block_context_t *fault_block_context = &replayable_faults->block_service_context;

    fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
@@ -1715,7 +1717,7 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac

    status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);

-    // Remove prefetched pages from the serviced mask since fault servicing
+    // Remove SW prefetched pages from the serviced mask since fault servicing
    // failures belonging to prefetch pages need to be ignored.
    uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);

@@ -1777,6 +1779,7 @@ static void start_new_sub_batch(NvU64 *sub_batch_base,
 {
    uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
    uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
+    uvm_page_mask_zero(&ats_context->faults.prefetch_only_fault_mask);

    *sub_batch_fault_index = fault_index;
    *sub_batch_base = UVM_VA_BLOCK_ALIGN_DOWN(address);
@@ -1798,9 +1801,10 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
    uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
    uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
    uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
+    uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    bool replay_per_va_block =
-                        (gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
+                        (gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);

    UVM_ASSERT(vma);

@@ -1829,7 +1833,9 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,

        // End of sub-batch. Service faults gathered so far.
        if (fault_address >= (sub_batch_base + UVM_VA_BLOCK_SIZE)) {
-            UVM_ASSERT(!uvm_page_mask_empty(read_fault_mask) || !uvm_page_mask_empty(write_fault_mask));
+            UVM_ASSERT(!uvm_page_mask_empty(read_fault_mask) ||
+                       !uvm_page_mask_empty(write_fault_mask) ||
+                       !uvm_page_mask_empty(prefetch_only_fault_mask));

            status = service_fault_batch_ats_sub_vma(gpu_va_space,
                                                     vma,
@@ -1846,8 +1852,14 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,

        page_index = (fault_address - sub_batch_base) / PAGE_SIZE;

-        if ((access_type <= UVM_FAULT_ACCESS_TYPE_READ) ||
-             uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ))
+        // Do not check for coalesced access type. If there are multiple
+        // different accesses to an address, we can disregard the prefetch one.
+        if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
+            (uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
+            uvm_page_mask_set(prefetch_only_fault_mask, page_index);
+
+        if ((access_type == UVM_FAULT_ACCESS_TYPE_READ) ||
+            uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ))
            uvm_page_mask_set(read_fault_mask, page_index);

        if (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE)
@@ -1861,7 +1873,10 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
             (previous_entry->va_space == current_entry->va_space));

    // Service the last sub-batch.
-    if ((status == NV_OK) && (!uvm_page_mask_empty(read_fault_mask) || !uvm_page_mask_empty(write_fault_mask))) {
+    if ((status == NV_OK) &&
+        (!uvm_page_mask_empty(read_fault_mask) ||
+         !uvm_page_mask_empty(write_fault_mask) ||
+         !uvm_page_mask_empty(prefetch_only_fault_mask))) {
        status = service_fault_batch_ats_sub_vma(gpu_va_space,
                                                 vma,
                                                 sub_batch_base,
@@ -1942,7 +1957,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
    uvm_va_block_t *va_block;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    uvm_va_block_context_t *va_block_context =
-        gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
+        gpu->parent->fault_buffer.replayable.block_service_context.block_context;
    uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
    struct mm_struct *mm = va_block_context->mm;
    NvU64 fault_address = current_entry->fault_address;
@@ -1971,7 +1986,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
        NvU64 outer = ~0ULL;

         UVM_ASSERT(replay_per_va_block ==
-                    (gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
+                    (gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));

        // Limit outer to the minimum of next va_range.start and first
        // fault_address' next UVM_GMMU_ATS_GRANULARITY alignment so that it's
@@ -2032,8 +2047,8 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
    uvm_gpu_t *gpu = batch_context->fatal_gpu;
    uvm_gpu_va_space_t *gpu_va_space = NULL;
    struct mm_struct *mm;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
-    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
+    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
+    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.replayable.block_service_context;
    uvm_va_block_context_t *va_block_context = service_context->block_context;

    UVM_ASSERT(va_space);
@@ -2141,7 +2156,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
            ++i;
        }
        else {
-            uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
+            uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer.replayable.ats_invalidate;
            NvU32 block_faults;
            const bool hmm_migratable = true;

@@ -2222,12 +2237,12 @@ static NV_STATUS service_fault_batch(uvm_parent_gpu_t *parent_gpu,
    NvU32 i;
    uvm_va_space_t *va_space = NULL;
    uvm_gpu_va_space_t *prev_gpu_va_space = NULL;
-    uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer_info.replayable.ats_invalidate;
+    uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer.replayable.ats_invalidate;
    struct mm_struct *mm = NULL;
    const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
-                                     parent_gpu->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
+                                     parent_gpu->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
    uvm_service_block_context_t *service_context =
-        &parent_gpu->fault_buffer_info.replayable.block_service_context;
+        &parent_gpu->fault_buffer.replayable.block_service_context;
    uvm_va_block_context_t *va_block_context = service_context->block_context;
    bool hmm_migratable = true;

@@ -2697,8 +2712,9 @@ static void cancel_fault_batch(uvm_parent_gpu_t *parent_gpu,
 //   5- Fetch all faults from buffer
 //   6- Check what uTLBs are in lockdown mode and can be cancelled
 //   7- Preprocess faults (order per va_space, fault address, access type)
-//   8- Service all non-fatal faults and mark all non-serviceable faults as fatal
-//      6.1- If fatal faults are not found, we are done
+//   8- Service all non-fatal faults and mark all non-serviceable faults as
+//      fatal.
+//      8.1- If fatal faults are not found, we are done
 //   9- Search for a uTLB which can be targeted for cancel, as described in
 //      try_to_cancel_utlbs. If found, cancel it.
 // END LOOP
@@ -2712,14 +2728,14 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
 {
    NV_STATUS status;
    NV_STATUS tracker_status;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    bool first = true;

    UVM_ASSERT(gpu->parent->replayable_faults_supported);

    // 1) Disable prefetching to avoid new requests keep coming and flooding
    //    the buffer
-    if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
+    if (gpu->parent->fault_buffer.prefetch_faults_enabled)
        gpu->parent->arch_hal->disable_prefetch_faults(gpu->parent);

    while (1) {
@@ -2833,7 +2849,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
    }

    // 10) Re-enable prefetching
-    if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
+    if (gpu->parent->fault_buffer.prefetch_faults_enabled)
        gpu->parent->arch_hal->enable_prefetch_faults(gpu->parent);

    if (status == NV_OK)
@@ -2870,16 +2886,16 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu,
    // comment in mark_fault_invalid_prefetch(..).
    // Some tests rely on this logic (and ratio) to correctly disable prefetch
    // fault reporting. If the logic changes, the tests will have to be changed.
-    if (parent_gpu->fault_buffer_info.prefetch_faults_enabled &&
+    if (parent_gpu->fault_buffer.prefetch_faults_enabled &&
        uvm_perf_reenable_prefetch_faults_lapse_msec > 0 &&
-        ((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer_info.max_batch_size * 2) ||
+        ((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer.max_batch_size * 2) ||
         (uvm_enable_builtin_tests &&
          parent_gpu->rm_info.isSimulated &&
          batch_context->num_invalid_prefetch_faults > 5))) {
        uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
    }
-    else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
-        NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
+    else if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
+        NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer.disable_prefetch_faults_timestamp;

        // Reenable prefetch faults after some time
        if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
@@ -2893,7 +2909,7 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu)
    NvU32 num_batches = 0;
    NvU32 num_throttled = 0;
    NV_STATUS status = NV_OK;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
+    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

    UVM_ASSERT(parent_gpu->replayable_faults_supported);
@@ -3016,9 +3032,9 @@ void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
    UVM_ASSERT(parent_gpu->prefetch_fault_supported);

-    if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
+    if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
        parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
-        parent_gpu->fault_buffer_info.prefetch_faults_enabled = true;
+        parent_gpu->fault_buffer.prefetch_faults_enabled = true;
    }
 }

@@ -3027,10 +3043,10 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
    UVM_ASSERT(parent_gpu->prefetch_fault_supported);

-    if (parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
+    if (parent_gpu->fault_buffer.prefetch_faults_enabled) {
        parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
-        parent_gpu->fault_buffer_info.prefetch_faults_enabled = false;
-        parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp = NV_GETTIME();
+        parent_gpu->fault_buffer.prefetch_faults_enabled = false;
+        parent_gpu->fault_buffer.disable_prefetch_faults_timestamp = NV_GETTIME();
    }
 }

--- a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2023 NVIDIA Corporation
+    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -30,13 +30,34 @@

 #define UVM_SEMAPHORE_SIZE 4
 #define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
-#define UVM_SEMAPHORE_COUNT_PER_PAGE (PAGE_SIZE / UVM_SEMAPHORE_SIZE)
+#define UVM_SEMAPHORE_COUNT_PER_PAGE (UVM_SEMAPHORE_PAGE_SIZE / UVM_SEMAPHORE_SIZE)

 // The top nibble of the canary base is intentionally 0. The rest of the value
 // is arbitrary. See the comments below on make_canary.
 #define UVM_SEMAPHORE_CANARY_BASE     0x0badc0de
 #define UVM_SEMAPHORE_CANARY_MASK     0xf0000000

+// In Confidential Computing, the representation of the semaphore payload
+// requires additional storage (fields), because it is encrypted.
+//
+// The payload fields are written by the GPU, and read by the CPU.
+typedef struct
+{
+    // The actual (encrypted) payload value.
+    NvU32 encrypted_payload;
+
+    // Plaintext number used to version a {encrypted_payload, auth_tag} pair.
+    // The notifier ensures that the CPU can decrypt a valid snapshot of those
+    // encryption materials.
+    uvm_gpu_semaphore_notifier_t notifier;
+
+    // Padding used to enforce 16-byte alignment of the authentication tag.
+    NvU64 unused;
+
+    // Authentication tag associated with the encrypted payload.
+    NvU8 auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
+} uvm_gpu_encrypted_semaphore_payload_t;
+
 struct uvm_gpu_semaphore_pool_struct
 {
    // The GPU owning the pool
@@ -61,14 +82,9 @@ struct uvm_gpu_semaphore_pool_page_struct
    uvm_rm_mem_t *memory;

    struct {
-        // Unprotected sysmem storing encrypted value of semaphores
+        // Unprotected sysmem storing encrypted value of semaphores, in addition
+        // to other encryption-related data.
        uvm_rm_mem_t *encrypted_payload_memory;
-
-        // Unprotected sysmem storing encryption auth tags
-        uvm_rm_mem_t *auth_tag_memory;
-
-        // Unprotected sysmem storing plain text notifier values
-        uvm_rm_mem_t *notifier_memory;
    } conf_computing;

    // Pool the page is part of
@@ -131,7 +147,6 @@ static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
    // A pool allocated in the CPR of vidmem cannot be read/written from the
    // CPU.
    return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
-    return UVM_IS_DEBUG();
 }

 // Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
@@ -146,68 +161,49 @@ static void pool_page_free_buffers(uvm_gpu_semaphore_pool_page_t *page)
    uvm_rm_mem_free(page->memory);
    page->memory = NULL;

-    if (gpu_semaphore_pool_is_secure(page->pool)) {
-        uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
-        uvm_rm_mem_free(page->conf_computing.auth_tag_memory);
-        uvm_rm_mem_free(page->conf_computing.notifier_memory);
-
-        page->conf_computing.encrypted_payload_memory = NULL;
-        page->conf_computing.auth_tag_memory = NULL;
-        page->conf_computing.notifier_memory = NULL;
-    }
-    else {
+    if (!gpu_semaphore_pool_is_secure(page->pool))
        UVM_ASSERT(!page->conf_computing.encrypted_payload_memory);
-        UVM_ASSERT(!page->conf_computing.auth_tag_memory);
-        UVM_ASSERT(!page->conf_computing.notifier_memory);
-    }
+
+    uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
+    page->conf_computing.encrypted_payload_memory = NULL;
 }

 static NV_STATUS pool_page_alloc_buffers(uvm_gpu_semaphore_pool_page_t *page)
 {
    NV_STATUS status;
    uvm_gpu_semaphore_pool_t *pool = page->pool;
+    uvm_gpu_t *gpu = pool->gpu;
    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
-    size_t align = 0;
-    bool map_all = true;
-    align = gpu_semaphore_pool_is_secure(pool) ? UVM_CONF_COMPUTING_BUF_ALIGNMENT : 0;
-    map_all = gpu_semaphore_pool_is_secure(pool) ? false : true;
-
-    if (map_all)
-        status = uvm_rm_mem_alloc_and_map_all(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
-    else
-        status = uvm_rm_mem_alloc(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
-
-    if (status != NV_OK)
-        goto error;
+    size_t memory_size = UVM_SEMAPHORE_PAGE_SIZE;

    if (!gpu_semaphore_pool_is_secure(pool))
-        return NV_OK;
+        return uvm_rm_mem_alloc_and_map_all(gpu, memory_type, memory_size, 0, &page->memory);

-    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
+    status = uvm_rm_mem_alloc(gpu, memory_type, memory_size, UVM_CONF_COMPUTING_BUF_ALIGNMENT, &page->memory);
+    if (status != NV_OK)
+        goto error;
+
+    // TODO: Bug 4607874: This check can be removed once a more general solution
+    // to prevent reordering of CE writes is in place.
+    //
+    // The sysmem allocation backing the page's encrypted payload memory must be
+    // 32-byte aligned (UVM_CONF_COMPUTING_BUF_ALIGNMENT). If each individual
+    // encrypted payload is 32 bytes, then it never spans more than a single,
+    // naturally aligned, segment of 32 bytes. This is required to prevent
+    // reordering issues that result in failures when decrypting the semaphore's
+    // payload on the CPU.
+    BUILD_BUG_ON(sizeof(uvm_gpu_encrypted_semaphore_payload_t) != UVM_CONF_COMPUTING_BUF_ALIGNMENT);
+
+    BUILD_BUG_ON(offsetof(uvm_gpu_encrypted_semaphore_payload_t, auth_tag) != UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
+
+    status = uvm_rm_mem_alloc_and_map_cpu(gpu,
                                          UVM_RM_MEM_TYPE_SYS,
-                                          UVM_SEMAPHORE_PAGE_SIZE,
+                                          UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(uvm_gpu_encrypted_semaphore_payload_t),
                                          UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                                          &page->conf_computing.encrypted_payload_memory);
    if (status != NV_OK)
        goto error;

-    BUILD_BUG_ON(UVM_CONF_COMPUTING_AUTH_TAG_SIZE % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
-    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
-                                          UVM_RM_MEM_TYPE_SYS,
-                                          UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
-                                          UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
-                                          &page->conf_computing.auth_tag_memory);
-    if (status != NV_OK)
-        goto error;
-
-    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
-                                          UVM_RM_MEM_TYPE_SYS,
-                                          UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(NvU32),
-                                          0,
-                                          &page->conf_computing.notifier_memory);
-    if (status != NV_OK)
-        goto error;
-
    return NV_OK;
 error:
    pool_page_free_buffers(page);
@@ -492,51 +488,64 @@ NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore)
    return (NvU32*)(base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
 }

+static uvm_gpu_encrypted_semaphore_payload_t *encrypted_semaphore_payload(uvm_gpu_semaphore_t *semaphore)
+{
+    uvm_gpu_encrypted_semaphore_payload_t *encrypted_semaphore;
+
+    encrypted_semaphore = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);
+
+    return encrypted_semaphore + semaphore->index;
+}
+
+static NvU64 encrypted_semaphore_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
+{
+    uvm_gpu_semaphore_pool_page_t *page = semaphore->page;
+    NvU64 gpu_va_base = uvm_rm_mem_get_gpu_uvm_va(page->conf_computing.encrypted_payload_memory, page->pool->gpu);
+
+    return gpu_va_base + semaphore->index * sizeof(uvm_gpu_encrypted_semaphore_payload_t);
+}
+
 NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore)
 {
-    char *encrypted_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);
-
-    return (NvU32*)(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
+    return &encrypted_semaphore_payload(semaphore)->encrypted_payload;
 }

 uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
 {
-    NvU64 encrypted_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.encrypted_payload_memory,
-                                                        semaphore->page->pool->gpu);
+    size_t offset = offsetof(uvm_gpu_encrypted_semaphore_payload_t, encrypted_payload);
+    NvU64 gpu_va = encrypted_semaphore_payload_gpu_va(semaphore) + offset;

-    return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
+    UVM_ASSERT(IS_ALIGNED(gpu_va, UVM_CONF_COMPUTING_BUF_ALIGNMENT));
+
+    return uvm_gpu_address_virtual_unprotected(gpu_va);
 }

 uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
 {
-    uvm_gpu_semaphore_notifier_t *notifier_base_va =
-        uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);
-
-    return notifier_base_va + semaphore->index;
+    return &encrypted_semaphore_payload(semaphore)->notifier;
 }

 uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
 {
-    NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
-                                                       semaphore->page->pool->gpu);
+    size_t offset = offsetof(uvm_gpu_encrypted_semaphore_payload_t, notifier);
+    NvU64 gpu_va = encrypted_semaphore_payload_gpu_va(semaphore) + offset;

-    return uvm_gpu_address_virtual_unprotected(notifier_base_va +
-                                               semaphore->index * sizeof(uvm_gpu_semaphore_notifier_t));
+    return uvm_gpu_address_virtual_unprotected(gpu_va);
 }

 void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
 {
-    char *auth_tag_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.auth_tag_memory);
-
-    return (void*)(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
+    return encrypted_semaphore_payload(semaphore)->auth_tag;
 }

 uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore)
 {
-    NvU64 auth_tag_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.auth_tag_memory,
-                                                       semaphore->page->pool->gpu);
+    size_t offset = offsetof(uvm_gpu_encrypted_semaphore_payload_t, auth_tag);
+    NvU64 gpu_va = encrypted_semaphore_payload_gpu_va(semaphore) + offset;

-    return uvm_gpu_address_virtual_unprotected(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
+    UVM_ASSERT(IS_ALIGNED(gpu_va, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
+
+    return uvm_gpu_address_virtual_unprotected(gpu_va);
 }

 NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
@@ -595,7 +604,7 @@ static bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking
 NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
 {
    NV_STATUS status;
-    uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;
+    uvm_lock_order_t order;

    memset(tracking_sem, 0, sizeof(*tracking_sem));

@@ -607,6 +616,8 @@ NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_g

    if (g_uvm_global.conf_computing_enabled)
        order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;
+    else
+        order = UVM_LOCK_ORDER_LEAF;

    if (tracking_semaphore_uses_mutex(tracking_sem))
        uvm_mutex_init(&tracking_sem->m_lock, order);
--- a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.h
@@ -107,11 +107,12 @@ NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore
 void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool);

 // Allocate a semaphore from the pool.
+//
 // The semaphore will be mapped on all GPUs currently registered with the UVM
-// driver, and on all new GPUs which will be registered in the future.
-// Unless the Confidential Computing feature is enabled and the pool is a
-// secure pool. In this case, it is only mapped to the GPU that holds the
-// allocation.
+// driver, and on all new GPUs which will be registered in the future. The only
+// exceptions (in Confidential Computing) are semaphores allocated from a secure
+// pool, which are only mapped on the GPU that holds the allocation.
+//
 // The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
 // to the proxy address space.
 //
--- a/kernel-open/nvidia-uvm/uvm_hal.c
+++ b/kernel-open/nvidia-uvm/uvm_hal.c
@@ -46,6 +46,8 @@
 #include "clc8b5.h"
 #include "clc96f.h"
 #include "clc9b5.h"
+#include "clca6f.h"
+#include "clcab5.h"

 static int uvm_downgrade_force_membar_sys = 1;
 module_param(uvm_downgrade_force_membar_sys, uint, 0644);
@@ -73,16 +75,17 @@ static uvm_hal_class_ops_t ce_table[] =
            .semaphore_release = uvm_hal_maxwell_ce_semaphore_release,
            .semaphore_timestamp = uvm_hal_maxwell_ce_semaphore_timestamp,
            .semaphore_reduction_inc = uvm_hal_maxwell_ce_semaphore_reduction_inc,
+            .semaphore_target_is_valid = uvm_hal_maxwell_semaphore_target_is_valid,
            .offset_out = uvm_hal_maxwell_ce_offset_out,
            .offset_in_out = uvm_hal_maxwell_ce_offset_in_out,
            .phys_mode = uvm_hal_maxwell_ce_phys_mode,
            .plc_mode = uvm_hal_maxwell_ce_plc_mode,
            .memcopy_copy_type = uvm_hal_maxwell_ce_memcopy_copy_type,
-            .memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
+            .memcopy_is_valid = uvm_hal_maxwell_ce_memcopy_is_valid,
            .memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
            .memcopy = uvm_hal_maxwell_ce_memcopy,
            .memcopy_v_to_v = uvm_hal_maxwell_ce_memcopy_v_to_v,
-            .memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
+            .memset_is_valid = uvm_hal_maxwell_ce_memset_is_valid,
            .memset_1 = uvm_hal_maxwell_ce_memset_1,
            .memset_4 = uvm_hal_maxwell_ce_memset_4,
            .memset_8 = uvm_hal_maxwell_ce_memset_8,
@@ -142,9 +145,9 @@ static uvm_hal_class_ops_t ce_table[] =
        .u.ce_ops = {
            .method_is_valid = uvm_hal_method_is_valid_stub,
            .plc_mode = uvm_hal_ampere_ce_plc_mode_c7b5,
-            .memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
+            .memcopy_is_valid = uvm_hal_maxwell_ce_memcopy_is_valid,
            .memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
-            .memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
+            .memset_is_valid = uvm_hal_maxwell_ce_memset_is_valid,
        },
    },
    {
@@ -171,6 +174,11 @@ static uvm_hal_class_ops_t ce_table[] =
        .parent_id = HOPPER_DMA_COPY_A,
        .u.ce_ops = {},
    },
+    {
+        .id = BLACKWELL_DMA_COPY_B,
+        .parent_id = BLACKWELL_DMA_COPY_A,
+        .u.ce_ops = {},
+    },
 };

 // Table for GPFIFO functions.  Same idea as the copy engine table.
@@ -185,6 +193,7 @@ static uvm_hal_class_ops_t host_table[] =
            .sw_method_is_valid = uvm_hal_method_is_valid_stub,
            .wait_for_idle = uvm_hal_maxwell_host_wait_for_idle,
            .membar_sys = uvm_hal_maxwell_host_membar_sys,
+
            // No MEMBAR GPU until Pascal, just do a MEMBAR SYS.
            .membar_gpu = uvm_hal_maxwell_host_membar_sys,
            .noop = uvm_hal_maxwell_host_noop,
@@ -192,6 +201,7 @@ static uvm_hal_class_ops_t host_table[] =
            .semaphore_acquire = uvm_hal_maxwell_host_semaphore_acquire,
            .semaphore_release = uvm_hal_maxwell_host_semaphore_release,
            .semaphore_timestamp = uvm_hal_maxwell_host_semaphore_timestamp,
+            .semaphore_target_is_valid = uvm_hal_maxwell_semaphore_target_is_valid,
            .set_gpfifo_entry = uvm_hal_maxwell_host_set_gpfifo_entry,
            .set_gpfifo_noop = uvm_hal_maxwell_host_set_gpfifo_noop,
            .set_gpfifo_pushbuffer_segment_base = uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported,
@@ -207,7 +217,6 @@ static uvm_hal_class_ops_t host_table[] =
            .clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
            .clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
            .access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
-            .access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
            .access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
            .get_time = uvm_hal_maxwell_get_time,
        }
@@ -244,9 +253,6 @@ static uvm_hal_class_ops_t host_table[] =
            .replay_faults = uvm_hal_volta_replay_faults,
            .cancel_faults_va = uvm_hal_volta_cancel_faults_va,
            .clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
-            .access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
-            .access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
-            .access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
            .semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
        }
    },
@@ -261,6 +267,8 @@ static uvm_hal_class_ops_t host_table[] =
            .tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
            .tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
            .tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
+            .access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
+            .access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
        }
    },
    {
@@ -302,6 +310,11 @@ static uvm_hal_class_ops_t host_table[] =
            .tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
        }
    },
+    {
+        .id = BLACKWELL_CHANNEL_GPFIFO_B,
+        .parent_id = BLACKWELL_CHANNEL_GPFIFO_A,
+        .u.host_ops = {}
+    },
 };

 static uvm_hal_class_ops_t arch_table[] =
@@ -383,6 +396,15 @@ static uvm_hal_class_ops_t arch_table[] =
            .mmu_client_id_to_utlb_id = uvm_hal_blackwell_mmu_client_id_to_utlb_id,
        }
    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
+        .u.arch_ops = {
+            // Note that the GB20x MMU behaves like the Hopper MMU, so it inherits from
+            // Hopper's MMU, not from GB10x.
+            .mmu_mode_hal = uvm_hal_mmu_mode_hopper,
+        }
+    },
 };

 static uvm_hal_class_ops_t fault_buffer_table[] =
@@ -479,6 +501,11 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
            .get_mmu_engine_type = uvm_hal_blackwell_fault_buffer_get_mmu_engine_type,
        }
    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
+        .u.fault_buffer_ops = {}
+    },
 };

 static uvm_hal_class_ops_t access_counter_buffer_table[] =
@@ -508,22 +535,19 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
-        .u.access_counter_buffer_ops = {
-            .enable_access_counter_notifications  = uvm_hal_volta_enable_access_counter_notifications,
-            .disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
-            .clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
-            .parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
-            .entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
-            .entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
-            .entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
-        }
+        .u.access_counter_buffer_ops = {}
    },
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
        .u.access_counter_buffer_ops = {
+            .enable_access_counter_notifications  = uvm_hal_turing_enable_access_counter_notifications,
            .disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
            .clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
+            .parse_entry = uvm_hal_turing_access_counter_buffer_parse_entry,
+            .entry_is_valid = uvm_hal_turing_access_counter_buffer_entry_is_valid,
+            .entry_clear_valid = uvm_hal_turing_access_counter_buffer_entry_clear_valid,
+            .entry_size = uvm_hal_turing_access_counter_buffer_entry_size,
        }
    },
    {
@@ -546,6 +570,11 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
        .u.access_counter_buffer_ops = {}
    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
+        .u.access_counter_buffer_ops = {}
+    },
 };

 static uvm_hal_class_ops_t sec2_table[] =
@@ -557,6 +586,7 @@ static uvm_hal_class_ops_t sec2_table[] =
            .decrypt = uvm_hal_maxwell_sec2_decrypt_unsupported,
            .semaphore_release = uvm_hal_maxwell_sec2_semaphore_release_unsupported,
            .semaphore_timestamp = uvm_hal_maxwell_sec2_semaphore_timestamp_unsupported,
+            .semaphore_target_is_valid = uvm_hal_maxwell_semaphore_target_is_valid,
        }
    },
    {
@@ -604,6 +634,11 @@ static uvm_hal_class_ops_t sec2_table[] =
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
        .u.sec2_ops = {}
    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
+        .u.sec2_ops = {}
+    },
 };

 static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
@@ -799,19 +834,12 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)

 static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
 {
-    // Access counters are currently not supported in vGPU.
+    // Access counters are currently not supported in vGPU or Confidential
+    // Computing.
    //
    // TODO: Bug 200692962: Add support for access counters in vGPU
-    if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) {
+    if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
        parent_gpu->access_counters_supported = false;
-        parent_gpu->access_counters_can_use_physical_addresses = false;
-    }
-
-    // Access counters are not supported in Confidential Computing.
-    else if (g_uvm_global.conf_computing_enabled) {
-        parent_gpu->access_counters_supported = false;
-        parent_gpu->access_counters_can_use_physical_addresses = false;
-    }
 }

 void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
@@ -1007,36 +1035,15 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
    UVM_DBG_PRINT("    timestamp:                    %llu\n", entry->timestamp);
 }

-const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
-{
-    BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
-
-    switch (access_counter_type) {
-        UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
-        UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
-        UVM_ENUM_STRING_DEFAULT();
-    }
-}
-
 void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
 {
-    if (!entry->address.is_virtual) {
-        UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
-                      entry->address.address,
-                      uvm_aperture_string(entry->address.aperture));
-    }
-    else {
-        UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
-        UVM_DBG_PRINT("    instance_ptr    {0x%llx:%s}\n",
-                      entry->virtual_info.instance_ptr.address,
-                      uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
-        UVM_DBG_PRINT("    mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
-        UVM_DBG_PRINT("    mmu_engine_id   %u\n", entry->virtual_info.mmu_engine_id);
-        UVM_DBG_PRINT("    ve_id           %u\n", entry->virtual_info.ve_id);
-    }
-
-    UVM_DBG_PRINT("    is_virtual      %u\n", entry->address.is_virtual);
-    UVM_DBG_PRINT("    counter_type    %s\n", uvm_access_counter_type_string(entry->counter_type));
+    UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address);
+    UVM_DBG_PRINT("    instance_ptr    {0x%llx:%s}\n",
+                  entry->instance_ptr.address,
+                  uvm_aperture_string(entry->instance_ptr.aperture));
+    UVM_DBG_PRINT("    mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->mmu_engine_type));
+    UVM_DBG_PRINT("    mmu_engine_id   %u\n", entry->mmu_engine_id);
+    UVM_DBG_PRINT("    ve_id           %u\n", entry->ve_id);
    UVM_DBG_PRINT("    counter_value   %u\n", entry->counter_value);
    UVM_DBG_PRINT("    subgranularity  0x%08x\n", entry->sub_granularity);
    UVM_DBG_PRINT("    bank            %u\n", entry->bank);
@@ -1048,16 +1055,6 @@ bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32
    return true;
 }

-bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
-{
-    return true;
-}
-
 void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
 {
 }
-
-bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t num_elements, size_t element_size)
-{
-    return true;
-}
--- a/kernel-open/nvidia-uvm/uvm_hal.h
+++ b/kernel-open/nvidia-uvm/uvm_hal.h
@@ -268,6 +268,15 @@ void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU3
 void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

+// Semaphore op validation.
+// The validation happens at the start of semaphore op (uvm_hal_semaphore_*_t)
+// execution. This is currently shared by all semaphore operations:
+// semaphore releases by CE, SEC2, and esched, as well as semaphore
+// reduction operations, semaphore acquires, and semaphore releases
+// with timestamp.
+typedef bool (*uvm_hal_semaphore_target_is_valid_t)(uvm_push_t *push, NvU64 gpu_va);
+bool uvm_hal_maxwell_semaphore_target_is_valid(uvm_push_t *push, NvU64 gpu_va);
+
 typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry,
                                                NvU64 pushbuffer_va,
                                                NvU32 pushbuffer_length,
@@ -330,10 +339,9 @@ bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_addre

 // Memcopy validation.
 // The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
-// execution. Use uvm_hal_ce_memcopy_is_valid_stub to skip the validation for
-// a given architecture.
+// execution.
 typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
-bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
+bool uvm_hal_maxwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
 bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
 bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);

@@ -358,13 +366,15 @@ void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, s

 // Memset validation.
 // The validation happens at the start of the memset (uvm_hal_memset_*_t)
-// execution. Use uvm_hal_ce_memset_is_valid_stub to skip the validation for
-// a given architecture.
+// execution.
 typedef bool (*uvm_hal_ce_memset_is_valid)(uvm_push_t *push,
                                           uvm_gpu_address_t dst,
                                           size_t num_elements,
                                           size_t element_size);
-bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t num_elements, size_t element_size);
+bool uvm_hal_maxwell_ce_memset_is_valid(uvm_push_t *push,
+                                        uvm_gpu_address_t dst,
+                                        size_t num_elements,
+                                        size_t element_size);
 bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
                                            uvm_gpu_address_t dst,
                                            size_t num_elements,
@@ -484,6 +494,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size);
+
 void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
@@ -566,7 +577,6 @@ NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_ty
 uvm_mmu_engine_type_t uvm_hal_volta_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
                                                                     uvm_fault_client_type_t client_type,
                                                                     NvU16 client_id);
-
 uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry);

 void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
@@ -676,54 +686,52 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
 void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);

 // Access counters
-typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
-typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
-typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
+typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
+typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
+typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters, NvU32 get);

 // Parse the entry on the given buffer index. This also clears the valid bit of
 // the entry in the buffer.
-typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
+typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_access_counter_buffer_t *access_counters,
                                                            NvU32 index,
                                                            uvm_access_counter_buffer_entry_t *buffer_entry);
-typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
+typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_access_counter_buffer_t *access_counters,
+                                                               NvU32 index);
+typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_access_counter_buffer_t *access_counters,
+                                                                  NvU32 index);
 typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
 typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
-typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
 typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
                                                        const uvm_access_counter_buffer_entry_t *buffer_entry);

-void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
-void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
+void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                    NvU32 get);
+void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
                                                                   NvU32 index,
                                                                   uvm_access_counter_buffer_entry_t *buffer_entry);
-bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
+bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                      NvU32 index);
+void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                         NvU32 index);
 NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
-void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
 void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
                                                               const uvm_access_counter_buffer_entry_t *buffer_entry);

-void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
-void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
-                                                     NvU32 index,
-                                                     uvm_access_counter_buffer_entry_t *buffer_entry);
-bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
-
-void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
-void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
-void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
-                                                 const uvm_access_counter_buffer_entry_t *buffer_entry);
-
-void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
+void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get);
+void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
+                                                      NvU32 index,
+                                                      uvm_access_counter_buffer_entry_t *buffer_entry);
+bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
+void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
+NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
+void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
+void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
+                                                  const uvm_access_counter_buffer_entry_t *buffer_entry);

 // The source and destination addresses must be 16-byte aligned. Note that the
 // best performance is achieved with 256-byte alignment. The decrypt size must
@@ -760,6 +768,7 @@ struct uvm_host_hal_struct
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_acquire_t semaphore_acquire;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
+    uvm_hal_semaphore_target_is_valid_t semaphore_target_is_valid;
    uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
    uvm_hal_host_set_gpfifo_noop_t set_gpfifo_noop;
    uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t set_gpfifo_pushbuffer_segment_base;
@@ -775,7 +784,6 @@ struct uvm_host_hal_struct
    uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
    uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
    uvm_hal_access_counter_clear_all_t access_counter_clear_all;
-    uvm_hal_access_counter_clear_type_t access_counter_clear_type;
    uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
    uvm_hal_get_time_t get_time;
 };
@@ -786,6 +794,7 @@ struct uvm_ce_hal_struct
    uvm_hal_ce_method_is_valid method_is_valid;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
+    uvm_hal_semaphore_target_is_valid_t semaphore_target_is_valid;
    uvm_hal_ce_offset_out_t offset_out;
    uvm_hal_ce_offset_in_out_t offset_in_out;
    uvm_hal_ce_phys_mode_t phys_mode;
@@ -849,6 +858,7 @@ struct uvm_sec2_hal_struct
    uvm_hal_sec2_decrypt_t decrypt;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
+    uvm_hal_semaphore_target_is_valid_t semaphore_target_is_valid;
 };

 typedef struct
--- a/kernel-open/nvidia-uvm/uvm_hal_types.h
+++ b/kernel-open/nvidia-uvm/uvm_hal_types.h
@@ -301,7 +301,6 @@ typedef enum
    UVM_FAULT_TYPE_REGION_VIOLATION,
    UVM_FAULT_TYPE_POISONED,
    UVM_FAULT_TYPE_CC_VIOLATION,
-
    UVM_FAULT_TYPE_COUNT
 } uvm_fault_type_t;

@@ -472,69 +471,34 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
    return max(membar_1, membar_2);
 }

-typedef enum
-{
-    UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
-    UVM_ACCESS_COUNTER_TYPE_MOMC,
-
-    UVM_ACCESS_COUNTER_TYPE_MAX,
-} uvm_access_counter_type_t;
-
-const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
-
 struct uvm_access_counter_buffer_entry_struct
 {
-    // Whether this counter refers to outbound accesses to remote GPUs or
-    // sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
-    // GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
-    uvm_access_counter_type_t counter_type;
-
    // Address of the region for which a notification was sent
-    uvm_gpu_address_t address;
+    NvU64 address;

-    union
-    {
-        // These fields are only valid if address.is_virtual is true
-        struct
-        {
-            // Instance pointer of one of the channels in the TSG that triggered
-            // the notification.
-            uvm_gpu_phys_address_t instance_ptr;
+    // Instance pointer of one of the channels in the TSG that triggered
+    // the notification.
+    uvm_gpu_phys_address_t instance_ptr;

-            uvm_mmu_engine_type_t mmu_engine_type;
+    uvm_mmu_engine_type_t mmu_engine_type;

-            NvU32 mmu_engine_id;
+    NvU32 mmu_engine_id;

-            // Identifier of the subcontext that performed the memory accesses
-            // that triggered the notification. This value, combined with the
-            // instance_ptr, is needed to obtain the GPU VA space of the process
-            // that triggered the notification.
-            NvU32 ve_id;
+    // Identifier of the subcontext that performed the memory accesses
+    // that triggered the notification. This value, combined with the
+    // instance_ptr, is needed to obtain the GPU VA space of the process
+    // that triggered the notification.
+    NvU32 ve_id;

-            // VA space for the address that triggered the notification
-            uvm_va_space_t *va_space;
-        } virtual_info;
+    // VA space for the address that triggered the notification
+    uvm_va_space_t *va_space;

-        // These fields are only valid if address.is_virtual is false
-        struct
-        {
-            // Processor id where data is resident
-            //
-            // Although this information is not tied to a VA space, we can use
-            // a regular processor id because P2P is not allowed between
-            // partitioned GPUs.
-            uvm_processor_id_t resident_id;
-
-        } physical_info;
-    };
-
-    // This is the GPU that triggered the notification. Note that physical
-    // address based notifications are only supported on non-MIG-capable GPUs.
+    // This is the GPU that triggered the notification.
    uvm_gpu_t *gpu;

    // Number of times the tracked region was accessed since the last time it
    // was cleared. Counter values saturate at the maximum value supported by
-    // the GPU (2^16 - 1 in Volta)
+    // the GPU (2^16 - 1 on Turing)
    NvU32 counter_value;

    // When the granularity of the tracked regions is greater than 64KB, the
--- a/kernel-open/nvidia-uvm/uvm_hmm.c
+++ b/kernel-open/nvidia-uvm/uvm_hmm.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -163,7 +163,7 @@ static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
 // Copies the contents of the source device-private page to the
 // destination CPU page. This will invalidate mappings, so cannot be
 // called while holding any va_block locks.
-static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
+static NV_STATUS hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
 {
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    uvm_gpu_phys_address_t src_addr;
@@ -184,7 +184,7 @@ static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
    gpu = uvm_gpu_chunk_get_gpu(gpu_chunk);
    status = uvm_mmu_chunk_map(gpu_chunk);
    if (status != NV_OK)
-        goto out_zero;
+        goto out;

    status = uvm_parent_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
    if (status != NV_OK)
@@ -207,7 +207,7 @@ static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
    uvm_push_end(&push);
    status = uvm_tracker_add_push_safe(&tracker, &push);
    if (status == NV_OK)
-        uvm_tracker_wait_deinit(&tracker);
+        status = uvm_tracker_wait_deinit(&tracker);

 out_unmap_cpu:
    uvm_parent_gpu_unmap_cpu_pages(gpu->parent, dma_addr, PAGE_SIZE);
@@ -215,13 +215,8 @@ out_unmap_cpu:
 out_unmap_gpu:
    uvm_mmu_chunk_unmap(gpu_chunk, NULL);

-out_zero:
-    // We can't fail eviction because we need to free the device-private pages
-    // so the GPU can be unregistered. So the best we can do is warn on any
-    // failures and zero the uninitialised page. This could result in data loss
-    // in the application but failures are not expected.
-    if (WARN_ON(status != NV_OK))
-        memzero_page(dst_page, 0, PAGE_SIZE);
+out:
+    return status;
 }

 static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
@@ -245,7 +240,13 @@ static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
        }

        lock_page(dst_page);
-        hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn));
+
+        // We can't fail eviction because we need to free the device-private
+        // pages so the GPU can be unregistered. So the best we can do is warn
+        // on any failures and zero the uninitialized page. This could result
+        // in data loss in the application but failures are not expected.
+        if (WARN_ON(hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn)) != NV_OK))
+            memzero_page(dst_page, 0, PAGE_SIZE);
        dst_pfn = migrate_pfn(page_to_pfn(dst_page));
        migrate_device_pages(&src_pfn, &dst_pfn, 1);
    }
@@ -320,13 +321,17 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
 {
    uvm_range_tree_node_t *node;
    uvm_va_block_t *va_block;
-    struct range range = gpu->pmm.devmem.pagemap.range;
+    unsigned long devmem_start;
+    unsigned long devmem_end;
    unsigned long pfn;
    bool retry;

    if (!uvm_hmm_is_enabled(va_space))
        return;

+    devmem_start = gpu->parent->devmem->pagemap.range.start + gpu->mem_info.phys_start;
+    devmem_end = devmem_start + gpu->mem_info.size;
+
    if (mm)
        uvm_assert_mmap_lock_locked(mm);
    uvm_assert_rwsem_locked_write(&va_space->lock);
@@ -340,7 +345,7 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
    do {
        retry = false;

-        for (pfn = __phys_to_pfn(range.start); pfn <= __phys_to_pfn(range.end); pfn++) {
+        for (pfn = __phys_to_pfn(devmem_start); pfn < __phys_to_pfn(devmem_end); pfn++) { // devmem_end is exclusive
            struct page *page = pfn_to_page(pfn);

            UVM_ASSERT(is_device_private_page(page));
@@ -348,7 +353,7 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
            // This check is racy because nothing stops the page being freed and
            // even reused. That doesn't matter though - worst case the
            // migration fails, we retry and find the va_space doesn't match.
-            if (page->zone_device_data == va_space)
+            if (uvm_pmm_devmem_page_to_va_space(page) == va_space)
                if (uvm_hmm_pmm_gpu_evict_pfn(pfn) != NV_OK)
                    retry = true;
        }
@@ -1597,7 +1602,7 @@ static NV_STATUS hmm_va_block_cpu_page_populate(uvm_va_block_t *va_block,
        return status;
    }

-    status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk, page_index);
+    status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk);
    if (status != NV_OK) {
        uvm_cpu_chunk_remove_from_block(va_block, page_to_nid(page), page_index);
        uvm_cpu_chunk_free(chunk);
@@ -1712,7 +1717,7 @@ static void gpu_chunk_remove(uvm_va_block_t *va_block,
    uvm_gpu_chunk_t *gpu_chunk;
    uvm_gpu_id_t id;

-    id = uvm_pmm_devmem_page_to_gpu_id(page);
+    id = uvm_gpu_chunk_get_gpu(uvm_pmm_devmem_page_to_chunk(page))->id;
    gpu_state = uvm_va_block_gpu_state_get(va_block, id);
    UVM_ASSERT(gpu_state);

@@ -1724,6 +1729,11 @@ static void gpu_chunk_remove(uvm_va_block_t *va_block,
        return;
    }

+    UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
+    UVM_ASSERT(gpu_chunk->is_referenced);
+
+    uvm_page_mask_clear(&gpu_state->resident, page_index);
+
    uvm_mmu_chunk_unmap(gpu_chunk, &va_block->tracker);
    gpu_state->chunks[page_index] = NULL;
 }
@@ -1737,7 +1747,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
    uvm_gpu_id_t id;
    NV_STATUS status;

-    id = uvm_pmm_devmem_page_to_gpu_id(page);
+    id = uvm_gpu_chunk_get_gpu(uvm_pmm_devmem_page_to_chunk(page))->id;
    gpu_state = uvm_va_block_gpu_state_get(va_block, id);

    // It's possible that this is a fresh va_block we're trying to add an
@@ -1759,7 +1769,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
    gpu_chunk = uvm_pmm_devmem_page_to_chunk(page);
    UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
    UVM_ASSERT(gpu_chunk->is_referenced);
-    UVM_ASSERT(page->zone_device_data == va_block->hmm.va_space);
+    UVM_ASSERT(uvm_pmm_devmem_page_to_va_space(page) == va_block->hmm.va_space);

    if (gpu_state->chunks[page_index] == gpu_chunk)
        return NV_OK;
@@ -1986,7 +1996,7 @@ static void fill_dst_pfn(uvm_va_block_t *va_block,
    hmm_mark_gpu_chunk_referenced(va_block, gpu, gpu_chunk);
    UVM_ASSERT(!page_count(dpage));
    zone_device_page_init(dpage);
-    dpage->zone_device_data = va_block->hmm.va_space;
+    dpage->zone_device_data = gpu_chunk;

    dst_pfns[page_index] = migrate_pfn(pfn);
 }
@@ -2196,7 +2206,11 @@ static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_contex

    // Do the copy but don't update the residency or mapping for the new
    // location yet.
-    return uvm_va_block_service_copy(processor_id, UVM_ID_CPU, va_block, va_block_retry, service_context);
+    status = uvm_va_block_service_copy(processor_id, UVM_ID_CPU, va_block, va_block_retry, service_context);
+    if (status != NV_OK)
+        clean_up_non_migrating_pages(va_block, src_pfns, dst_pfns, service_context->region, page_mask);
+
+    return status;
 }

 static NV_STATUS uvm_hmm_devmem_fault_finalize_and_map(uvm_hmm_devmem_fault_context_t *devmem_fault_context)
@@ -3482,12 +3496,17 @@ NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
        lock_page(dst_page);
        dst_pfn = migrate_pfn(page_to_pfn(dst_page));

-        hmm_copy_devmem_page(dst_page, src_page);
+        status = hmm_copy_devmem_page(dst_page, src_page);
+        if (status != NV_OK) {
+            unlock_page(dst_page);
+            __free_page(dst_page);
+            dst_pfn = 0;
+        }
    }

-    migrate_vma_pages(&args);
-
 out:
+    if (status == NV_OK)
+        migrate_vma_pages(&args);
    migrate_vma_finalize(&args);

    return status;
--- a/kernel-open/nvidia-uvm/uvm_hopper.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2020-2023 NVIDIA Corporation
+    Copyright (c) 2020-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -36,12 +36,6 @@ static uvm_gpu_peer_copy_mode_t hopper_peer_copy_mode(uvm_parent_gpu_t *parent_g
    if (g_uvm_global.conf_computing_enabled)
        return UVM_GPU_PEER_COPY_MODE_UNSUPPORTED;

-    // TODO: Bug 4174553: In some Grace Hopper setups, physical peer copies
-    // result on errors. Force peer copies to use virtual addressing until the
-    // issue is clarified.
-    if (uvm_parent_gpu_is_coherent(parent_gpu))
-        return UVM_GPU_PEER_COPY_MODE_VIRTUAL;
-
    return g_uvm_global.peer_copy_mode;
 }

@@ -56,12 +50,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);

-    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
-                                                          parent_gpu->utlb_per_gpc_count;
+    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
-        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
-                                                                           (sizeof(dummy->fault_source.utlb_id) * 8)));
+        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Hopper covers 64 PB and that's the minimum
@@ -69,6 +61,9 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->rm_va_base = 0;
    parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;

+    parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
+    parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
+
    parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

@@ -102,8 +97,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->access_counters_supported = true;

-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
@@ -121,4 +114,9 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->plc_supported = true;

    parent_gpu->no_ats_range_required = true;
+
+    // In Hopper there are not enough HW key slots available to support
+    // individual channel encryption keys, so channels on the same engine share
+    // the keys.
+    parent_gpu->conf_computing.per_channel_key_rotation = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_hopper_ce.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper_ce.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2020-2023 NVIDIA Corporation
+    Copyright (c) 2020-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -91,6 +91,11 @@ void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NvU32 launch_dma_plc_mode;

+    UVM_ASSERT_MSG(gpu->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "'fake'",
+                   uvm_gpu_name(gpu));
+
    NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
                     SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
                     SET_SEMAPHORE_PAYLOAD, payload);
@@ -109,6 +114,11 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NvU32 launch_dma_plc_mode;

+    UVM_ASSERT_MSG(gpu->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "'fake'",
+                   uvm_gpu_name(gpu));
+
    NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
                     SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
                     SET_SEMAPHORE_PAYLOAD, payload);
@@ -127,14 +137,18 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N

 void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
 {
-    uvm_gpu_t *gpu;
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    NvU32 launch_dma_plc_mode;

+    UVM_ASSERT_MSG(gpu->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "'fake'",
+                   uvm_gpu_name(gpu));
+
    NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
                     SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
                     SET_SEMAPHORE_PAYLOAD, 0xdeadbeef);

-    gpu = uvm_push_get_gpu(push);
    launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

    NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
@@ -186,6 +200,7 @@ static NvU32 hopper_memset_copy_type(uvm_gpu_address_t dst)
 {
    if (g_uvm_global.conf_computing_enabled && dst.is_unprotected)
        return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT);
+
    return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT);
 }

@@ -345,14 +360,19 @@ bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
 {
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

-    // In HCC, if a memset uses physical addressing for the destination, then
-    // it must write to (protected) vidmem. If the memset uses virtual
-    // addressing, and the backing storage is not vidmem, the access is only
-    // legal if the copy type is NONPROT2NONPROT, and the destination is
+    // In Confidential Computing, if a memset uses physical addressing for the
+    // destination, then it must write to (protected) vidmem. If the memset uses
+    // virtual addressing, and the backing storage is not vidmem, the access is
+    // only legal if the copy type is NONPROT2NONPROT, and the destination is
    // unprotected sysmem, but the validation does not detect it.
-    if (uvm_conf_computing_mode_is_hcc(gpu) && !dst.is_virtual && dst.aperture != UVM_APERTURE_VID)
+    if (g_uvm_global.conf_computing_enabled && !dst.is_virtual && dst.aperture != UVM_APERTURE_VID)
        return false;

+    if (uvm_gpu_address_is_peer(gpu, dst)) {
+        UVM_ERR_PRINT("Memset to peer address (0x%llx) is not allowed!\n", dst.address);
+        return false;
+    }
+
    if (!gpu->parent->ce_phys_vidmem_write_supported) {
        size_t size = num_elements * element_size;
        uvm_gpu_address_t temp = dst;
@@ -373,14 +393,22 @@ bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
 bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
 {
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+    const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);

-    if (uvm_conf_computing_mode_is_hcc(gpu)) {
-        // In HCC, if a memcopy uses physical addressing for either the
-        // destination or the source, then the corresponding aperture must be
-        // vidmem. If virtual addressing is used, and the backing storage is
-        // sysmem the access is only legal if the copy type is NONPROT2NONPROT,
-        // but the validation does not detect it. In other words the copy
-        // source and destination is unprotected sysmem.
+    if (push->channel && peer_copy && !uvm_channel_is_p2p(push->channel)) {
+        UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!\n",
+                      src.address,
+                      dst.address);
+        return false;
+    }
+
+    if (g_uvm_global.conf_computing_enabled) {
+        // In Confidential Computing, if a memcopy uses physical addressing for
+        // either the destination or the source, then the corresponding aperture
+        // must be vidmem. If virtual addressing is used, and the backing
+        // storage is sysmem the access is only legal if the copy type is
+        // NONPROT2NONPROT, but the validation does not detect it. In other
+        // words, the copy source and destination are unprotected sysmem.
        if (!src.is_virtual && (src.aperture != UVM_APERTURE_VID))
            return false;

@@ -490,9 +518,8 @@ void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
    NvU32 auth_tag_address_hi32, auth_tag_address_lo32;
    NvU64 iv_address;
    NvU32 iv_address_hi32, iv_address_lo32;
-    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

-    UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
+    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));

    if (!src.is_virtual)
@@ -537,9 +564,8 @@ void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
 {

    NvU32 auth_tag_address_hi32, auth_tag_address_lo32;
-    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

-    UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
+    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));

    // The addressing mode (and aperture, if applicable) of the source and
@@ -565,4 +591,3 @@ void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,

    encrypt_or_decrypt(push, dst, src, size);
 }
-
--- a/kernel-open/nvidia-uvm/uvm_hopper_host.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper_host.c
@@ -31,6 +31,12 @@
 void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
 {
    NvU32 sem_lo;
+
+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "fake",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
    sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);

@@ -49,6 +55,12 @@ void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
 void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
 {
    NvU32 sem_lo;
+
+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "fake",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
    sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
    NV_PUSH_5U(C86F, SEM_ADDR_LO,    HWVALUE(C86F, SEM_ADDR_LO, OFFSET, sem_lo),
@@ -63,6 +75,12 @@ void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32
 void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
 {
    NvU32 sem_lo;
+
+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "fake",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
    sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);

--- a/kernel-open/nvidia-uvm/uvm_hopper_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper_mmu.c
@@ -54,6 +54,10 @@ static NvU32 page_table_depth_hopper(NvU64 page_size)
        return 4;
    else if (page_size == UVM_PAGE_SIZE_512M)
        return 3;
+
+    UVM_ASSERT((page_size == UVM_PAGE_SIZE_4K) || (page_size == UVM_PAGE_SIZE_64K) ||
+               (page_size == UVM_PAGE_SIZE_DEFAULT));
+
    return 5;
 }

--- a/kernel-open/nvidia-uvm/uvm_hopper_sec2.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper_sec2.c
@@ -93,6 +93,11 @@ void uvm_hal_hopper_sec2_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
    uvm_gpu_address_t sign_auth_tag_gpu_va;
    NvU32 *csl_sign_init = push->next;

+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->sec2_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "'fake'",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    UVM_ASSERT(IS_ALIGNED(NvU64_LO32(gpu_va), 1 << HWSHIFT(CBA2, SEMAPHORE_B, LOWER)));

    sem_lo = READ_HWVALUE(NvU64_LO32(gpu_va), CBA2, SEMAPHORE_B, LOWER);
--- a/kernel-open/nvidia-uvm/uvm_kvmalloc.c
+++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2020 NVIDIA Corporation
+    Copyright (c) 2016-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -111,13 +111,13 @@ void uvm_kvmalloc_exit(void)
        return;

    if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
-        printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
-        printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
-                      atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
-                      uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
+        UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        UVM_INFO_PRINT("Memory leak of %lu bytes detected.%s\n",
+                        atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
+                        uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
                        " insmod with uvm_leak_checker=2 for detailed information." :
                        "");
-        printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");

        if (g_uvm_global.unload_state.ptr)
            *g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
@@ -129,12 +129,12 @@ void uvm_kvmalloc_exit(void)
        uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
            uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);

-            printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "    Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
-                   uvm_kvsize((void *)((uintptr_t)info->node.key)),
-                   kbasename(info->file),
-                   info->line,
-                   info->function,
-                   info->node.key);
+            UVM_INFO_PRINT("    Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
+                            uvm_kvsize((void *)((uintptr_t)info->node.key)),
+                            kbasename(info->file),
+                            info->line,
+                            info->function,
+                            info->node.key);

            // Free so we don't keep eating up memory while debugging. Note that
            // this also removes the entry from the table, frees info, and drops
--- a/kernel-open/nvidia-uvm/uvm_linux.c
+++ b/kernel-open/nvidia-uvm/uvm_linux.c
@@ -54,3 +54,30 @@ void uvm_memcg_context_end(uvm_memcg_context_t *context)
    mem_cgroup_put(context->new_memcg);
 }
 #endif
+
+#if !UVM_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT()
+static int sg_dma_page_count(struct scatterlist *sg)
+{
+    return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
+}
+
+bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
+{
+    struct sg_page_iter *piter = &dma_iter->base;
+
+    if (!piter->__nents || !piter->sg)
+        return false;
+
+    piter->sg_pgoffset += piter->__pg_advance;
+    piter->__pg_advance = 1;
+
+    while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
+        piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
+        piter->sg = sg_next(piter->sg);
+        if (!--piter->__nents || !piter->sg)
+            return false;
+    }
+
+    return true;
+}
+#endif
--- a/kernel-open/nvidia-uvm/uvm_linux.h
+++ b/kernel-open/nvidia-uvm/uvm_linux.h
@@ -84,9 +84,19 @@
 #include <linux/sched/task_stack.h>
 #endif

+#if !defined(NV_SG_DMA_PAGE_ITER_PRESENT)
+#include <linux/scatterlist.h>
+#endif
+
 #include <linux/cpumask.h>
 #include <linux/topology.h>

+#if defined(NV_LINUX_DMA_DIRECT_H_PRESENT)
+#include <linux/dma-direct.h>
+#else
+#include <asm/dma-mapping.h>
+#endif
+
 #include "nv-kthread-q.h"

    #if defined(NV_CPUMASK_OF_NODE_PRESENT)
@@ -173,7 +183,7 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
            printk(fmt, ##__VA_ARGS__); \
    } while (0)

-#define NV_UVM_GFP_FLAGS (GFP_KERNEL)
+#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NOMEMALLOC)

 #if defined(NVCPU_X86)
 /* Some old IA32 kernels don't have 64/64 division routines,
@@ -382,4 +392,37 @@ static inline pgprot_t uvm_pgprot_decrypted(pgprot_t prot)
   return prot;
 }

+#if !defined(NV_SG_DMA_PAGE_ITER_PRESENT)
+    // Added by commit d901b2760dc6c ("lib/scatterlist: Provide a DMA page
+    // iterator") v5.0
+    struct sg_dma_page_iter {
+        struct sg_page_iter base;
+    };
+
+    #define uvm_sg_page_iter_dma_address(dma_iter)      \
+        sg_page_iter_dma_address(&((dma_iter)->base))
+#else
+    #define uvm_sg_page_iter_dma_address(dma_iter)      \
+        sg_page_iter_dma_address((dma_iter))
+#endif
+
+#if !defined(NV_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT)
+    // Added by commit 709d6d73c756 ("scatterlist: add generic wrappers for
+    // iterating over sgtable objects") v5.7.
+    #define UVM_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT() 0
+
+    // Fallback iterator step, implemented in uvm_linux.c for this configuration.
+    bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter);
+
+    #define for_each_sg_dma_page(sglist, dma_iter, dma_nents, pgoffset)         \
+        for (__sg_page_iter_start(&(dma_iter)->base, sglist, dma_nents,         \
+                                  pgoffset);                                    \
+            __sg_page_iter_dma_next(dma_iter);)
+
+    #define for_each_sgtable_dma_page(sgt, dma_iter, pgoffset)                  \
+            for_each_sg_dma_page((sgt)->sgl, dma_iter, (sgt)->nents, pgoffset)
+#else
+    #define UVM_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT() 1
+#endif
+
 #endif // _UVM_LINUX_H
--- a/kernel-open/nvidia-uvm/uvm_lock.c
+++ b/kernel-open/nvidia-uvm/uvm_lock.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -27,12 +27,13 @@

 const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
 {
-    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
+    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37);

    switch (lock_order) {
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL);
+        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ACCESS_COUNTERS);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ISR);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_MMAP_LOCK);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACES_LIST);
--- a/kernel-open/nvidia-uvm/uvm_lock.h
+++ b/kernel-open/nvidia-uvm/uvm_lock.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -69,6 +69,17 @@
 //
 //      This should be taken whenever global GPU state might need to be modified.
 //
+// - Access counters VA space enablement state lock
+//      Order: UVM_LOCK_ORDER_ACCESS_COUNTERS
+//      Exclusive lock (mutex)
+//
+//      This protects VA space state associated with access counters enablement.
+//      Blackwell+ GPUs may have multiple access counters notification buffers
+//      and their "atomic" enablement is protected by this lock.
+//
+//      This should be taken whenever VA space access counters state might need
+//      to be modified.
+//
 // - GPU ISR lock
 //      Order: UVM_LOCK_ORDER_ISR
 //      Exclusive lock (mutex) per gpu
@@ -487,6 +498,7 @@ typedef enum
    UVM_LOCK_ORDER_INVALID = 0,
    UVM_LOCK_ORDER_GLOBAL_PM,
    UVM_LOCK_ORDER_GLOBAL,
+    UVM_LOCK_ORDER_ACCESS_COUNTERS,
    UVM_LOCK_ORDER_ISR,
    UVM_LOCK_ORDER_MMAP_LOCK,
    UVM_LOCK_ORDER_VA_SPACES_LIST,
@@ -523,6 +535,7 @@ typedef enum
    // This lock order can be removed after RM no longer relies on RPC event
    // notifications.
    UVM_LOCK_ORDER_CSL_CTX,
+
    UVM_LOCK_ORDER_LEAF,
    UVM_LOCK_ORDER_COUNT,
 } uvm_lock_order_t;
@@ -741,7 +754,8 @@ bool __uvm_locking_initialized(void);
        ret;                                            \
    })

-// Helper for calling a UVM-RM interface function that returns void with lock recording
+// Helper for calling a UVM-RM interface function that returns void with lock
+// recording
 #define uvm_rm_locked_call_void(call) ({                \
        uvm_record_lock_rm_all();                       \
        call;                                           \
--- a/kernel-open/nvidia-uvm/uvm_maxwell.c
+++ b/kernel-open/nvidia-uvm/uvm_maxwell.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2023 NVIDIA Corporation
+    Copyright (c) 2016-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -35,6 +35,9 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->rm_va_base = 0;
    parent_gpu->rm_va_size = 128 * UVM_SIZE_1GB;

+    parent_gpu->peer_va_base = 0;
+    parent_gpu->peer_va_size = 0;
+
    parent_gpu->uvm_mem_va_base = 768 * UVM_SIZE_1GB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

@@ -60,8 +63,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->access_counters_supported = false;

-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
    parent_gpu->fault_cancel_va_supported = false;

    parent_gpu->scoped_atomics_supported = false;
@@ -75,4 +76,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->plc_supported = false;

    parent_gpu->no_ats_range_required = false;
+
+    parent_gpu->conf_computing.per_channel_key_rotation = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_maxwell_access_counter_buffer.c
+++ b/kernel-open/nvidia-uvm/uvm_maxwell_access_counter_buffer.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021 NVIDIA Corporation
+    Copyright (c) 2021-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -24,25 +24,29 @@
 #include "uvm_gpu.h"
 #include "uvm_hal.h"

-void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
+void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
 {
    UVM_ASSERT_MSG(false,
-                   "enable_access_counter_notifications is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "enable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }

-void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
+void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
 {
    UVM_ASSERT_MSG(false,
-                   "disable_access_counter_notifications is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "disable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }

-void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
+void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                    NvU32 get)
 {
    UVM_ASSERT_MSG(false,
-                   "clear_access_counter_notifications is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "clear_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }

 NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
@@ -53,26 +57,31 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp
    return 0;
 }

-bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
+bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                      NvU32 index)
 {
    UVM_ASSERT_MSG(false,
-                   "access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "access_counter_buffer_entry_is_valid is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
    return false;
 }

-void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
+void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                         NvU32 index)
 {
    UVM_ASSERT_MSG(false,
-                   "access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "access_counter_buffer_entry_clear_valid is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }

-void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
+void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
                                                                   NvU32 index,
                                                                   uvm_access_counter_buffer_entry_t *buffer_entry)
 {
    UVM_ASSERT_MSG(false,
-                   "access_counter_buffer_parse_entry is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "access_counter_buffer_parse_entry is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
--- a/kernel-open/nvidia-uvm/uvm_maxwell_ce.c
+++ b/kernel-open/nvidia-uvm/uvm_maxwell_ce.c
@@ -50,11 +50,26 @@ void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 o
                     OFFSET_OUT_LOWER, HWVALUE(B0B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
 }

+bool uvm_hal_maxwell_semaphore_target_is_valid(uvm_push_t *push, NvU64 gpu_va)
+{
+    if (uvm_gpu_address_is_peer(uvm_push_get_gpu(push), uvm_gpu_address_virtual(gpu_va))) {
+        UVM_ERR_PRINT("Semaphore operation targeting peer addresses is not allowed!\n");
+        return false;
+    }
+
+    return true;
+}
+
 void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
 {
    NvU32 flush_value;
    bool use_flush;

+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "'fake'",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    use_flush = uvm_hal_membar_before_semaphore(push);

    if (use_flush)
@@ -76,6 +91,11 @@ void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va,
    NvU32 flush_value;
    bool use_flush;

+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "'fake'",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    use_flush = uvm_hal_membar_before_semaphore(push);

    if (use_flush)
@@ -100,6 +120,11 @@ void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
    NvU32 flush_value;
    bool use_flush;

+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "'fake'",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    use_flush = uvm_hal_membar_before_semaphore(push);

    if (use_flush)
@@ -185,6 +210,34 @@ NvU32 uvm_hal_maxwell_ce_plc_mode(void)
    return 0;
 }

+bool uvm_hal_maxwell_ce_memset_is_valid(uvm_push_t *push,
+                                        uvm_gpu_address_t dst,
+                                        size_t num_elements,
+                                        size_t element_size)
+{
+    if (uvm_gpu_address_is_peer(uvm_push_get_gpu(push), dst)) {
+        UVM_ERR_PRINT("Memset to peer address (0x%llx) is not allowed!\n", dst.address);
+        return false;
+    }
+
+    return true;
+}
+
+bool uvm_hal_maxwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
+{
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+    const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);
+
+    if (push->channel && peer_copy && !uvm_channel_is_p2p(push->channel)) {
+        UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!\n",
+                      src.address,
+                      dst.address);
+        return false;
+    }
+
+    return true;
+}
+
 // Noop, since COPY_TYPE doesn't exist in Maxwell.
 NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src)
 {
@@ -208,6 +261,12 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
                   push->channel->name,
                   uvm_gpu_name(gpu));

+    // Check if the copy is over NVLINK and simulate dropped traffic if there's
+    // an NVLINK error.
+    // Src address cannot be peer as that wouldn't pass the valid check above.
+    if (uvm_gpu_address_is_peer(gpu, dst) && uvm_gpu_get_injected_nvlink_error(gpu) != NV_OK)
+        size = 0;
+
    gpu->parent->ce_hal->memcopy_patch_src(push, &src);

    launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
--- a/kernel-open/nvidia-uvm/uvm_maxwell_host.c
+++ b/kernel-open/nvidia-uvm/uvm_maxwell_host.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021-2022 NVIDIA Corporation
+    Copyright (c) 2021-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -175,6 +175,12 @@ void uvm_hal_maxwell_host_interrupt(uvm_push_t *push)
 void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
 {
    NvU32 sem_lo;
+
+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "fake",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
    sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);

@@ -191,6 +197,12 @@ void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU3
 void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
 {
    NvU32 sem_lo;
+
+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "fake",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
    sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
    NV_PUSH_4U(A16F, SEMAPHOREA, HWVALUE(A16F, SEMAPHOREA, OFFSET_UPPER, NvOffset_HI32(gpu_va)),
@@ -204,6 +216,12 @@ void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU3
 void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
 {
    NvU32 sem_lo;
+
+    UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
+                   "Semaphore target validation failed in channel %s, GPU %s.\n",
+                   push->channel ? push->channel->name : "fake",
+                   uvm_gpu_name(uvm_push_get_gpu(push)));
+
    UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
    sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);

@@ -312,11 +330,6 @@ void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push)
    UVM_ASSERT_MSG(false, "host access_counter_clear_all called on Maxwell GPU\n");
 }

-void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type)
-{
-    UVM_ASSERT_MSG(false, "host access_counter_clear_type called on Maxwell GPU\n");
-}
-
 void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
                                                               const uvm_access_counter_buffer_entry_t *buffer_entry)
 {
--- a/kernel-open/nvidia-uvm/uvm_mem.c
+++ b/kernel-open/nvidia-uvm/uvm_mem.c
@@ -451,9 +451,11 @@ static gfp_t sysmem_allocation_gfp_flags(int order, bool zero)
    return gfp_flags;
 }

+
 // This allocation is a non-protected memory allocation under Confidential
 // Computing.
 //
+//
 // There is a tighter coupling between allocation and mapping because of the
 // allocator UVM must use. Hence, this function does the equivalent of
 // uvm_mem_map_gpu_phys().
@@ -708,7 +710,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
 {
    struct page **pages = mem->sysmem.pages;
    size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
-    pgprot_t prot = PAGE_KERNEL;
+    pgprot_t prot;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));

@@ -725,6 +727,8 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)

    if (g_uvm_global.conf_computing_enabled && uvm_mem_is_sysmem_dma(mem))
        prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);
+    else
+        prot = PAGE_KERNEL;

    mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);

@@ -992,7 +996,7 @@ uvm_gpu_address_t uvm_mem_gpu_address_copy(uvm_mem_t *mem, uvm_gpu_t *accessing_
    // Peer GPUs may need to use some form of translation (identity mappings,
    // indirect peers) to copy.
    chunk = mem_get_chunk(mem, offset, &chunk_offset);
-    copy_addr = uvm_pmm_gpu_peer_copy_address(&mem->backing_gpu->pmm, chunk, accessing_gpu);
+    copy_addr = uvm_gpu_peer_copy_address(mem->backing_gpu, chunk->address, accessing_gpu);
    copy_addr.address += chunk_offset;
    return copy_addr;
 }
--- a/kernel-open/nvidia-uvm/uvm_mem.h
+++ b/kernel-open/nvidia-uvm/uvm_mem.h
@@ -161,7 +161,7 @@ struct uvm_mem_struct
    // lifetime of the GPU. For CPU allocations there is no lifetime limitation.
    uvm_gpu_t *backing_gpu;

-    // For Confidential Computing, the accessing GPU needs to be known at alloc
+    // In Confidential Computing, the accessing GPU needs to be known at alloc
    // time for sysmem allocations.
    uvm_gpu_t *dma_owner;

--- a/kernel-open/nvidia-uvm/uvm_mem_test.c
+++ b/kernel-open/nvidia-uvm/uvm_mem_test.c
@@ -358,10 +358,8 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
    // Pascal+ can map sysmem with 4K, 64K and 2M PTEs, other GPUs can only use
    // 4K. Test all of the sizes supported by Pascal+ and 128K to match big page
    // size on pre-Pascal GPUs with 128K big page size.
-    // Ampere+ also supports 512M PTEs, but since UVM's maximum chunk size is
-    // 2M, we don't test for this page size.
-    // Blackwell+ also supports 256G PTEs and the above holds for this case too.
-
+    // Ampere+ supports 512M PTEs and Blackwell+ supports 256G PTEs, but since
+    // UVM's maximum chunk size is 2M, we don't test for these page sizes.
    static const NvU64 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;

    // All supported page sizes will be tested, CPU has the most with 4 and +1
--- a/kernel-open/nvidia-uvm/uvm_migrate.c
+++ b/kernel-open/nvidia-uvm/uvm_migrate.c
@@ -130,27 +130,12 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
    uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
    NV_STATUS status = NV_OK;
    NV_STATUS tracker_status;
+    uvm_prot_t prot = UVM_PROT_READ_WRITE_ATOMIC;

    // Get the mask of unmapped pages because it will change after the
    // first map operation
    uvm_va_block_unmapped_pages_get(va_block, region, &va_block_context->caller_page_mask);

-    if (uvm_va_block_is_hmm(va_block) && !UVM_ID_IS_CPU(dest_id)) {
-        // Do not map pages that are already resident on the CPU. This is in
-        // order to avoid breaking system-wide atomic operations on HMM. HMM's
-        // implementation of system-side atomic operations involves restricting
-        // mappings to one processor (CPU or a GPU) at a time. If we were to
-        // grant a GPU a mapping to system memory, this gets into trouble
-        // because, on the CPU side, Linux can silently upgrade PTE permissions
-        // (move from read-only, to read-write, without any MMU notifiers
-        // firing), thus breaking the model by allowing simultaneous read-write
-        // access from two separate processors. To avoid that, just don't map
-        // such pages at all, when migrating.
-        uvm_page_mask_andnot(&va_block_context->caller_page_mask,
-                             &va_block_context->caller_page_mask,
-                             uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE));
-    }
-
    // Only map those pages that are not mapped anywhere else (likely due
    // to a first touch or a migration). We pass
    // UvmEventMapRemoteCauseInvalid since the destination processor of a
@@ -166,6 +151,31 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
    if (status != NV_OK)
        goto out;

+    if (uvm_va_block_is_hmm(va_block) && UVM_ID_IS_CPU(dest_id)) {
+        uvm_processor_id_t id;
+
+        // Do not atomically map pages that are resident on the CPU. This is in
+        // order to avoid breaking system-wide atomic operations on HMM. HMM's
+        // implementation of system-wide atomic operations involves restricting
+        // mappings to one processor (CPU or a GPU) at a time. If we were to
+        // grant a GPU a mapping to system memory, this gets into trouble
+        // because, on the CPU side, Linux can silently upgrade PTE permissions
+        // (move from read-only, to read-write, without any MMU notifiers
+        // firing), thus breaking the model by allowing simultaneous read-write
+        // access from two separate processors. To avoid that, don't remote map
+        // such pages atomically, after migrating.
+        // Also note that HMM sets CPU mapping for resident pages so the mask
+        // of pages to be mapped needs to be recomputed without including the
+        // CPU mapping.
+        prot = UVM_PROT_READ_WRITE;
+        uvm_page_mask_region_fill(&va_block_context->caller_page_mask, region);
+        for_each_gpu_id_in_mask(id, &va_block->mapped) {
+            uvm_page_mask_andnot(&va_block_context->caller_page_mask,
+                                 &va_block_context->caller_page_mask,
+                                 uvm_va_block_map_mask_get(va_block, id));
+        }
+    }
+
    // Add mappings for AccessedBy processors
    //
    // No mappings within this call will operate on dest_id, so we don't
@@ -176,7 +186,7 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
                                                       dest_id,
                                                       region,
                                                       &va_block_context->caller_page_mask,
-                                                       UVM_PROT_READ_WRITE_ATOMIC,
+                                                       prot,
                                                       NULL);

 out:
@@ -227,6 +237,8 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
    uvm_assert_mutex_locked(&va_block->lock);
    UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context->hmm.vma, region));

+    uvm_processor_mask_zero(&va_block_context->make_resident.all_involved_processors);
+
    if (uvm_va_block_is_hmm(va_block)) {
        status = uvm_hmm_va_block_migrate_locked(va_block,
                                                 va_block_retry,
@@ -269,6 +281,10 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
    if (out_tracker)
        tracker_status = uvm_tracker_add_tracker_safe(out_tracker, &va_block->tracker);

+    uvm_processor_mask_or(&service_context->gpus_to_check_for_nvlink_errors,
+                          &service_context->gpus_to_check_for_nvlink_errors,
+                          &va_block_context->make_resident.all_involved_processors);
+
    return status == NV_OK ? tracker_status : status;
 }

@@ -320,7 +336,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
 // The current logic checks that:
 // - We are in the first pass of the migration (see the explanation of the
 // two-pass strategy in uvm_migrate).
-// - The CPU has an NVLINK interconnect to the GPUs. Otherwise, we don't
+// - The CPU has an NVLINK or C2C interconnect to the GPUs. Otherwise, we don't
 // need this optimization since we are already limited by PCIe BW.
 // - If the migration spans several VA blocks, otherwise skip the preunmap to
 // avoid the overhead.
@@ -335,7 +351,7 @@ static bool migration_should_do_cpu_preunmap(uvm_va_space_t *va_space,
    if (pass != UVM_MIGRATE_PASS_FIRST || is_single_block)
        return false;

-    if (uvm_processor_mask_get_gpu_count(&va_space->has_nvlink[UVM_ID_CPU_VALUE]) == 0)
+    if (uvm_processor_mask_get_gpu_count(&va_space->has_fast_link[UVM_ID_CPU_VALUE]) == 0)
        return false;

    return true;
@@ -559,14 +575,14 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
    UVM_ASSERT(first_managed_range == uvm_va_space_iter_managed_first(va_space, base, base));

    managed_range_last = NULL;
-    uvm_for_each_va_range_managed_in_contig_from(managed_range, va_space, first_managed_range, end) {
+    uvm_for_each_va_range_managed_in_contig_from(managed_range, first_managed_range, end) {
        uvm_range_group_range_iter_t iter;
        uvm_va_policy_t *policy = &managed_range->policy;

        managed_range_last = managed_range;

        // For UVM-Lite GPUs, the CUDA driver may suballocate a single
-        // managed_range into many range groups.  For this reason, we iterate
+        // managed_range into many range groups. For this reason, we iterate
        // over each managed_range first then through the range groups within.
        uvm_range_group_for_each_migratability_in(&iter,
                                                  va_space,
@@ -624,7 +640,8 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
                             int dest_nid,
                             NvU32 migrate_flags,
                             uvm_va_range_managed_t *first_managed_range,
-                             uvm_tracker_t *out_tracker)
+                             uvm_tracker_t *out_tracker,
+                             uvm_processor_mask_t *gpus_to_check_for_nvlink_errors)
 {
    NV_STATUS status = NV_OK;
    uvm_service_block_context_t *service_context;
@@ -651,6 +668,8 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,

    service_context->block_context->make_resident.dest_nid = dest_nid;

+    uvm_processor_mask_zero(&service_context->gpus_to_check_for_nvlink_errors);
+
    // We perform two passes (unless the migration only covers a single VA
    // block or UVM_MIGRATE_FLAG_SKIP_CPU_MAP is passed). This helps in the
    // following scenarios:
@@ -707,6 +726,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
                                    out_tracker);
    }

+    uvm_processor_mask_copy(gpus_to_check_for_nvlink_errors, &service_context->gpus_to_check_for_nvlink_errors);
    uvm_service_block_context_free(service_context);

    return status;
@@ -845,9 +865,9 @@ NV_STATUS uvm_migrate_init(void)
        else {
            g_uvm_perf_migrate_cpu_preunmap_size = UVM_VA_BLOCK_SIZE << UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT;

-            pr_info("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
-                    uvm_perf_migrate_cpu_preunmap_block_order,
-                    UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
+            UVM_INFO_PRINT("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
+                           uvm_perf_migrate_cpu_preunmap_block_order,
+                           UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
        }
    }

@@ -871,6 +891,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
    bool flush_events = false;
    const bool synchronous = !(params->flags & UVM_MIGRATE_FLAG_ASYNC);
    int cpu_numa_node = (int)params->cpuNumaNode;
+    uvm_processor_mask_t *gpus_to_check_for_nvlink_errors = NULL;

    // We temporarily allow 0 length in the IOCTL parameters as a signal to
    // only release the semaphore. This is because user-space is in charge of
@@ -888,10 +909,15 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)

    if ((params->flags & UVM_MIGRATE_FLAGS_TEST_ALL) && !uvm_enable_builtin_tests) {
        UVM_INFO_PRINT("Test flag set for UVM_MIGRATE. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
-        UVM_INFO_PRINT("TEMP\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

+    gpus_to_check_for_nvlink_errors = uvm_processor_mask_cache_alloc();
+    if (!gpus_to_check_for_nvlink_errors)
+        return NV_ERR_NO_MEMORY;
+
+    uvm_processor_mask_zero(gpus_to_check_for_nvlink_errors);
+
    // mmap_lock will be needed if we have to create CPU mappings
    mm = uvm_va_space_mm_or_current_retain_lock(va_space);
    uvm_va_space_down_read(va_space);
@@ -986,6 +1012,8 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
                .populate_on_migrate_vma_failures   = true,
                .user_space_start                   = &params->userSpaceStart,
                .user_space_length                  = &params->userSpaceLength,
+                .gpus_to_check_for_nvlink_errors    = gpus_to_check_for_nvlink_errors,
+                .fail_on_unresolved_sto_errors      = false,
            };

            status = uvm_migrate_pageable(&uvm_migrate_args);
@@ -999,11 +1027,14 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
                                 (UVM_ID_IS_CPU(dest_id) ? cpu_numa_node : NUMA_NO_NODE),
                                 params->flags,
                                 uvm_va_space_iter_managed_first(va_space, params->base, params->base),
-                                 tracker_ptr);
+                                 tracker_ptr,
+                                 gpus_to_check_for_nvlink_errors);
        }
    }

 done:
+    uvm_global_gpu_retain(gpus_to_check_for_nvlink_errors);
+
    // We only need to hold mmap_lock to create new CPU mappings, so drop it if
    // we need to wait for the tracker to finish.
    //
@@ -1042,6 +1073,13 @@ done:
    uvm_va_space_up_read(va_space);
    uvm_va_space_mm_or_current_release(va_space, mm);

+    // Check for STO errors in case there was no other error until now.
+    if (status == NV_OK && !uvm_processor_mask_empty(gpus_to_check_for_nvlink_errors))
+        status = uvm_global_gpu_check_nvlink_error(gpus_to_check_for_nvlink_errors);
+
+    uvm_global_gpu_release(gpus_to_check_for_nvlink_errors);
+    uvm_processor_mask_cache_free(gpus_to_check_for_nvlink_errors);
+
    // If the migration is known to be complete, eagerly dispatch the migration
    // events, instead of processing them on a later event flush. Note that an
    // asynchronous migration could be complete by now, but the flush would not
@@ -1064,6 +1102,13 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
    uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
    NvU32 migrate_flags = 0;
    uvm_gpu_t *gpu = NULL;
+    uvm_processor_mask_t *gpus_to_check_for_nvlink_errors = NULL;
+
+    gpus_to_check_for_nvlink_errors = uvm_processor_mask_cache_alloc();
+    if (!gpus_to_check_for_nvlink_errors)
+        return NV_ERR_NO_MEMORY;
+ 
+    uvm_processor_mask_zero(gpus_to_check_for_nvlink_errors);

    // mmap_lock will be needed if we have to create CPU mappings
    mm = uvm_va_space_mm_or_current_retain_lock(va_space);
@@ -1113,7 +1158,8 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
                                 NUMA_NO_NODE,
                                 migrate_flags,
                                 first_managed_range,
-                                 &local_tracker);
+                                 &local_tracker,
+                                 gpus_to_check_for_nvlink_errors);
        }

        if (status != NV_OK)
@@ -1121,6 +1167,8 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
    }

 done:
+    uvm_global_gpu_retain(gpus_to_check_for_nvlink_errors);
+
    // We only need to hold mmap_lock to create new CPU mappings, so drop it if
    // we need to wait for the tracker to finish.
    //
@@ -1138,5 +1186,12 @@ done:
    // This API is synchronous, so wait for migrations to finish
    uvm_tools_flush_events();

+    // Check for STO errors in case there was no other error until now.
+    if (status == NV_OK && tracker_status == NV_OK)
+        status = uvm_global_gpu_check_nvlink_error(gpus_to_check_for_nvlink_errors);
+
+    uvm_global_gpu_release(gpus_to_check_for_nvlink_errors);
+    uvm_processor_mask_cache_free(gpus_to_check_for_nvlink_errors);
+
    return status == NV_OK? tracker_status : status;
 }
--- a/kernel-open/nvidia-uvm/uvm_migrate_pageable.c
+++ b/kernel-open/nvidia-uvm/uvm_migrate_pageable.c
@@ -62,10 +62,9 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
        *gpu_addr = uvm_gpu_address_copy(owning_gpu, uvm_gpu_page_to_phys_address(owning_gpu, page));
    }
    else if (owning_gpu && can_copy_from) {
-        uvm_gpu_identity_mapping_t *gpu_peer_mappings = uvm_gpu_get_peer_mapping(copying_gpu, owning_gpu->id);
        uvm_gpu_phys_address_t phys_addr = uvm_gpu_page_to_phys_address(owning_gpu, page);

-        *gpu_addr = uvm_gpu_address_virtual(gpu_peer_mappings->base + phys_addr.address);
+        *gpu_addr = uvm_gpu_peer_copy_address(owning_gpu, phys_addr.address, copying_gpu);
    }
    else {
        NV_STATUS status = uvm_parent_gpu_map_cpu_page(copying_gpu->parent, page, &state->dma.addrs[page_index]);
@@ -399,6 +398,38 @@ static NV_STATUS migrate_vma_populate_anon_pages(struct vm_area_struct *vma,
    return status;
 }

+static NV_STATUS zero_non_failed_pages_in_mask(uvm_push_t *push,                  // push that receives the memsets
+                                               const unsigned long *pfns,         // migrate pfns, indexed by page
+                                               unsigned long *page_mask,          // bitmap of page indices to visit
+                                               unsigned long mask_size,           // bit count scanned in page_mask
+                                               migrate_vma_state_t *state)        // supplies dst_id, failure mask
+{ // Pushes a PAGE_SIZE zeroing memset for each page_mask bit not set in allocation_failed_mask.
+    unsigned long i;
+    uvm_migrate_args_t *uvm_migrate_args = state->uvm_migrate_args;
+    uvm_processor_id_t dst_id = uvm_migrate_args->dst_id;
+    uvm_gpu_t *zeroing_gpu = uvm_push_get_gpu(push); // the GPU owning the push performs the zeroing
+
+    for_each_set_bit(i, page_mask, mask_size) {
+        struct page *page;
+        uvm_gpu_address_t dst_address;
+        NV_STATUS status;
+
+        if (test_bit(i, state->allocation_failed_mask)) // pages flagged as failed allocations are skipped
+            continue;
+
+        page = migrate_pfn_to_page(pfns[i]);
+        status = migrate_vma_page_copy_address(page, i, dst_id, zeroing_gpu, state, &dst_address);
+        if (status != NV_OK) // first address failure aborts; memsets already pushed stay in the push
+            return status;
+
+        uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED); // NOTE(review): presumably affects only the next method - confirm
+        uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE); // NOTE(review): no membar requested here; caller presumably orders - confirm
+        zeroing_gpu->parent->ce_hal->memset_8(push, dst_address, 0, PAGE_SIZE);
+    }
+
+    return NV_OK; // every eligible page had a memset pushed; the caller ends the push
+}
+
 static NV_STATUS migrate_vma_copy_pages_from(struct vm_area_struct *vma,
                                             const unsigned long *src,
                                             unsigned long *dst,
@@ -411,36 +442,82 @@ static NV_STATUS migrate_vma_copy_pages_from(struct vm_area_struct *vma,
    uvm_push_t push;
    unsigned long i;
    uvm_gpu_t *copying_gpu = NULL;
+    uvm_gpu_t *src_gpu = UVM_ID_IS_GPU(src_id) ? uvm_gpu_get(src_id) : NULL;
    uvm_migrate_args_t *uvm_migrate_args = state->uvm_migrate_args;
    uvm_processor_id_t dst_id = uvm_migrate_args->dst_id;
    unsigned long *page_mask = state->processors[uvm_id_value(src_id)].page_mask;
    uvm_va_space_t *va_space = uvm_migrate_args->va_space;
+    uvm_tracker_t zero_tracker = UVM_TRACKER_INIT();

    UVM_ASSERT(!bitmap_empty(page_mask, state->num_pages));

+    // Pre-allocate the dst pages and mark the ones that failed
    for_each_set_bit(i, page_mask, state->num_pages) {
-        uvm_gpu_address_t src_address;
-        uvm_gpu_address_t dst_address;
-        struct page *src_page = migrate_pfn_to_page(src[i]);
-        struct page *dst_page;
-
-        UVM_ASSERT(src[i] & MIGRATE_PFN_VALID);
-        UVM_ASSERT(src_page);
-
-        dst_page = migrate_vma_alloc_page(state);
+        struct page *dst_page = migrate_vma_alloc_page(state);
        if (!dst_page) {
            __set_bit(i, state->allocation_failed_mask);
            continue;
        }

+        lock_page(dst_page);
+        dst[i] = migrate_pfn(page_to_pfn(dst_page));
+    }
+
+    // Zero destination pages in case of NVLINK copy that can hit STO or XC,
+    // or in case of injected unresolved NVLINK error.
+    // TODO: Bug 4922701: [uvm] Re-evaluate STO handling for ATS migrations
+    //       This can be removed if the false-positive rate of STO
+    //       fast-path is low enough to prefer failing the copy when an STO
+    //       fast-path error is detected.
+    if (UVM_ID_IS_GPU(src_id) &&
+        UVM_ID_IS_GPU(dst_id) &&
+        ((src_gpu->nvlink_status.enabled &&
+        (uvm_parent_gpu_peer_link_type(src_gpu->parent, uvm_gpu_get(dst_id)->parent) >= UVM_GPU_LINK_NVLINK_5)) ||
+        uvm_gpu_get_injected_nvlink_error(src_gpu) == NV_WARN_MORE_PROCESSING_REQUIRED)) {
+        uvm_gpu_t *dst_gpu = uvm_gpu_get(dst_id);
+        uvm_push_t zero_push;
+        
+        status = migrate_vma_zero_begin_push(va_space, dst_id, dst_gpu, start, outer - 1, &zero_push);
+        if (status != NV_OK)
+            return status;
+
+        status = zero_non_failed_pages_in_mask(&zero_push, dst, page_mask, state->num_pages, state);
+
+        uvm_push_end(&zero_push);
+
+        if (status == NV_OK)
+            status = uvm_tracker_add_push_safe(&zero_tracker, &zero_push);
+
+        if (status != NV_OK)
+            return status;
+    }
+
+    for_each_set_bit(i, page_mask, state->num_pages) {
+        uvm_gpu_address_t src_address;
+        uvm_gpu_address_t dst_address;
+        struct page *src_page = migrate_pfn_to_page(src[i]);
+        struct page *dst_page = migrate_pfn_to_page(dst[i]);
+
+        if (test_bit(i, state->allocation_failed_mask))
+            continue;
+
+        UVM_ASSERT(src[i] & MIGRATE_PFN_VALID);
+        UVM_ASSERT(src_page);
+        UVM_ASSERT(dst[i] & MIGRATE_PFN_VALID);
+        UVM_ASSERT(dst_page);
+
        if (!copying_gpu) {
            status = migrate_vma_copy_begin_push(va_space, dst_id, src_id, start, outer - 1, &push);
-            if (status != NV_OK) {
-                __free_page(dst_page);
-                return status;
-            }
+            if (status != NV_OK)
+                break;

            copying_gpu = uvm_push_get_gpu(&push);
+            if (src_gpu)
+                UVM_ASSERT(src_gpu == copying_gpu);
+
+            // The zero tracker will be empty if zeroing is not necessary
+            uvm_push_acquire_tracker(&push, &zero_tracker);
+            uvm_tracker_deinit(&zero_tracker);
        }
        else {
            uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
@@ -452,18 +529,12 @@ static NV_STATUS migrate_vma_copy_pages_from(struct vm_area_struct *vma,
        if (status == NV_OK)
            status = migrate_vma_page_copy_address(dst_page, i, dst_id, copying_gpu, state, &dst_address);

-        if (status != NV_OK) {
-            __free_page(dst_page);
+        if (status != NV_OK)
            break;
-        }
-
-        lock_page(dst_page);

        // We'll push one membar later for all copies in this loop
        uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
        copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, PAGE_SIZE);
-
-        dst[i] = migrate_pfn(page_to_pfn(dst_page));
    }

    // TODO: Bug 1766424: If the destination is a GPU and the copy was done by
@@ -523,6 +594,7 @@ static void migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_sta
    unsigned long start = args->start;
    unsigned long outer = args->end;
    NV_STATUS tracker_status;
+    uvm_migrate_args_t *uvm_migrate_args = state->uvm_migrate_args;

    uvm_tracker_init(&state->tracker);

@@ -542,6 +614,40 @@ static void migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_sta
    if (state->status == NV_OK)
        state->status = tracker_status;

+    // Check if the copy might have been impacted by NVLINK errors.
+    if (state->status == NV_OK) {
+        uvm_processor_id_t src_id;
+
+        for_each_id_in_mask(src_id, &state->src_processors) {
+            NV_STATUS status;
+
+            // Skip CPU source, even if for some reason the operation went over
+            // NVLINK, it'd be a read and hit poison.
+            if (UVM_ID_IS_CPU(src_id))
+                continue;
+
+            UVM_ASSERT(UVM_ID_IS_GPU(src_id));
+            status = uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_get(src_id));
+
+            // Set state->status to the first error if there's an NVLINK error.
+            // Do not report NV_WARN_MORE_PROCESSING_REQUIRED. The call to the
+            // uvm_migrate_vma_copy_pages above zeroed the destination.
+            // Thus in case of real STO error zeroed pages will be mapped.
+            if (state->status == NV_OK && status != NV_WARN_MORE_PROCESSING_REQUIRED)
+                state->status = status;
+
+            // Record unresolved GPU errors if the caller can use the information
+            if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
+                if (uvm_migrate_args->gpus_to_check_for_nvlink_errors)
+                    uvm_processor_mask_set(uvm_migrate_args->gpus_to_check_for_nvlink_errors, src_id);
+
+                // fail the copy if requested by the caller
+                if (uvm_migrate_args->fail_on_unresolved_sto_errors && state->status == NV_OK)
+                    state->status = NV_ERR_BUSY_RETRY;
+            }
+        }
+    }
+
    // Mark all pages as not migrating if we're failing
    if (state->status != NV_OK)
        migrate_vma_cleanup_pages(args->dst, state->num_pages);
@@ -870,6 +976,14 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
    if (va_space->test.skip_migrate_vma)
        return NV_WARN_NOTHING_TO_DO;

+    // This isn't the right path for a UVM-owned vma. In most cases the callers
+    // will take the correct (managed) path, but we can get here if invoked on a
+    // disabled vma (see uvm_disable_vma()) that has no VA range but still has a
+    // vma. This could cause locking issues if the caller has the VA space
+    // locked and we invoke a UVM fault handler, so avoid it entirely.
+    if (uvm_file_is_nvidia_uvm(vma->vm_file))
+        return NV_ERR_INVALID_ADDRESS;
+
    // TODO: Bug 2419180: support file-backed pages in migrate_vma, when
    //       support for it is added to the Linux kernel
    if (!vma_is_anonymous(vma))
@@ -1002,9 +1116,12 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
            return NV_ERR_INVALID_ARGUMENT;
    }
    else {
+        uvm_gpu_t *gpu = uvm_gpu_get(dst_id);
+
        // Incoming dst_node_id is only valid if dst_id belongs to the CPU. Use
        // dst_node_id as the GPU node id if dst_id doesn't belong to the CPU.
-        uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(uvm_gpu_get(dst_id));
+        UVM_ASSERT(gpu->mem_info.numa.enabled);
+        uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(gpu);
    }

    state = kmem_cache_alloc(g_uvm_migrate_vma_state_cache, NV_UVM_GFP_FLAGS);
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Bernhard Stoeckner	8ec351aeb9	570.144	2025-04-27 09:05:17 +02:00
Bernhard Stoeckner	e8113f665d	570.133.20	2025-04-17 17:56:49 +02:00
Bernhard Stoeckner	c5e439fea4	570.133.07	2025-03-19 14:13:05 +01:00
Bernhard Stoeckner	25bef4626e	570.124.06	2025-03-03 19:08:20 +01:00
Bernhard Stoeckner	129479b1b7	570.124.04	2025-02-27 17:32:23 +01:00
Bernhard Stoeckner	81fe4fb417	570.86.16	2025-01-30 17:37:03 +01:00
Bernhard Stoeckner	54d69484da	570.86.15	2025-01-27 19:36:56 +01:00
Bernhard Stoeckner	9d0b0414a5	565.77	2024-12-05 16:37:35 +01:00