550.120

550.107.02
2026-01-27 19:49:47 +00:00 · 2024-09-20 12:40:39 -07:00 · 2024-07-29 10:22:58 +02:00
1345 changed files with 157270 additions and 253519 deletions
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source

 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 560.31.02.
+version 550.120.


 ## How to Build
@@ -17,7 +17,7 @@ as root:

 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-560.31.02 driver release.  This can be achieved by installing
+550.120 driver release.  This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option.  E.g.,

@@ -74,7 +74,7 @@ kernel.

 The NVIDIA open kernel modules support the same range of Linux kernel
 versions that are supported with the proprietary NVIDIA kernel modules.
-This is currently Linux kernel 4.15 or newer.
+This is currently Linux kernel 3.10 or newer.


 ## How to Contribute
@@ -179,13 +179,16 @@ software applications.

 ## Compatible GPUs

-The NVIDIA open kernel modules can be used on any Turing or later GPU (see the
-table below).
+The NVIDIA open kernel modules can be used on any Turing or later GPU
+(see the table below). However, in the 550.120 release, GeForce and
+Workstation support is considered to be Beta quality. The open kernel modules
+are suitable for broad usage, and NVIDIA requests feedback on any issues
+encountered specific to them.

 For details on feature support and limitations, see the NVIDIA GPU driver
 end user README here:

-https://us.download.nvidia.com/XFree86/Linux-x86_64/560.31.02/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/550.120/README/kernel_open.html

 For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
 Package for more details.
@@ -831,12 +834,10 @@ Subsystem Device ID.
 | NVIDIA GeForce RTX 2050                         | 25AD           |
 | NVIDIA RTX A1000                                | 25B0 1028 1878 |
 | NVIDIA RTX A1000                                | 25B0 103C 1878 |
-| NVIDIA RTX A1000                                | 25B0 103C 8D96 |
 | NVIDIA RTX A1000                                | 25B0 10DE 1878 |
 | NVIDIA RTX A1000                                | 25B0 17AA 1878 |
 | NVIDIA RTX A400                                 | 25B2 1028 1879 |
 | NVIDIA RTX A400                                 | 25B2 103C 1879 |
-| NVIDIA RTX A400                                 | 25B2 103C 8D95 |
 | NVIDIA RTX A400                                 | 25B2 10DE 1879 |
 | NVIDIA RTX A400                                 | 25B2 17AA 1879 |
 | NVIDIA A16                                      | 25B6 10DE 14A9 |
@@ -857,7 +858,6 @@ Subsystem Device ID.
 | NVIDIA RTX A500 Embedded GPU                    | 25FB           |
 | NVIDIA GeForce RTX 4090                         | 2684           |
 | NVIDIA GeForce RTX 4090 D                       | 2685           |
-| NVIDIA GeForce RTX 4070 Ti SUPER                | 2689           |
 | NVIDIA RTX 6000 Ada Generation                  | 26B1 1028 16A1 |
 | NVIDIA RTX 6000 Ada Generation                  | 26B1 103C 16A1 |
 | NVIDIA RTX 6000 Ada Generation                  | 26B1 10DE 16A1 |
--- a/kernel-open/Kbuild
+++ b/kernel-open/Kbuild
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"560.31.02\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.120\"

 ifneq ($(SYSSRCHOST1X),)
 EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -118,7 +118,7 @@ ifeq ($(ARCH),x86_64)
 endif

 ifeq ($(ARCH),powerpc)
- EXTRA_CFLAGS += -mlittle-endian -mno-strict-align
+ EXTRA_CFLAGS += -mlittle-endian -mno-strict-align -mno-altivec
 endif

 EXTRA_CFLAGS += -DNV_UVM_ENABLE
@@ -172,7 +172,6 @@ NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
 NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
 NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
 NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
-NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)
 NV_CONFTEST_CFLAGS += -Wno-error

 NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
--- a/kernel-open/common/inc/nv-firmware.h
+++ b/kernel-open/common/inc/nv-firmware.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -44,7 +44,6 @@ typedef enum
    NV_FIRMWARE_CHIP_FAMILY_GA10X = 4,
    NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
    NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
-    NV_FIRMWARE_CHIP_FAMILY_GB10X = 8,
    NV_FIRMWARE_CHIP_FAMILY_END,
 } nv_firmware_chip_family_t;

@@ -53,7 +52,6 @@ static inline const char *nv_firmware_chip_family_to_string(
 )
 {
    switch (fw_chip_family) {
-        case NV_FIRMWARE_CHIP_FAMILY_GB10X: return "gb10x";
        case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
        case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
        case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
@@ -68,13 +66,13 @@ static inline const char *nv_firmware_chip_family_to_string(
    return NULL;
 }

-// The includer may optionally define
-// NV_FIRMWARE_FOR_NAME(name)
-// to return a platform-defined string for a given a gsp_* or gsp_log_* name.
+// The includer (presumably nv.c) may optionally define
+// NV_FIRMWARE_PATH_FOR_FILENAME(filename)
+// to return a string "path" given a gsp_*.bin or gsp_log_*.bin filename.
 //
-// The function nv_firmware_for_chip_family will then be available.
-#if defined(NV_FIRMWARE_FOR_NAME)
-static inline const char *nv_firmware_for_chip_family(
+// The function nv_firmware_path will then be available.
+#if defined(NV_FIRMWARE_PATH_FOR_FILENAME)
+static inline const char *nv_firmware_path(
    nv_firmware_type_t fw_type,
    nv_firmware_chip_family_t fw_chip_family
 )
@@ -83,16 +81,15 @@ static inline const char *nv_firmware_for_chip_family(
    {
        switch (fw_chip_family)
        {
-            case NV_FIRMWARE_CHIP_FAMILY_GB10X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GH100:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_AD10X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GA10X:
-                return NV_FIRMWARE_FOR_NAME("gsp_ga10x");
+                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ga10x.bin");

            case NV_FIRMWARE_CHIP_FAMILY_GA100:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU11X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU10X:
-                return NV_FIRMWARE_FOR_NAME("gsp_tu10x");
+                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");

            case NV_FIRMWARE_CHIP_FAMILY_END:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_NULL:
@@ -103,16 +100,15 @@ static inline const char *nv_firmware_for_chip_family(
    {
        switch (fw_chip_family)
        {
-            case NV_FIRMWARE_CHIP_FAMILY_GB10X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GH100:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_AD10X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GA10X:
-                return NV_FIRMWARE_FOR_NAME("gsp_log_ga10x");
+                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ga10x.bin");

            case NV_FIRMWARE_CHIP_FAMILY_GA100:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU11X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU10X:
-                return NV_FIRMWARE_FOR_NAME("gsp_log_tu10x");
+                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");

            case NV_FIRMWARE_CHIP_FAMILY_END:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_NULL:
@@ -122,15 +118,15 @@ static inline const char *nv_firmware_for_chip_family(

    return "";
 }
-#endif  // defined(NV_FIRMWARE_FOR_NAME)
+#endif  // defined(NV_FIRMWARE_PATH_FOR_FILENAME)

-// The includer may optionally define
-// NV_FIRMWARE_DECLARE_GSP(name)
+// The includer (presumably nv.c) may optionally define
+// NV_FIRMWARE_DECLARE_GSP_FILENAME(filename)
 // which will then be invoked (at the top-level) for each
-// gsp_* (but not gsp_log_*)
-#if defined(NV_FIRMWARE_DECLARE_GSP)
-NV_FIRMWARE_DECLARE_GSP("gsp_ga10x")
-NV_FIRMWARE_DECLARE_GSP("gsp_tu10x")
-#endif  // defined(NV_FIRMWARE_DECLARE_GSP)
+// gsp_*.bin (but not gsp_log_*.bin)
+#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
+NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ga10x.bin")
+NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
+#endif  // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)

-#endif  // NV_FIRMWARE_DECLARE_GSP
+#endif  // NV_FIRMWARE_DECLARE_GSP_FILENAME
--- a/kernel-open/common/inc/nv-linux.h
+++ b/kernel-open/common/inc/nv-linux.h
@@ -58,10 +58,14 @@
 #include <linux/version.h>
 #include <linux/utsname.h>

-#if LINUX_VERSION_CODE == KERNEL_VERSION(4, 4, 0)
-// Version 4.4 is allowed, temporarily, although not officially supported.
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
-#error "This driver does not support kernels older than Linux 4.15!"
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
+#error "This driver does not support kernels older than 2.6.32!"
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 7, 0)
+#  define KERNEL_2_6
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
+#  define KERNEL_3
+#else
+#error "This driver does not support development kernels!"
 #endif

 #if defined (CONFIG_SMP) && !defined (__SMP__)
@@ -840,16 +844,16 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
 #define NV_PRINT_AT(nv_debug_level,at)                                           \
    {                                                                            \
        nv_printf(nv_debug_level,                                                \
-            "NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, "                    \
+            "NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, flags = 0x%08x, "    \
            "page_table = 0x%p\n",  __FUNCTION__, __LINE__, at,                  \
            at->num_pages, NV_ATOMIC_READ(at->usage_count),                      \
-            at->page_table);                                                     \
+            at->flags, at->page_table);                                          \
    }

 #define NV_PRINT_VMA(nv_debug_level,vma)                                                 \
    {                                                                                    \
        nv_printf(nv_debug_level,                                                        \
-            "NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08lx bytes @ 0x%016llx, 0x%p, 0x%p\n",    \
+            "NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08x bytes @ 0x%016llx, 0x%p, 0x%p\n",    \
            __FUNCTION__, __LINE__, vma->vm_start, vma->vm_end, NV_VMA_SIZE(vma),        \
            NV_VMA_OFFSET(vma), NV_VMA_PRIVATE(vma), NV_VMA_FILE(vma));                  \
    }
@@ -1082,8 +1086,6 @@ static inline void nv_kmem_ctor_dummy(void *arg)
        kmem_cache_destroy(kmem_cache);     \
    }

-#define NV_KMEM_CACHE_ALLOC_ATOMIC(kmem_cache)     \
-    kmem_cache_alloc(kmem_cache, GFP_ATOMIC)
 #define NV_KMEM_CACHE_ALLOC(kmem_cache)     \
    kmem_cache_alloc(kmem_cache, GFP_KERNEL)
 #define NV_KMEM_CACHE_FREE(ptr, kmem_cache) \
@@ -1110,23 +1112,6 @@ static inline void *nv_kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
 #endif
 }

-static inline int nv_kmem_cache_alloc_stack_atomic(nvidia_stack_t **stack)
-{
-    nvidia_stack_t *sp = NULL;
-#if defined(NVCPU_X86_64)
-    if (rm_is_altstack_in_use())
-    {
-        sp = NV_KMEM_CACHE_ALLOC_ATOMIC(nvidia_stack_t_cache);
-        if (sp == NULL)
-            return -ENOMEM;
-        sp->size = sizeof(sp->stack);
-        sp->top = sp->stack + sp->size;
-    }
-#endif
-    *stack = sp;
-    return 0;
-}
-
 static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
 {
    nvidia_stack_t *sp = NULL;
@@ -1182,16 +1167,6 @@ typedef struct nvidia_pte_s {
    unsigned int    page_count;
 } nvidia_pte_t;

-#if defined(CONFIG_DMA_SHARED_BUFFER)
-/* Standard dma_buf-related information. */
-struct nv_dma_buf
-{
-    struct dma_buf *dma_buf;
-    struct dma_buf_attachment *dma_attach;
-    struct sg_table *sgt;
-};
-#endif // CONFIG_DMA_SHARED_BUFFER
-
 typedef struct nv_alloc_s {
    struct nv_alloc_s *next;
    struct device     *dev;
--- a/kernel-open/common/inc/nv-mm.h
+++ b/kernel-open/common/inc/nv-mm.h
@@ -29,17 +29,17 @@
 typedef int vm_fault_t;
 #endif

-/*
- * pin_user_pages()
- *
+/* pin_user_pages
 * Presence of pin_user_pages() also implies the presence of unpin-user_page().
- * Both were added in the v5.6.
+ * Both were added in the v5.6-rc1
 *
- * pin_user_pages() was added by commit eddb1c228f79
- * ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
+ * pin_user_pages() was added by commit eddb1c228f7951d399240
+ * ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
+ *
+ * Removed vmas parameter from pin_user_pages() by commit 40896a02751
+ * ("mm/gup: remove vmas parameter from pin_user_pages()")
+ * in linux-next, expected in v6.5-rc1 (2023-05-17)
 *
- * Removed vmas parameter from pin_user_pages() by commit 4c630f307455
- * ("mm/gup: remove vmas parameter from pin_user_pages()") in v6.5.
 */

 #include <linux/mm.h>
@@ -63,28 +63,25 @@ typedef int vm_fault_t;
    #define NV_UNPIN_USER_PAGE put_page
 #endif // NV_PIN_USER_PAGES_PRESENT

-/*
- * get_user_pages()
+/* get_user_pages
 *
- * The 8-argument version of get_user_pages() was deprecated by commit
- * cde70140fed8 ("mm/gup: Overload get_user_pages() functions") in v4.6-rc1.
+ * The 8-argument version of get_user_pages was deprecated by commit
+ * (2016 Feb 12: cde70140fed8429acf7a14e2e2cbd3e329036653) for the non-remote case
 * (calling get_user_pages with current and current->mm).
 *
- * Completely moved to the 6 argument version of get_user_pages() by
- * commit c12d2da56d0e ("mm/gup: Remove the macro overload API migration
- * helpers from the get_user*() APIs") in v4.6-rc4.
+ * Completely moved to the 6 argument version of get_user_pages -
+ * 2016 Apr 4: c12d2da56d0e07d230968ee2305aaa86b93a6832
 *
- * write and force parameters were replaced with gup_flags by
- * commit 768ae309a961 ("mm: replace get_user_pages() write/force parameters
- * with gup_flags") in v4.9.
+ * write and force parameters were replaced with gup_flags by -
+ * 2016 Oct 12: 768ae309a96103ed02eb1e111e838c87854d8b51
 *
 * A 7-argument version of get_user_pages was introduced into linux-4.4.y by
- * commit 8e50b8b07f462 ("mm: replace get_user_pages() write/force parameters
- * with gup_flags") which cherry-picked the replacement of the write and
- * force parameters with gup_flags.
+ * commit 8e50b8b07f462ab4b91bc1491b1c91bd75e4ad40 which cherry-picked the
+ * replacement of the write and force parameters with gup_flags
 *
- * Removed vmas parameter from get_user_pages() by commit 54d020692b34
- * ("mm/gup: remove unused vmas parameter from get_user_pages()") in v6.5.
+ * Removed vmas parameter from get_user_pages() by commit 7bbf9c8c99
+ * ("mm/gup: remove unused vmas parameter from get_user_pages()")
+ * in linux-next, expected in v6.5-rc1 (2023-05-17)
 *
 */

@@ -115,19 +112,18 @@ typedef int vm_fault_t;
    }
 #endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS

-/*
- * pin_user_pages_remote()
+/* pin_user_pages_remote
 *
- * pin_user_pages_remote() was added by commit eddb1c228f79
- * ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
+ * pin_user_pages_remote() was added by commit eddb1c228f7951d399240
+ * ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
 *
 * pin_user_pages_remote() removed 'tsk' parameter by commit
- * 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
- * in v5.9.
+ * 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
+ * in v5.9-rc1 (2020-08-11).
 *
 * Removed unused vmas parameter from pin_user_pages_remote() by commit
- * 0b295316b3a9 ("mm/gup: remove unused vmas parameter from
- * pin_user_pages_remote()") in v6.5.
+ * 83bcc2e132("mm/gup: remove unused vmas parameter from pin_user_pages_remote()")
+ * in linux-next, expected in v6.5-rc1 (2023-05-14)
 *
 */

@@ -147,7 +143,7 @@ typedef int vm_fault_t;

 /*
 * get_user_pages_remote() was added by commit 1e9877902dc7
- * ("mm/gup: Introduce get_user_pages_remote()") in v4.6.
+ * ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
 *
 * Note that get_user_pages_remote() requires the caller to hold a reference on
 * the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
@@ -157,17 +153,19 @@ typedef int vm_fault_t;
 *
 * get_user_pages_remote() write/force parameters were replaced
 * with gup_flags by commit 9beae1ea8930 ("mm: replace get_user_pages_remote()
- * write/force parameters with gup_flags") in v4.9.
+ * write/force parameters with gup_flags") in v4.9 (2016-10-13).
 *
 * get_user_pages_remote() added 'locked' parameter by commit 5b56d49fc31d
- * ("mm: add locked parameter to get_user_pages_remote()") in v4.10.
+ * ("mm: add locked parameter to get_user_pages_remote()") in
+ * v4.10 (2016-12-14).
 *
 * get_user_pages_remote() removed 'tsk' parameter by
 * commit 64019a2e467a ("mm/gup: remove task_struct pointer for
- * all gup code") in v5.9.
+ * all gup code") in v5.9-rc1 (2020-08-11).
 *
- * Removed vmas parameter from get_user_pages_remote() by commit ca5e863233e8
- * ("mm/gup: remove vmas parameter from get_user_pages_remote()") in v6.5.
+ * Removed vmas parameter from get_user_pages_remote() by commit a4bde14d549 
+ * ("mm/gup: remove vmas parameter from get_user_pages_remote()")
+ * in linux-next, expected in v6.5-rc1 (2023-05-14)
 *
 */

--- a/kernel-open/common/inc/nv.h
+++ b/kernel-open/common/inc/nv.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -110,15 +110,15 @@ typedef enum _TEGRASOC_WHICH_CLK
    TEGRASOC_WHICH_CLK_DSIPLL_CLKOUTPN,
    TEGRASOC_WHICH_CLK_DSIPLL_CLKOUTA,
    TEGRASOC_WHICH_CLK_SPPLL0_VCO,
+    TEGRASOC_WHICH_CLK_SPPLL0_CLKOUTPN,
    TEGRASOC_WHICH_CLK_SPPLL0_CLKOUTA,
    TEGRASOC_WHICH_CLK_SPPLL0_CLKOUTB,
-    TEGRASOC_WHICH_CLK_SPPLL0_CLKOUTPN,
-    TEGRASOC_WHICH_CLK_SPPLL1_CLKOUTPN,
-    TEGRASOC_WHICH_CLK_SPPLL0_DIV27,
-    TEGRASOC_WHICH_CLK_SPPLL1_DIV27,
    TEGRASOC_WHICH_CLK_SPPLL0_DIV10,
    TEGRASOC_WHICH_CLK_SPPLL0_DIV25,
+    TEGRASOC_WHICH_CLK_SPPLL0_DIV27,
    TEGRASOC_WHICH_CLK_SPPLL1_VCO,
+    TEGRASOC_WHICH_CLK_SPPLL1_CLKOUTPN,
+    TEGRASOC_WHICH_CLK_SPPLL1_DIV27,
    TEGRASOC_WHICH_CLK_VPLL0_REF,
    TEGRASOC_WHICH_CLK_VPLL0,
    TEGRASOC_WHICH_CLK_VPLL1,
@@ -132,7 +132,7 @@ typedef enum _TEGRASOC_WHICH_CLK
    TEGRASOC_WHICH_CLK_DSI_PIXEL,
    TEGRASOC_WHICH_CLK_PRE_SOR0,
    TEGRASOC_WHICH_CLK_PRE_SOR1,
-    TEGRASOC_WHICH_CLK_DP_LINKA_REF,
+    TEGRASOC_WHICH_CLK_DP_LINK_REF,
    TEGRASOC_WHICH_CLK_SOR_LINKA_INPUT,
    TEGRASOC_WHICH_CLK_SOR_LINKA_AFIFO,
    TEGRASOC_WHICH_CLK_SOR_LINKA_AFIFO_M,
@@ -143,7 +143,7 @@ typedef enum _TEGRASOC_WHICH_CLK
    TEGRASOC_WHICH_CLK_PLLHUB,
    TEGRASOC_WHICH_CLK_SOR0,
    TEGRASOC_WHICH_CLK_SOR1,
-    TEGRASOC_WHICH_CLK_SOR_PADA_INPUT,
+    TEGRASOC_WHICH_CLK_SOR_PAD_INPUT,
    TEGRASOC_WHICH_CLK_PRE_SF0,
    TEGRASOC_WHICH_CLK_SF0,
    TEGRASOC_WHICH_CLK_SF1,
@@ -332,9 +332,7 @@ typedef struct nv_soc_irq_info_s {

 #define NV_MAX_SOC_IRQS              6
 #define NV_MAX_DPAUX_NUM_DEVICES     4
-
-#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2
-
+#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2 // From SOC_DEV_MAPPING

 #define NV_IGPU_LEGACY_STALL_IRQ     70
 #define NV_IGPU_MAX_STALL_IRQS       3
@@ -497,6 +495,12 @@ typedef struct nv_state_t
    } iommus;
 } nv_state_t;

+// These defines need to be kept in sync with the defines in system.h
+#define OS_TYPE_LINUX   0x1
+#define OS_TYPE_FREEBSD 0x2
+#define OS_TYPE_SUNOS   0x3
+#define OS_TYPE_VMWARE  0x4
+
 #define NVFP_TYPE_NONE       0x0
 #define NVFP_TYPE_REFCOUNTED 0x1
 #define NVFP_TYPE_REGISTERED 0x2
@@ -605,15 +609,6 @@ typedef enum
    NV_POWER_STATE_RUNNING
 } nv_power_state_t;

-typedef struct
-{
-    const char *vidmem_power_status;
-    const char *dynamic_power_status;
-    const char *gc6_support;
-    const char *gcoff_support;
-    const char *s0ix_status;
-} nv_power_info_t;
-
 #define NV_PRIMARY_VGA(nv)      ((nv)->primary_vga)

 #define NV_IS_CTL_DEVICE(nv)    ((nv)->flags & NV_FLAG_CONTROL)
@@ -783,7 +778,7 @@ nv_state_t*  NV_API_CALL  nv_get_ctl_state       (void);

 void   NV_API_CALL  nv_set_dma_address_size      (nv_state_t *, NvU32 );

-NV_STATUS  NV_API_CALL  nv_alias_pages           (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, void **);
+NV_STATUS  NV_API_CALL  nv_alias_pages           (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
 NV_STATUS  NV_API_CALL  nv_alloc_pages           (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
 NV_STATUS  NV_API_CALL  nv_free_pages            (nv_state_t *, NvU32, NvBool, NvU32, void *);

@@ -827,7 +822,6 @@ void   NV_API_CALL  nv_acpi_methods_init         (NvU32 *);
 void   NV_API_CALL  nv_acpi_methods_uninit       (void);

 NV_STATUS  NV_API_CALL  nv_acpi_method           (NvU32, NvU32, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
-NV_STATUS  NV_API_CALL  nv_acpi_d3cold_dsm_for_upstream_port (nv_state_t *, NvU8 *, NvU32, NvU32, NvU32 *);
 NV_STATUS  NV_API_CALL  nv_acpi_dsm_method       (nv_state_t *, NvU8 *, NvU32, NvBool, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
 NV_STATUS  NV_API_CALL  nv_acpi_ddc_method       (nv_state_t *, void *, NvU32 *, NvBool);
 NV_STATUS  NV_API_CALL  nv_acpi_dod_method       (nv_state_t *, NvU32 *, NvU32 *);
@@ -889,6 +883,8 @@ void      NV_API_CALL nv_cap_drv_exit(void);
 NvBool    NV_API_CALL nv_is_gpu_accessible(nv_state_t *);
 NvBool    NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);

+NvU32     NV_API_CALL nv_get_os_type(void);
+
 void      NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
 void      NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);

@@ -994,10 +990,10 @@ NV_STATUS  NV_API_CALL  rm_p2p_init_mapping       (nvidia_stack_t *, NvU64, NvU6
 NV_STATUS  NV_API_CALL  rm_p2p_destroy_mapping    (nvidia_stack_t *, NvU64);
 NV_STATUS  NV_API_CALL  rm_p2p_get_pages          (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
 NV_STATUS  NV_API_CALL  rm_p2p_get_gpu_info       (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
-NV_STATUS  NV_API_CALL  rm_p2p_get_pages_persistent (nvidia_stack_t *,  NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *, void **);
+NV_STATUS  NV_API_CALL  rm_p2p_get_pages_persistent (nvidia_stack_t *,  NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *);
 NV_STATUS  NV_API_CALL  rm_p2p_register_callback  (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
 NV_STATUS  NV_API_CALL  rm_p2p_put_pages          (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
-NV_STATUS  NV_API_CALL  rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
+NV_STATUS  NV_API_CALL  rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *);
 NV_STATUS  NV_API_CALL  rm_p2p_dma_map_pages      (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
 NV_STATUS  NV_API_CALL  rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
 void       NV_API_CALL  rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
@@ -1031,7 +1027,9 @@ void       NV_API_CALL rm_enable_dynamic_power_management(nvidia_stack_t *, nv_s
 NV_STATUS  NV_API_CALL rm_ref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
 void       NV_API_CALL rm_unref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
 NV_STATUS  NV_API_CALL rm_transition_dynamic_power(nvidia_stack_t *, nv_state_t *, NvBool, NvBool *);
-void       NV_API_CALL rm_get_power_info(nvidia_stack_t *, nv_state_t *, nv_power_info_t *);
+const char* NV_API_CALL rm_get_vidmem_power_status(nvidia_stack_t *, nv_state_t *);
+const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *, nv_state_t *);
+const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);

 void       NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
 void       NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
@@ -1079,9 +1077,6 @@ NV_STATUS   NV_API_CALL rm_run_nano_timer_callback(nvidia_stack_t *, nv_state_t
 void        NV_API_CALL nv_cancel_nano_timer(nv_state_t *, nv_nano_timer_t *);
 void        NV_API_CALL nv_destroy_nano_timer(nv_state_t *nv, nv_nano_timer_t *);

-// Host1x specific functions.
-NV_STATUS NV_API_CALL nv_get_syncpoint_aperture(NvU32, NvU64 *, NvU64 *, NvU32 *);
-
 #if defined(NVCPU_X86_64)

 static inline NvU64 nv_rdtsc(void)
--- a/kernel-open/common/inc/nv_uvm_interface.h
+++ b/kernel-open/common/inc/nv_uvm_interface.h
@@ -592,6 +592,13 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
    Error codes:
      NV_ERR_GENERIC
      NV_ERR_NO_MEMORY
+      NV_ERR_INVALID_STATE
+      NV_ERR_NOT_SUPPORTED
+      NV_ERR_NOT_READY
+      NV_ERR_INVALID_LOCK_STATE
+      NV_ERR_INVALID_STATE
+      NV_ERR_NVSWITCH_FABRIC_NOT_READY
+      NV_ERR_NVSWITCH_FABRIC_FAILURE
 */
 NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
                                  UvmGpuCaps *caps);
--- a/kernel-open/common/inc/nv_uvm_types.h
+++ b/kernel-open/common/inc/nv_uvm_types.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -39,13 +39,12 @@
 // are multiple BIG page sizes in RM. These defines are used as flags to "0"
 // should be OK when user is not sure which pagesize allocation it wants
 //
-#define UVM_PAGE_SIZE_DEFAULT    0x0ULL
-#define UVM_PAGE_SIZE_4K         0x1000ULL
-#define UVM_PAGE_SIZE_64K        0x10000ULL
-#define UVM_PAGE_SIZE_128K       0x20000ULL
-#define UVM_PAGE_SIZE_2M         0x200000ULL
-#define UVM_PAGE_SIZE_512M       0x20000000ULL
-#define UVM_PAGE_SIZE_256G       0x4000000000ULL
+#define UVM_PAGE_SIZE_DEFAULT    0x0
+#define UVM_PAGE_SIZE_4K         0x1000
+#define UVM_PAGE_SIZE_64K        0x10000
+#define UVM_PAGE_SIZE_128K       0x20000
+#define UVM_PAGE_SIZE_2M         0x200000
+#define UVM_PAGE_SIZE_512M       0x20000000

 //
 // When modifying flags, make sure they are compatible with the mirrored
@@ -396,7 +395,6 @@ typedef enum
    UVM_LINK_TYPE_NVLINK_2,
    UVM_LINK_TYPE_NVLINK_3,
    UVM_LINK_TYPE_NVLINK_4,
-    UVM_LINK_TYPE_NVLINK_5,
    UVM_LINK_TYPE_C2C,
 } UVM_LINK_TYPE;

@@ -568,6 +566,11 @@ typedef struct UvmGpuP2PCapsParams_tag
    // second, not taking into account the protocols overhead. The reported
    // bandwidth for indirect peers is zero.
    NvU32 totalLinkLineRateMBps;
+
+    // Out: True if the peers have an indirect link to communicate. On P9
+    // systems, this is true if peers are connected to different NPUs that
+    // forward the requests between them.
+    NvU32 indirectAccess      : 1;
 } UvmGpuP2PCapsParams;

 // Platform-wide information
@@ -592,10 +595,8 @@ typedef struct UvmGpuClientInfo_tag

 typedef enum
 {
-    UVM_GPU_CONF_COMPUTE_MODE_NONE,
-    UVM_GPU_CONF_COMPUTE_MODE_APM,
-    UVM_GPU_CONF_COMPUTE_MODE_HCC,
-    UVM_GPU_CONF_COMPUTE_MODE_COUNT
+    UVM_GPU_CONF_COMPUTE_MODE_NONE = 0,
+    UVM_GPU_CONF_COMPUTE_MODE_HCC = 2
 } UvmGpuConfComputeMode;

 typedef struct UvmGpuConfComputeCaps_tag
@@ -706,13 +707,6 @@ typedef struct UvmGpuInfo_tag

    // EGM base address to offset in the GMMU PTE entry for EGM mappings
    NvU64    egmBaseAddr;
-
-    // If connectedToSwitch is NV_TRUE,
-    // nvswitchEgmMemoryWindowStart tells the base address for the GPU's EGM memory in the
-    // NVSwitch address space. It is used when creating PTEs of GPU memory mappings
-    // to NVSwitch peers.
-    NvU64 nvswitchEgmMemoryWindowStart;
-
 } UvmGpuInfo;

 typedef struct UvmGpuFbInfo_tag
--- a/kernel-open/common/inc/nvkms-api-types.h
+++ b/kernel-open/common/inc/nvkms-api-types.h
@@ -440,9 +440,9 @@ struct NvKmsLayerCapabilities {
    NvBool supportsWindowMode              :1;

    /*!
-     * Whether layer supports ICtCp pipe.
+     * Whether layer supports HDR pipe.
     */
-    NvBool supportsICtCp                   :1;
+    NvBool supportsHDR                     :1;


    /*!
--- a/kernel-open/common/inc/nvkms-kapi.h
+++ b/kernel-open/common/inc/nvkms-kapi.h
@@ -158,17 +158,13 @@ struct NvKmsKapiDeviceResourcesInfo {

        NvU32 hasVideoMemory;

-        NvU32 numDisplaySemaphores;
-
        NvU8  genericPageKind;

        NvBool  supportsSyncpts;
-
-        NvBool requiresVrrSemaphores;
    } caps;

    NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
-    NvBool supportsICtCp[NVKMS_KAPI_LAYER_MAX];
+    NvBool supportsHDR[NVKMS_KAPI_LAYER_MAX];
 };

 #define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
@@ -214,26 +210,18 @@ struct NvKmsKapiStaticDisplayInfo {
    NvU32 headMask;
 };

-struct NvKmsKapiSyncParams {
-    union {
-        struct {
-            /*!
-             * Possible syncpt use case in kapi.
-             * For pre-syncpt, use only id and value
-             * and for post-syncpt, use only fd.
-             */
-            NvU32   preSyncptId;
-            NvU32   preSyncptValue;
-        } syncpt;
+struct NvKmsKapiSyncpt {

-        struct {
-            NvU32 index;
-        } semaphore;
-    } u;
+    /*!
+     * Possible syncpt use case in kapi.
+     * For pre-syncpt, use only id and value
+     * and for post-syncpt, use only fd.
+     */
+    NvBool  preSyncptSpecified;
+    NvU32   preSyncptId;
+    NvU32   preSyncptValue;

-    NvBool preSyncptSpecified;
-    NvBool postSyncptRequested;
-    NvBool semaphoreSpecified;
+    NvBool  postSyncptRequested;
 };

 struct NvKmsKapiLayerConfig {
@@ -243,7 +231,7 @@ struct NvKmsKapiLayerConfig {
        NvU8 surfaceAlpha;
    } compParams;
    struct NvKmsRRParams rrParams;
-    struct NvKmsKapiSyncParams syncParams;
+    struct NvKmsKapiSyncpt syncptParams;

    struct {
        struct NvKmsHDRStaticMetadata val;
@@ -331,6 +319,7 @@ struct NvKmsKapiHeadModeSetConfig {

    struct {
        struct {
+            NvBool specified;
            NvU32 depth;
            NvU32 start;
            NvU32 end;
@@ -338,6 +327,7 @@ struct NvKmsKapiHeadModeSetConfig {
        } input;

        struct {
+            NvBool specified;
            NvBool enabled;
            struct NvKmsLutRamps *pRamps;
        } output;
@@ -352,8 +342,7 @@ struct NvKmsKapiHeadRequestedConfig {
        NvBool modeChanged         : 1;
        NvBool hdrInfoFrameChanged : 1;
        NvBool colorimetryChanged  : 1;
-        NvBool ilutChanged         : 1;
-        NvBool olutChanged         : 1;
+        NvBool lutChanged      : 1;
    } flags;

    struct NvKmsKapiCursorRequestedConfig cursorRequestedConfig;
@@ -379,8 +368,6 @@ struct NvKmsKapiHeadReplyConfig {

 struct NvKmsKapiModeSetReplyConfig {
    enum NvKmsFlipResult flipResult;
-    NvBool vrrFlip;
-    NvS32 vrrSemaphoreIndex;
    struct NvKmsKapiHeadReplyConfig
        headReplyConfig[NVKMS_KAPI_MAX_HEADS];
 };
@@ -1423,87 +1410,6 @@ struct NvKmsKapiFunctionsTable {
    (
        NvKmsKapiSuspendResumeCallbackFunc *function
    );
-
-    /*!
-     * Immediately reset the specified display semaphore to the pending state.
-     *
-     * Must be called prior to applying a mode set that utilizes the specified
-     * display semaphore for synchronization.
-     *
-     * \param [in] device         The device which will utilize the semaphore.
-     *
-     * \param [in] semaphoreIndex Index of the desired semaphore within the
-     *                            NVKMS semaphore pool. Must be less than
-     *                            NvKmsKapiDeviceResourcesInfo::caps::numDisplaySemaphores
-     *                            for the specified device.
-     */
-    NvBool
-    (*resetDisplaySemaphore)
-    (
-        struct NvKmsKapiDevice *device,
-        NvU32 semaphoreIndex
-    );
-
-    /*!
-     * Immediately set the specified display semaphore to the displayable state.
-     *
-     * Must be called after \ref resetDisplaySemaphore to indicate a mode
-     * configuration change that utilizes the specified display semaphore for
-     * synchronization may proceed.
-     *
-     * \param [in] device         The device which will utilize the semaphore.
-     *
-     * \param [in] semaphoreIndex Index of the desired semaphore within the
-     *                            NVKMS semaphore pool. Must be less than
-     *                            NvKmsKapiDeviceResourcesInfo::caps::numDisplaySemaphores
-     *                            for the specified device.
-     */
-    void
-    (*signalDisplaySemaphore)
-    (
-        struct NvKmsKapiDevice *device,
-        NvU32 semaphoreIndex
-    );
-
-    /*!
-     * Immediately cancel use of a display semaphore by resetting its value to
-     * its initial state.
-     *
-     * This can be used by clients to restore a semaphore to a consistent state
-     * when they have prepared it for use by previously calling
-     * \ref resetDisplaySemaphore() on it, but are then prevented from
-     * submitting the associated hardware operations to consume it due to the
-     * subsequent failure of some software or hardware operation.
-     *
-     * \param [in] device         The device which will utilize the semaphore.
-     *
-     * \param [in] semaphoreIndex Index of the desired semaphore within the
-     *                            NVKMS semaphore pool. Must be less than
-     *                            NvKmsKapiDeviceResourcesInfo::caps::numDisplaySemaphores
-     *                            for the specified device.
-     */
-    void
-    (*cancelDisplaySemaphore)
-    (
-        struct NvKmsKapiDevice *device,
-        NvU32 semaphoreIndex
-    );
-
-    /*!
-     * Signal the VRR semaphore at the specified index from the CPU.
-     * If device does not support VRR semaphores, this is a no-op.
-     * Returns true if signal is success or no-op, otherwise returns false.
-     *
-     * \param [in]  device  A device allocated using allocateDevice().
-     *
-     * \param [in]  index   The VRR semaphore index to be signalled.
-     */
-    NvBool
-    (*signalVrrSemaphore)
-    (
-        struct NvKmsKapiDevice *device,
-        NvS32 index
-    );
 };

 /** @} */
--- a/kernel-open/common/inc/nvmisc.h
+++ b/kernel-open/common/inc/nvmisc.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -67,9 +67,6 @@ extern "C" {
 #define NVBIT64(b)                NVBIT_TYPE(b, NvU64)
 #endif

-//Concatenate 2 32bit values to a 64bit value
-#define NV_CONCAT_32_TO_64(hi, lo) ((((NvU64)hi) << 32) | ((NvU64)lo))
-
 // Helper macro's for 32 bit bitmasks
 #define NV_BITMASK32_ELEMENT_SIZE            (sizeof(NvU32) << 3)
 #define NV_BITMASK32_IDX(chId)               (((chId) & ~(0x1F)) >> 5)  
@@ -497,23 +494,6 @@ do                                                      \
 //
 #define NV_TWO_N_MINUS_ONE(n) (((1ULL<<(n/2))<<((n+1)/2))-1)

-//
-// Create a 64b bitmask with n bits set
-// This is the same as ((1ULL<<n) - 1), but it doesn't overflow for n=64
-//
-// ...
-// n=-1, 0x0000000000000000
-// n=0,  0x0000000000000000
-// n=1,  0x0000000000000001
-// ...
-// n=63, 0x7FFFFFFFFFFFFFFF
-// n=64, 0xFFFFFFFFFFFFFFFF
-// n=65, 0xFFFFFFFFFFFFFFFF
-// n=66, 0xFFFFFFFFFFFFFFFF
-// ...
-//
-#define NV_BITMASK64(n) ((n<1) ? 0ULL : (NV_U64_MAX>>((n>64) ? 0 : (64-n))))
-
 #define DRF_READ_1WORD_BS(d,r,f,v) \
    ((DRF_EXTENT_MW(NV##d##r##f)<8)?DRF_READ_1BYTE_BS(NV##d##r##f,(v)): \
    ((DRF_EXTENT_MW(NV##d##r##f)<16)?DRF_READ_2BYTE_BS(NV##d##r##f,(v)): \
@@ -594,13 +574,6 @@ nvMaskPos32(const NvU32 mask, const NvU32 bitIdx)
    n32 = BIT_IDX_32(LOWESTBIT(n32));\
 }

-// Destructive operation on n64
-#define LOWESTBITIDX_64(n64)         \
-{                                    \
-    n64 = BIT_IDX_64(LOWESTBIT(n64));\
-}
-
-
 // Destructive operation on n32
 #define HIGHESTBITIDX_32(n32)   \
 {                               \
@@ -945,11 +918,6 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
 // Use (lo) if (b) is less than 64, and (hi) if >= 64.
 //
 #define NV_BIT_SET_128(b, lo, hi)              { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
-//
-// Clear the bit at pos (b) for U64 which is < 128.
-// Use (lo) if (b) is less than 64, and (hi) if >= 64.
-//
-#define NV_BIT_CLEAR_128(b, lo, hi)            { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) &= ~NVBIT64(b); else (hi) &= ~NVBIT64( b & 0x3F ); }

 // Get the number of elements the specified fixed-size array
 #define NV_ARRAY_ELEMENTS(x)                   ((sizeof(x)/sizeof((x)[0])))
--- a/kernel-open/common/inc/nvstatuscodes.h
+++ b/kernel-open/common/inc/nvstatuscodes.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -152,9 +152,8 @@ NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT,      0x0000007A, "Fabric Manag
 NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED,               0x0000007B, "Semaphore Surface value already >= requested wait value")
 NV_STATUS_CODE(NV_ERR_QUEUE_TASK_SLOT_NOT_AVAILABLE,   0x0000007C, "PMU RPC error due to no queue slot available for this event")
 NV_STATUS_CODE(NV_ERR_KEY_ROTATION_IN_PROGRESS,        0x0000007D, "Operation not allowed as key rotation is in progress")
-NV_STATUS_CODE(NV_ERR_TEST_ONLY_CODE_NOT_ENABLED,      0x0000007E, "Test-only code path not enabled")
-NV_STATUS_CODE(NV_ERR_SECURE_BOOT_FAILED,              0x0000007F, "GFW secure boot failed")
-NV_STATUS_CODE(NV_ERR_INSUFFICIENT_ZBC_ENTRY,          0x00000080, "No more ZBC entry for the client")
+NV_STATUS_CODE(NV_ERR_NVSWITCH_FABRIC_NOT_READY,       0x00000081, "Nvswitch Fabric Status or Fabric Probe is not yet complete, caller needs to retry")
+NV_STATUS_CODE(NV_ERR_NVSWITCH_FABRIC_FAILURE,         0x00000082, "Nvswitch Fabric Probe failed")

 // Warnings:
 NV_STATUS_CODE(NV_WARN_HOT_SWITCH,                     0x00010001, "WARNING Hot switch")
--- a/kernel-open/common/inc/nvtypes.h
+++ b/kernel-open/common/inc/nvtypes.h
@@ -152,12 +152,6 @@ typedef   signed short     NvS16; /* -32768 to 32767                         */
     (((NvU32)(c) & 0xff) << 8)  | \
     (((NvU32)(d) & 0xff))))

-// Macro to build an NvU64 from two DWORDS, listed from msb to lsb
-#define NvU64_BUILD(a, b) \
-    ((NvU64)( \
-     (((NvU64)(a) & ~0U) << 32) | \
-     (((NvU64)(b) & ~0U))))
-
 #if NVTYPES_USE_STDINT
 typedef uint32_t           NvV32; /* "void": enumerated or multiple fields   */
 typedef uint32_t           NvU32; /* 0 to 4294967295                         */
--- a/kernel-open/common/inc/os-interface.h
+++ b/kernel-open/common/inc/os-interface.h
@@ -151,7 +151,6 @@ void        NV_API_CALL  os_release_rwlock_read      (void *);
 void        NV_API_CALL  os_release_rwlock_write     (void *);
 NvBool      NV_API_CALL  os_semaphore_may_sleep      (void);
 NV_STATUS   NV_API_CALL  os_get_version_info         (os_version_info*);
-NV_STATUS   NV_API_CALL  os_get_is_openrm            (NvBool *);
 NvBool      NV_API_CALL  os_is_isr                   (void);
 NvBool      NV_API_CALL  os_pat_supported            (void);
 void        NV_API_CALL  os_dump_stack               (void);
@@ -219,6 +218,8 @@ extern NvU32 os_page_size;
 extern NvU64 os_page_mask;
 extern NvU8  os_page_shift;
 extern NvBool os_cc_enabled;
+extern NvBool os_cc_sev_snp_enabled;
+extern NvBool os_cc_snp_vtom_enabled;
 extern NvBool os_cc_tdx_enabled;
 extern NvBool os_dma_buf_enabled;
 extern NvBool os_imex_channel_is_supported;
--- a/kernel-open/conftest.sh
+++ b/kernel-open/conftest.sh
@@ -71,7 +71,7 @@ test_header_presence() {
    TEST_CFLAGS="-E -M $CFLAGS"

    file="$1"
-    file_define=NV_`echo $file | tr '/.-' '___' | tr 'a-z' 'A-Z'`_PRESENT
+    file_define=NV_`echo $file | tr '/.\-a-z' '___A-Z'`_PRESENT

    CODE="#include <$file>"

@@ -5102,6 +5102,42 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_CC_PLATFORM_PRESENT" "" "functions"
        ;;

+        cc_attr_guest_sev_snp)
+            #
+            # Determine if 'CC_ATTR_GUEST_SEV_SNP' is present.
+            #
+            # Added by commit aa5a461171f9 ("x86/mm: Extend cc_attr to
+            # include AMD SEV-SNP") in v5.19.
+            #
+            CODE="
+            #if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
+            #include <linux/cc_platform.h>
+            #endif
+
+            enum cc_attr cc_attributes = CC_ATTR_GUEST_SEV_SNP;
+            "
+
+            compile_check_conftest "$CODE" "NV_CC_ATTR_SEV_SNP" "" "types"
+        ;;
+
+        hv_get_isolation_type)
+            #
+            # Determine if 'hv_get_isolation_type()' is present.
+            # Added by commit faff44069ff5 ("x86/hyperv: Add Write/Read MSR
+            # registers via ghcb page") in v5.16.
+            #
+            CODE="
+            #if defined(NV_ASM_MSHYPERV_H_PRESENT)
+            #include <asm/mshyperv.h>
+            #endif
+            void conftest_hv_get_isolation_type(void) {
+                int i;
+                hv_get_isolation_type(i);
+            }"
+
+            compile_check_conftest "$CODE" "NV_HV_GET_ISOLATION_TYPE" "" "functions"
+        ;;
+
        drm_prime_pages_to_sg_has_drm_device_arg)
            #
            # Determine if drm_prime_pages_to_sg() has 'dev' argument.
@@ -5554,8 +5590,7 @@ compile_test() {

        of_dma_configure)
            #
-            # Determine if of_dma_configure() function is present, and how
-            # many arguments it takes.
+            # Determine if of_dma_configure() function is present.
            #
            # Added by commit 591c1ee465ce ("of: configure the platform
            # device dma parameters") in v3.16.  However, it was a static,
@@ -5565,69 +5600,17 @@ compile_test() {
            # commit 1f5c69aa51f9 ("of: Move of_dma_configure() to device.c
            # to help re-use") in v4.1.
            #
-            # It subsequently began taking a third parameter with commit
-            # 3d6ce86ee794 ("drivers: remove force dma flag from buses")
-            # in v4.18.
-            #
-
-            echo "$CONFTEST_PREAMBLE
+            CODE="
            #if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
            #include <linux/of_device.h>
            #endif
-
            void conftest_of_dma_configure(void)
            {
                of_dma_configure();
            }
-            " > conftest$$.c
+            "

-            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
-            rm -f conftest$$.c
-
-            if [ -f conftest$$.o ]; then
-                rm -f conftest$$.o
-
-                echo "#undef NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
-                echo "#undef NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT" | append_conftest "functions"
-            else
-                echo "#define NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
-
-                echo "$CONFTEST_PREAMBLE
-                #if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
-                #include <linux/of_device.h>
-                #endif
-
-                void conftest_of_dma_configure(void) {
-                    of_dma_configure(NULL, NULL, false);
-                }" > conftest$$.c
-
-                $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
-                rm -f conftest$$.c
-
-                if [ -f conftest$$.o ]; then
-                    rm -f conftest$$.o
-                    echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 3" | append_conftest "functions"
-                    return
-                fi
-
-                echo "$CONFTEST_PREAMBLE
-                #if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
-                #include <linux/of_device.h>
-                #endif
-
-                void conftest_of_dma_configure(void) {
-                    of_dma_configure(NULL, NULL);
-                }" > conftest$$.c
-
-                $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
-                rm -f conftest$$.c
-
-                if [ -f conftest$$.o ]; then
-                    rm -f conftest$$.o
-                    echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 2" | append_conftest "functions"
-                    return
-                fi
-            fi
+            compile_check_conftest "$CODE" "NV_OF_DMA_CONFIGURE_PRESENT" "" "functions"
        ;;

        icc_get)
@@ -6596,7 +6579,9 @@ compile_test() {
            # Determine whether drm_fbdev_generic_setup is present.
            #
            # Added by commit 9060d7f49376 ("drm/fb-helper: Finish the
-            # generic fbdev emulation") in v4.19.
+            # generic fbdev emulation") in v4.19. Removed by commit
+            # aae4682e5d66 ("drm/fbdev-generic: Convert to fbdev-ttm")
+            # in v6.11.
            #
            CODE="
            #include <drm/drm_fb_helper.h>
@@ -6608,6 +6593,48 @@ compile_test() {
            }"

            compile_check_conftest "$CODE" "NV_DRM_FBDEV_GENERIC_SETUP_PRESENT" "" "functions"
+            ;;
+
+        drm_fbdev_ttm_setup)
+            #
+            # Determine whether drm_fbdev_ttm_setup is present.
+            #
+            # Added by commit aae4682e5d66 ("drm/fbdev-generic:
+            # Convert to fbdev-ttm") in v6.11.
+            #
+            CODE="
+            #include <drm/drm_fb_helper.h>
+            #if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
+            #include <drm/drm_fbdev_ttm.h>
+            #endif
+            void conftest_drm_fbdev_ttm_setup(void) {
+                drm_fbdev_ttm_setup();
+            }"
+
+            compile_check_conftest "$CODE" "NV_DRM_FBDEV_TTM_SETUP_PRESENT" "" "functions"
+        ;;
+
+        drm_output_poll_changed)
+            #
+            # Determine whether drm_mode_config_funcs.output_poll_changed
+            # callback is present.
+            #
+            # Removed by commit 446d0f4849b1 ("drm: Remove struct
+            # drm_mode_config_funcs.output_poll_changed") in v6.12. Hotplug
+            # event support is handled through the fbdev emulation interface
+            # going forward.
+            #
+            CODE="
+            #if defined(NV_DRM_DRM_MODE_CONFIG_H_PRESENT)
+            #include <drm/drm_mode_config.h>
+            #else
+            #include <drm/drm_crtc.h>
+            #endif
+            int conftest_drm_output_poll_changed_available(void) {
+                return offsetof(struct drm_mode_config_funcs, output_poll_changed);
+            }"
+
+            compile_check_conftest "$CODE" "NV_DRM_OUTPUT_POLL_CHANGED_PRESENT" "" "types"
        ;;

        drm_aperture_remove_conflicting_pci_framebuffers)
@@ -6848,45 +6875,12 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_HAS_SUPPORTED_COLORSPACES_ARG" "" "types"
        ;;

-        drm_syncobj_features_present)
-            # Determine if DRIVER_SYNCOBJ and DRIVER_SYNCOBJ_TIMELINE DRM
-            # driver features are present. Timeline DRM synchronization objects
-            # may only be used if both of these are supported by the driver.
-            #
-            # DRIVER_SYNCOBJ_TIMELINE Added by commit 060cebb20cdb ("drm:
-            # introduce a capability flag for syncobj timeline support") in
-            # v5.2
-            #
-            # DRIVER_SYNCOBJ Added by commit e9083420bbac ("drm: introduce
-            # sync objects (v4)") in v4.12
-            CODE="
-            #if defined(NV_DRM_DRM_DRV_H_PRESENT)
-            #include <drm/drm_drv.h>
-            #endif
-            int features = DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE;"
-
-            compile_check_conftest "$CODE" "NV_DRM_SYNCOBJ_FEATURES_PRESENT" "" "types"
-        ;;
-
-        stack_trace)
-            # Determine if functions stack_trace_{save,print} are present.
-            # Added by commit e9b98e162 ("stacktrace: Provide helpers for
-            # common stack trace operations") in v5.2.
-            CODE="
-            #include <linux/stacktrace.h>
-            void conftest_stack_trace(void) {
-                stack_trace_save();
-                stack_trace_print();
-            }"
-
-            compile_check_conftest "$CODE" "NV_STACK_TRACE_PRESENT" "" "functions"
-        ;;
-
        drm_unlocked_ioctl_flag_present)
            # Determine if DRM_UNLOCKED IOCTL flag is present.
            #
            # DRM_UNLOCKED was removed by commit 2798ffcc1d6a ("drm: Remove
-            # locking for legacy ioctls and DRM_UNLOCKED") in v6.8.
+            # locking for legacy ioctls and DRM_UNLOCKED") in Linux
+            # next-20231208.
            #
            # DRM_UNLOCKED definition was moved from drmP.h to drm_ioctl.h by
            # commit 2640981f3600 ("drm: document drm_ioctl.[hc]") in v4.12.
@@ -6902,94 +6896,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types"
        ;;

-        fault_flag_remote_present)
-            # Determine if FAULT_FLAG_REMOTE is present in the kernel, either
-            # as a define or an enum
-            #
-            # FAULT_FLAG_REMOTE define added by Kernel commit 1b2ee1266ea6
-            # ("mm/core: Do not enforce PKEY permissions on remote mm access")
-            # in v4.6
-            # FAULT_FLAG_REMOTE changed from define to enum by Kernel commit
-            # da2f5eb3d344 ("mm/doc: turn fault flags into an enum") in v5.13
-            # FAULT_FLAG_REMOTE moved from `mm.h` to `mm_types.h` by Kernel
-            # commit 36090def7bad ("mm: move tlb_flush_pending inline helpers
-            # to mm_inline.h") in v5.17
-            #
-            CODE="
-            #include <linux/mm.h>
-            int fault_flag_remote = FAULT_FLAG_REMOTE;
-            "
-
-            compile_check_conftest "$CODE" "NV_MM_HAS_FAULT_FLAG_REMOTE" "" "types"
-        ;;
-
-        drm_framebuffer_obj_present)
-            #
-            # Determine if the drm_framebuffer struct has an obj member.
-            #
-            # Added by commit 4c3dbb2c312c ("drm: Add GEM backed framebuffer
-            # library") in v4.14.
-            #
-            CODE="
-            #if defined(NV_DRM_DRMP_H_PRESENT)
-            #include <drm/drmP.h>
-            #endif
-
-            #if defined(NV_DRM_DRM_FRAMEBUFFER_H_PRESENT)
-            #include <drm/drm_framebuffer.h>
-            #endif
-
-            int conftest_drm_framebuffer_obj_present(void) {
-                return offsetof(struct drm_framebuffer, obj);
-            }"
-
-            compile_check_conftest "$CODE" "NV_DRM_FRAMEBUFFER_OBJ_PRESENT" "" "types"
-        ;;
-
-        drm_color_ctm_3x4_present)
-            # Determine if struct drm_color_ctm_3x4 is present.
-            #
-            # struct drm_color_ctm_3x4 was added by commit 6872a189be50
-            # ("drm/amd/display: Add 3x4 CTM support for plane CTM") in v6.8.
-            CODE="
-            #include <uapi/drm/drm_mode.h>
-            struct drm_color_ctm_3x4 ctm;"
-
-            compile_check_conftest "$CODE" "NV_DRM_COLOR_CTM_3X4_PRESENT" "" "types"
-        ;;
-
-        drm_color_lut)
-            # Determine if struct drm_color_lut is present.
-            #
-            # struct drm_color_lut was added by commit 5488dc16fde7
-            # ("drm: introduce pipe color correction properties") in v4.6.
-            CODE="
-            #include <uapi/drm/drm_mode.h>
-            struct drm_color_lut lut;"
-
-            compile_check_conftest "$CODE" "NV_DRM_COLOR_LUT_PRESENT" "" "types"
-        ;;
-
-        drm_property_blob_put)
-            #
-            # Determine if function drm_property_blob_put() is present.
-            #
-            # Added by commit 6472e5090be7 ("drm: Introduce
-            # drm_property_blob_{get,put}()") v4.12, when it replaced
-            # drm_property_unreference_blob().
-            #
-
-            CODE="
-            #if defined(NV_DRM_DRM_PROPERTY_H_PRESENT)
-            #include <drm/drm_property.h>
-            #endif
-            void conftest_drm_property_blob_put(void) {
-                drm_property_blob_put();
-            }"
-
-            compile_check_conftest "$CODE" "NV_DRM_PROPERTY_BLOB_PUT_PRESENT" "" "functions"
-        ;;
-
        # When adding a new conftest entry, please use the correct format for
        # specifying the relevant upstream Linux kernel commit.  Please
        # avoid specifying -rc kernels, and only use SHAs that actually exist
--- a/kernel-open/header-presence-tests.mk
+++ b/kernel-open/header-presence-tests.mk
@@ -15,6 +15,7 @@ NV_HEADER_PRESENCE_TESTS = \
  drm/drm_atomic_uapi.h \
  drm/drm_drv.h \
  drm/drm_fbdev_generic.h \
+  drm/drm_fbdev_ttm.h \
  drm/drm_framebuffer.h \
  drm/drm_connector.h \
  drm/drm_probe_helper.h \
@@ -28,7 +29,6 @@ NV_HEADER_PRESENCE_TESTS = \
  drm/drm_device.h \
  drm/drm_mode_config.h \
  drm/drm_modeset_lock.h \
-  drm/drm_property.h \
  dt-bindings/interconnect/tegra_icc_id.h \
  generated/autoconf.h \
  generated/compile.h \
@@ -53,7 +53,6 @@ NV_HEADER_PRESENCE_TESTS = \
  linux/dma-resv.h \
  soc/tegra/chip-id.h \
  soc/tegra/fuse.h \
-  soc/tegra/fuse-helper.h \
  soc/tegra/tegra_bpmp.h \
  video/nv_internal.h \
  linux/platform/tegra/dce/dce-client-ipc.h \
@@ -99,5 +98,6 @@ NV_HEADER_PRESENCE_TESTS = \
  linux/sync_file.h \
  linux/cc_platform.h \
  asm/cpufeature.h \
-  linux/mpi.h
+  linux/mpi.h \
+  asm/mshyperv.h

--- a/kernel-open/nvidia-drm/nv-kthread-q.c
+++ b/kernel-open/nvidia-drm/nv-kthread-q.c
@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

        // Ran out of attempts - return thread even if its stack may not be
        // allocated on the preferred node
-        if ((i == (attempts - 1)))
+        if (i == (attempts - 1))
            break;

        // Get the NUMA node where the first page of the stack is resident. If
--- a/kernel-open/nvidia-drm/nvidia-drm-crtc.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-crtc.c
@@ -42,6 +42,12 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>

+#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
+#include <linux/nvhost.h>
+#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)            
+#include <linux/host1x-next.h>
+#endif
+
 #if defined(NV_DRM_DRM_COLOR_MGMT_H_PRESENT)
 #include <drm/drm_color_mgmt.h>
 #endif
@@ -170,10 +176,12 @@ cursor_plane_req_config_update(struct drm_plane *plane,
        return;
    }

-    memset(req_config, 0, sizeof(*req_config));
-    req_config->surface = to_nv_framebuffer(plane_state->fb)->pSurface;
-    req_config->dstX = plane_state->crtc_x;
-    req_config->dstY = plane_state->crtc_y;
+    *req_config = (struct NvKmsKapiCursorRequestedConfig) {
+        .surface = to_nv_framebuffer(plane_state->fb)->pSurface,
+
+        .dstX = plane_state->crtc_x,
+        .dstY = plane_state->crtc_y,
+    };

 #if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE)
    if (plane->blend_mode_property != NULL && plane->alpha_property != NULL) {
@@ -258,6 +266,7 @@ plane_req_config_update(struct drm_plane *plane,
 {
    struct nv_drm_plane *nv_plane = to_nv_plane(plane);
    struct NvKmsKapiLayerConfig old_config = req_config->config;
+    struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
    struct nv_drm_plane_state *nv_drm_plane_state =
        to_nv_drm_plane_state(plane_state);

@@ -266,22 +275,24 @@ plane_req_config_update(struct drm_plane *plane,
        return 0;
    }

-    memset(req_config, 0, sizeof(*req_config));
+    *req_config = (struct NvKmsKapiLayerRequestedConfig) {
+        .config = {
+            .surface = to_nv_framebuffer(plane_state->fb)->pSurface,

-    req_config->config.surface = to_nv_framebuffer(plane_state->fb)->pSurface;
+            /* Source values are 16.16 fixed point */
+            .srcX = plane_state->src_x >> 16,
+            .srcY = plane_state->src_y >> 16,
+            .srcWidth  = plane_state->src_w >> 16,
+            .srcHeight = plane_state->src_h >> 16,

-    /* Source values are 16.16 fixed point */
-    req_config->config.srcX = plane_state->src_x >> 16;
-    req_config->config.srcY = plane_state->src_y >> 16;
-    req_config->config.srcWidth  = plane_state->src_w >> 16;
-    req_config->config.srcHeight = plane_state->src_h >> 16;
+            .dstX = plane_state->crtc_x,
+            .dstY = plane_state->crtc_y,
+            .dstWidth  = plane_state->crtc_w,
+            .dstHeight = plane_state->crtc_h,

-    req_config->config.dstX = plane_state->crtc_x;
-    req_config->config.dstY = plane_state->crtc_y;
-    req_config->config.dstWidth  = plane_state->crtc_w;
-    req_config->config.dstHeight = plane_state->crtc_h;
-
-    req_config->config.csc = old_config.csc;
+            .csc = old_config.csc
+        },
+    };

 #if defined(NV_DRM_ROTATION_AVAILABLE)
    /*
@@ -385,16 +396,49 @@ plane_req_config_update(struct drm_plane *plane,
    req_config->config.inputColorSpace =
        nv_drm_plane_state->input_colorspace;

-    req_config->config.syncParams.preSyncptSpecified = false;
-    req_config->config.syncParams.postSyncptRequested = false;
-    req_config->config.syncParams.semaphoreSpecified = false;
+    req_config->config.syncptParams.preSyncptSpecified = false;
+    req_config->config.syncptParams.postSyncptRequested = false;

-    if (nv_drm_plane_state->fd_user_ptr) {
-        if (to_nv_device(plane->dev)->supportsSyncpts) {
-            req_config->config.syncParams.postSyncptRequested = true;
-        } else {
+    if (plane_state->fence != NULL || nv_drm_plane_state->fd_user_ptr) {
+        if (!nv_dev->supportsSyncpts) {
            return -1;
        }
+
+#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
+#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
+        if (plane_state->fence != NULL) {
+            int ret = nvhost_dma_fence_unpack(
+                          plane_state->fence,
+                          &req_config->config.syncptParams.preSyncptId,
+                          &req_config->config.syncptParams.preSyncptValue);
+            if (ret != 0) {
+                return ret;
+            }
+            req_config->config.syncptParams.preSyncptSpecified = true;
+        }
+#endif
+
+        if (nv_drm_plane_state->fd_user_ptr) {
+            req_config->config.syncptParams.postSyncptRequested = true;
+        }           
+#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)            
+        if (plane_state->fence != NULL) {            
+            int ret = host1x_fence_extract(            
+                      plane_state->fence,            
+                      &req_config->config.syncptParams.preSyncptId,            
+                      &req_config->config.syncptParams.preSyncptValue);            
+            if (ret != 0) {            
+                return ret;            
+            }            
+            req_config->config.syncptParams.preSyncptSpecified = true;            
+        }            
+
+        if (nv_drm_plane_state->fd_user_ptr) {            
+            req_config->config.syncptParams.postSyncptRequested = true;            
+        }
+#else
+        return -1;
+#endif
    }

 #if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
@@ -644,7 +688,9 @@ static int nv_drm_plane_atomic_set_property(
        to_nv_drm_plane_state(state);

    if (property == nv_dev->nv_out_fence_property) {
-        nv_drm_plane_state->fd_user_ptr = (void __user *)(uintptr_t)(val);
+#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
+        nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
+#endif
        return 0;
    } else if (property == nv_dev->nv_input_colorspace_property) {
        nv_drm_plane_state->input_colorspace = val;
@@ -817,7 +863,7 @@ __nv_drm_atomic_helper_crtc_destroy_state(struct drm_crtc *crtc,
 #endif
 }

-static inline bool nv_drm_crtc_duplicate_req_head_modeset_config(
+static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
    const struct NvKmsKapiHeadRequestedConfig *old,
    struct NvKmsKapiHeadRequestedConfig *new)
 {
@@ -829,41 +875,15 @@ static inline bool nv_drm_crtc_duplicate_req_head_modeset_config(
     * there is no change in new configuration yet with respect
     * to older one!
     */
-    memset(new, 0, sizeof(*new));
-    new->modeSetConfig = old->modeSetConfig;
+    *new = (struct NvKmsKapiHeadRequestedConfig) {
+        .modeSetConfig = old->modeSetConfig,
+    };

    for (i = 0; i < ARRAY_SIZE(old->layerRequestedConfig); i++) {
-        new->layerRequestedConfig[i].config =
-            old->layerRequestedConfig[i].config;
+        new->layerRequestedConfig[i] = (struct NvKmsKapiLayerRequestedConfig) {
+            .config = old->layerRequestedConfig[i].config,
+        };
    }
-
-    if (old->modeSetConfig.lut.input.pRamps) {
-        new->modeSetConfig.lut.input.pRamps =
-            nv_drm_calloc(1, sizeof(*new->modeSetConfig.lut.input.pRamps));
-
-        if (!new->modeSetConfig.lut.input.pRamps) {
-            return false;
-        }
-        *new->modeSetConfig.lut.input.pRamps =
-            *old->modeSetConfig.lut.input.pRamps;
-    }
-    if (old->modeSetConfig.lut.output.pRamps) {
-        new->modeSetConfig.lut.output.pRamps =
-            nv_drm_calloc(1, sizeof(*new->modeSetConfig.lut.output.pRamps));
-
-        if (!new->modeSetConfig.lut.output.pRamps) {
-            /*
-             * new->modeSetConfig.lut.input.pRamps is either NULL or it was
-             * just allocated
-             */
-            nv_drm_free(new->modeSetConfig.lut.input.pRamps);
-            new->modeSetConfig.lut.input.pRamps = NULL;
-            return false;
-        }
-        *new->modeSetConfig.lut.output.pRamps =
-            *old->modeSetConfig.lut.output.pRamps;
-    }
-    return true;
 }

 static inline struct nv_drm_crtc_state *nv_drm_crtc_state_alloc(void)
@@ -935,24 +955,17 @@ nv_drm_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
        return NULL;
    }

+    __drm_atomic_helper_crtc_duplicate_state(crtc, &nv_state->base);
+
    INIT_LIST_HEAD(&nv_state->nv_flip->list_entry);
    INIT_LIST_HEAD(&nv_state->nv_flip->deferred_flip_list);

-    /*
-     * nv_drm_crtc_duplicate_req_head_modeset_config potentially allocates
-     * nv_state->req_config.modeSetConfig.lut.{in,out}put.pRamps, so they should
-     * be freed in any following failure paths.
-     */
-    if (!nv_drm_crtc_duplicate_req_head_modeset_config(
-             &(to_nv_crtc_state(crtc->state)->req_config),
-             &nv_state->req_config)) {
+    nv_drm_crtc_duplicate_req_head_modeset_config(
+        &(to_nv_crtc_state(crtc->state)->req_config),
+        &nv_state->req_config);

-        nv_drm_free(nv_state->nv_flip);
-        nv_drm_free(nv_state);
-        return NULL;
-    }
-
-    __drm_atomic_helper_crtc_duplicate_state(crtc, &nv_state->base);
+    nv_state->ilut_ramps = NULL;
+    nv_state->olut_ramps = NULL;

    return &nv_state->base;
 }
@@ -977,8 +990,8 @@ static void nv_drm_atomic_crtc_destroy_state(struct drm_crtc *crtc,

    __nv_drm_atomic_helper_crtc_destroy_state(crtc, &nv_state->base);

-    nv_drm_free(nv_state->req_config.modeSetConfig.lut.input.pRamps);
-    nv_drm_free(nv_state->req_config.modeSetConfig.lut.output.pRamps);
+    nv_drm_free(nv_state->ilut_ramps);
+    nv_drm_free(nv_state->olut_ramps);

    nv_drm_free(nv_state);
 }
@@ -1061,82 +1074,94 @@ static int color_mgmt_config_set_luts(struct nv_drm_crtc_state *nv_crtc_state,
     * According to the comment in the Linux kernel's
     * drivers/gpu/drm/drm_color_mgmt.c, if either property is NULL, that LUT
     * needs to be changed to a linear LUT
-     *
-     * On failure, any LUT ramps allocated in this function are freed when the
-     * subsequent atomic state cleanup calls nv_drm_atomic_crtc_destroy_state.
     */

+    req_config->flags.lutChanged = NV_TRUE;
    if (crtc_state->degamma_lut) {
        struct drm_color_lut *degamma_lut = NULL;
        uint64_t degamma_len = 0;

-        if (!modeset_config->lut.input.pRamps) {
-            modeset_config->lut.input.pRamps =
-                nv_drm_calloc(1, sizeof(*modeset_config->lut.input.pRamps));
-            if (!modeset_config->lut.input.pRamps) {
-                return -ENOMEM;
-            }
+        nv_crtc_state->ilut_ramps = nv_drm_calloc(1, sizeof(*nv_crtc_state->ilut_ramps));
+        if (!nv_crtc_state->ilut_ramps) {
+            ret = -ENOMEM;
+            goto fail;
        }

        degamma_lut = (struct drm_color_lut *)crtc_state->degamma_lut->data;
        degamma_len = crtc_state->degamma_lut->length /
                      sizeof(struct drm_color_lut);

-        if ((ret = color_mgmt_config_copy_lut(modeset_config->lut.input.pRamps,
+        if ((ret = color_mgmt_config_copy_lut(nv_crtc_state->ilut_ramps,
                                              degamma_lut,
                                              degamma_len)) != 0) {
-            return ret;
+            goto fail;
        }

+        modeset_config->lut.input.specified = NV_TRUE;
        modeset_config->lut.input.depth     = 30; /* specify the full LUT */
        modeset_config->lut.input.start     = 0;
        modeset_config->lut.input.end       = degamma_len - 1;
+        modeset_config->lut.input.pRamps    = nv_crtc_state->ilut_ramps;
    } else {
        /* setting input.end to 0 is equivalent to disabling the LUT, which
         * should be equivalent to a linear LUT */
+        modeset_config->lut.input.specified = NV_TRUE;
        modeset_config->lut.input.depth     = 30; /* specify the full LUT */
        modeset_config->lut.input.start     = 0;
        modeset_config->lut.input.end       = 0;
-
-        nv_drm_free(modeset_config->lut.input.pRamps);
        modeset_config->lut.input.pRamps    = NULL;
+
    }
-    req_config->flags.ilutChanged = NV_TRUE;

    if (crtc_state->gamma_lut) {
        struct drm_color_lut *gamma_lut = NULL;
        uint64_t gamma_len = 0;

-        if (!modeset_config->lut.output.pRamps) {
-            modeset_config->lut.output.pRamps =
-                nv_drm_calloc(1, sizeof(*modeset_config->lut.output.pRamps));
-            if (!modeset_config->lut.output.pRamps) {
-                return -ENOMEM;
-            }
+        nv_crtc_state->olut_ramps = nv_drm_calloc(1, sizeof(*nv_crtc_state->olut_ramps));
+        if (!nv_crtc_state->olut_ramps) {
+            ret = -ENOMEM;
+            goto fail;
        }

        gamma_lut = (struct drm_color_lut *)crtc_state->gamma_lut->data;
        gamma_len = crtc_state->gamma_lut->length /
                    sizeof(struct drm_color_lut);

-        if ((ret = color_mgmt_config_copy_lut(modeset_config->lut.output.pRamps,
+        if ((ret = color_mgmt_config_copy_lut(nv_crtc_state->olut_ramps,
                                              gamma_lut,
                                              gamma_len)) != 0) {
-            return ret;
+            goto fail;
        }

+        modeset_config->lut.output.specified = NV_TRUE;
        modeset_config->lut.output.enabled   = NV_TRUE;
+        modeset_config->lut.output.pRamps    = nv_crtc_state->olut_ramps;
    } else {
        /* disabling the output LUT should be equivalent to setting a linear
         * LUT */
+        modeset_config->lut.output.specified = NV_TRUE;
        modeset_config->lut.output.enabled   = NV_FALSE;
-
-        nv_drm_free(modeset_config->lut.output.pRamps);
        modeset_config->lut.output.pRamps    = NULL;
    }
-    req_config->flags.olutChanged = NV_TRUE;

    return 0;
+
+fail:
+    /* free allocated state */
+    nv_drm_free(nv_crtc_state->ilut_ramps);
+    nv_drm_free(nv_crtc_state->olut_ramps);
+
+    /* remove dangling pointers */
+    nv_crtc_state->ilut_ramps = NULL;
+    nv_crtc_state->olut_ramps = NULL;
+    modeset_config->lut.input.pRamps = NULL;
+    modeset_config->lut.output.pRamps = NULL;
+
+    /* prevent attempts at reading NULLs */
+    modeset_config->lut.input.specified = NV_FALSE;
+    modeset_config->lut.output.specified = NV_FALSE;
+
+    return ret;
 }
 #endif /* NV_DRM_COLOR_MGMT_AVAILABLE */

@@ -1161,6 +1186,9 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
    struct NvKmsKapiHeadRequestedConfig *req_config =
        &nv_crtc_state->req_config;
    int ret = 0;
+#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
+    struct nv_drm_device *nv_dev = to_nv_device(crtc_state->crtc->dev);
+#endif

    if (crtc_state->mode_changed) {
        drm_mode_to_nvkms_display_mode(&crtc_state->mode,
@@ -1204,6 +1232,13 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
 #endif

 #if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
+    if (nv_dev->drmMasterChangedSinceLastAtomicCommit &&
+        (crtc_state->degamma_lut ||
+         crtc_state->ctm ||
+         crtc_state->gamma_lut)) {
+
+        crtc_state->color_mgmt_changed = NV_TRUE;
+    }
    if (crtc_state->color_mgmt_changed) {
        if ((ret = color_mgmt_config_set_luts(nv_crtc_state, req_config)) != 0) {
            return ret;
@@ -1229,7 +1264,7 @@ static const struct drm_crtc_helper_funcs nv_crtc_helper_funcs = {

 static void nv_drm_plane_install_properties(
    struct drm_plane *plane,
-    NvBool supportsICtCp)
+    NvBool supportsHDR)
 {
    struct nv_drm_device *nv_dev = to_nv_device(plane->dev);

@@ -1245,7 +1280,7 @@ static void nv_drm_plane_install_properties(
    }

 #if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
-    if (supportsICtCp && nv_dev->nv_hdr_output_metadata_property) {
+    if (supportsHDR && nv_dev->nv_hdr_output_metadata_property) {
        drm_object_attach_property(
            &plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
    }
@@ -1431,7 +1466,7 @@ nv_drm_plane_create(struct drm_device *dev,
    if (plane_type != DRM_PLANE_TYPE_CURSOR) {
        nv_drm_plane_install_properties(
                plane,
-                pResInfo->supportsICtCp[layer_idx]);
+                pResInfo->supportsHDR[layer_idx]);
    }

    __nv_drm_plane_create_alpha_blending_properties(
--- a/kernel-open/nvidia-drm/nvidia-drm-crtc.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-crtc.h
@@ -129,6 +129,9 @@ struct nv_drm_crtc_state {
     */
    struct NvKmsKapiHeadRequestedConfig req_config;

+    struct NvKmsLutRamps *ilut_ramps;
+    struct NvKmsLutRamps *olut_ramps;
+
    /**
     * @nv_flip:
     *
--- a/kernel-open/nvidia-drm/nvidia-drm-drv.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c
@@ -64,12 +64,14 @@
 #include <drm/drm_ioctl.h>
 #endif

-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
+#if defined(NV_DRM_FBDEV_AVAILABLE)
 #include <drm/drm_aperture.h>
 #include <drm/drm_fb_helper.h>
 #endif

-#if defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
+#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
+#include <drm/drm_fbdev_ttm.h>
+#elif defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
 #include <drm/drm_fbdev_generic.h>
 #endif

@@ -124,6 +126,7 @@ static const char* nv_get_input_colorspace_name(

 #if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)

+#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
 static void nv_drm_output_poll_changed(struct drm_device *dev)
 {
    struct drm_connector *connector = NULL;
@@ -167,6 +170,7 @@ static void nv_drm_output_poll_changed(struct drm_device *dev)
    nv_drm_connector_list_iter_end(&conn_iter);
 #endif
 }
+#endif /* NV_DRM_OUTPUT_POLL_CHANGED_PRESENT */

 static struct drm_framebuffer *nv_drm_framebuffer_create(
    struct drm_device *dev,
@@ -204,7 +208,9 @@ static const struct drm_mode_config_funcs nv_mode_config_funcs = {
    .atomic_check  = nv_drm_atomic_check,
    .atomic_commit = nv_drm_atomic_commit,

+    #if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
    .output_poll_changed = nv_drm_output_poll_changed,
+    #endif
 };

 static void nv_drm_event_callback(const struct NvKmsKapiEvent *event)
@@ -373,15 +379,19 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
        len++;
    }

-    if (nv_dev->supportsSyncpts) {
-        nv_dev->nv_out_fence_property =
-            drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
-                    "NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
-        if (nv_dev->nv_out_fence_property == NULL) {
-            return -ENOMEM;
-        }
+#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
+    if (!nv_dev->supportsSyncpts) {
+        return 0;
    }

+    nv_dev->nv_out_fence_property =
+        drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
+            "NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
+    if (nv_dev->nv_out_fence_property == NULL) {
+        return -ENOMEM;
+    }
+#endif
+
    nv_dev->nv_input_colorspace_property =
        drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
                                 enum_list, len);
@@ -430,7 +440,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)

    struct NvKmsKapiAllocateDeviceParams allocateDeviceParams;
    struct NvKmsKapiDeviceResourcesInfo resInfo;
-#endif /* defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) */
+#endif
 #if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
    NvU64 kind;
    NvU64 gen;
@@ -476,7 +486,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
        return -ENODEV;
    }

-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
+#if defined(NV_DRM_FBDEV_AVAILABLE)
    /*
     * If fbdev is enabled, take modeset ownership now before other DRM clients
     * can take master (and thus NVKMS ownership).
@@ -517,12 +527,6 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
    nv_dev->semsurf_max_submitted_offset =
        resInfo.caps.semsurf.maxSubmittedOffset;

-    nv_dev->display_semaphores.count =
-        resInfo.caps.numDisplaySemaphores;
-    nv_dev->display_semaphores.next_index = 0;
-
-    nv_dev->requiresVrrSemaphores = resInfo.caps.requiresVrrSemaphores;
-
 #if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
    gen = nv_dev->pageKindGeneration;
    kind = nv_dev->genericPageKind;
@@ -610,7 +614,7 @@ static void __nv_drm_unload(struct drm_device *dev)

    /* Release modeset ownership if fbdev is enabled */

-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
+#if defined(NV_DRM_FBDEV_AVAILABLE)
    if (nv_dev->hasFramebufferConsole) {
        drm_atomic_helper_shutdown(dev);
        nvKms->releaseOwnership(nv_dev->pDevice);
@@ -679,6 +683,7 @@ static int __nv_drm_master_set(struct drm_device *dev,
        !nvKms->grabOwnership(nv_dev->pDevice)) {
        return -EINVAL;
    }
+    nv_dev->drmMasterChangedSinceLastAtomicCommit = NV_TRUE;

    return 0;
 }
@@ -807,14 +812,6 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
    return 0;
 }

-static int nv_drm_get_drm_file_unique_id_ioctl(struct drm_device *dev,
-                                               void *data, struct drm_file *filep)
-{
-    struct drm_nvidia_get_drm_file_unique_id_params *params = data;
-    params->id = (u64)(filep->driver_priv);
-    return 0;
-}
-
 static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
                                         void *data, struct drm_file *filep)
 {
@@ -1331,17 +1328,6 @@ static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
 }
 #endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */

-static int nv_drm_open(struct drm_device *dev, struct drm_file *filep)
-{
-    _Static_assert(sizeof(filep->driver_priv) >= sizeof(u64),
-                   "filep->driver_priv can not hold an u64");
-    static atomic64_t id = ATOMIC_INIT(0);
-
-    filep->driver_priv = (void *)atomic64_inc_return(&id);
-
-    return 0;
-}
-
 #if defined(NV_DRM_MASTER_HAS_LEASES)
 static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
                                             int lessee_id)
@@ -1585,9 +1571,6 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
    DRM_IOCTL_DEF_DRV(NVIDIA_GET_DEV_INFO,
                      nv_drm_get_dev_info_ioctl,
                      DRM_RENDER_ALLOW|DRM_UNLOCKED),
-    DRM_IOCTL_DEF_DRV(NVIDIA_GET_DRM_FILE_UNIQUE_ID,
-                      nv_drm_get_drm_file_unique_id_ioctl,
-                      DRM_RENDER_ALLOW|DRM_UNLOCKED),

 #if defined(NV_DRM_FENCE_AVAILABLE)
    DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
@@ -1670,9 +1653,6 @@ static struct drm_driver nv_drm_driver = {
    .driver_features        =
 #if defined(NV_DRM_DRIVER_PRIME_FLAG_PRESENT)
                               DRIVER_PRIME |
-#endif
-#if defined(NV_DRM_SYNCOBJ_FEATURES_PRESENT)
-                               DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE |
 #endif
                               DRIVER_GEM  | DRIVER_RENDER,

@@ -1684,14 +1664,14 @@ static struct drm_driver nv_drm_driver = {
    .num_ioctls             = ARRAY_SIZE(nv_drm_ioctls),

 /*
- * Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers
- * for fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
+ * linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
+ * conversion") unexports drm_gem_prime_handle_to_fd() and
 * drm_gem_prime_fd_to_handle().
 *
- * Prior Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME
- * import/export for all drivers") made these helpers the default when
- * .prime_handle_to_fd / .prime_fd_to_handle are unspecified, so it's fine
- * to just skip specifying them if the helpers aren't present.
+ * Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
+ * all drivers") made these helpers the default when .prime_handle_to_fd /
+ * .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
+ * them if the helpers aren't present.
 */
 #if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
    .prime_handle_to_fd     = drm_gem_prime_handle_to_fd,
@@ -1725,7 +1705,6 @@ static struct drm_driver nv_drm_driver = {
 #if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
    .postclose              = nv_drm_postclose,
 #endif
-    .open                   = nv_drm_open,

    .fops                   = &nv_drm_fops,

@@ -1784,7 +1763,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
    struct nv_drm_device *nv_dev = NULL;
    struct drm_device *dev = NULL;
    struct device *device = gpu_info->os_device_ptr;
-    bool bus_is_pci;

    DRM_DEBUG(
        "Registering device for NVIDIA GPU ID 0x08%x",
@@ -1818,7 +1796,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
    dev->dev_private = nv_dev;
    nv_dev->dev = dev;

-    bus_is_pci =
+    bool bus_is_pci =
 #if defined(NV_LINUX)
        device->bus == &pci_bus_type;
 #elif defined(NV_BSD)
@@ -1838,7 +1816,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
        goto failed_drm_register;
    }

-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
+#if defined(NV_DRM_FBDEV_AVAILABLE)
    if (nv_drm_fbdev_module_param &&
        drm_core_check_feature(dev, DRIVER_MODESET)) {

@@ -1851,9 +1829,13 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
            drm_aperture_remove_conflicting_pci_framebuffers(pdev, nv_drm_driver.name);
 #endif
        }
+        #if defined(NV_DRM_FBDEV_TTM_AVAILABLE)
+        drm_fbdev_ttm_setup(dev, 32);
+        #elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
        drm_fbdev_generic_setup(dev, 32);
+        #endif
    }
-#endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */
+#endif /* defined(NV_DRM_FBDEV_AVAILABLE) */

    /* Add NVIDIA-DRM device into list */

@@ -1995,12 +1977,12 @@ void nv_drm_suspend_resume(NvBool suspend)

        if (suspend) {
            drm_kms_helper_poll_disable(dev);
-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
+#if defined(NV_DRM_FBDEV_AVAILABLE)
            drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 1);
 #endif
            drm_mode_config_reset(dev);
        } else {
-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
+#if defined(NV_DRM_FBDEV_AVAILABLE)
            drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 0);
 #endif
            drm_kms_helper_poll_enable(dev);
--- a/kernel-open/nvidia-drm/nvidia-drm-fence.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-fence.c
@@ -293,12 +293,14 @@ __nv_drm_prime_fence_context_new(
     * to check a return value.
     */

-    nv_prime_fence_context->base.ops = &nv_drm_prime_fence_context_ops;
-    nv_prime_fence_context->base.nv_dev = nv_dev;
-    nv_prime_fence_context->base.context = nv_dma_fence_context_alloc(1);
-    nv_prime_fence_context->base.fenceSemIndex = p->index;
-    nv_prime_fence_context->pSemSurface = pSemSurface;
-    nv_prime_fence_context->pLinearAddress = pLinearAddress;
+    *nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
+        .base.ops = &nv_drm_prime_fence_context_ops,
+        .base.nv_dev = nv_dev,
+        .base.context = nv_dma_fence_context_alloc(1),
+        .base.fenceSemIndex = p->index,
+        .pSemSurface = pSemSurface,
+        .pLinearAddress = pLinearAddress,
+    };

    INIT_LIST_HEAD(&nv_prime_fence_context->pending);

@@ -1269,16 +1271,18 @@ __nv_drm_semsurf_fence_ctx_new(
     * to check a return value.
     */

-    ctx->base.ops = &nv_drm_semsurf_fence_ctx_ops;
-    ctx->base.nv_dev = nv_dev;
-    ctx->base.context = nv_dma_fence_context_alloc(1);
-    ctx->base.fenceSemIndex = p->index;
-    ctx->pSemSurface = pSemSurface;
-    ctx->pSemMapping.pVoid = semMapping;
-    ctx->pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping;
-    ctx->callback.local = NULL;
-    ctx->callback.nvKms = NULL;
-    ctx->current_wait_value = 0;
+    *ctx = (struct nv_drm_semsurf_fence_ctx) {
+        .base.ops = &nv_drm_semsurf_fence_ctx_ops,
+        .base.nv_dev = nv_dev,
+        .base.context = nv_dma_fence_context_alloc(1),
+        .base.fenceSemIndex = p->index,
+        .pSemSurface = pSemSurface,
+        .pSemMapping.pVoid = semMapping,
+        .pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping,
+        .callback.local = NULL,
+        .callback.nvKms = NULL,
+        .current_wait_value = 0,
+    };

    spin_lock_init(&ctx->lock);
    INIT_LIST_HEAD(&ctx->pending_fences);
--- a/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c
@@ -551,12 +551,14 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
 {
    struct nv_drm_device *nv_dev = to_nv_device(dev);
    const struct nv_drm_device *nv_dev_src;
+    const struct nv_drm_gem_nvkms_memory *nv_nvkms_memory_src;
    struct nv_drm_gem_nvkms_memory *nv_nvkms_memory;
    struct NvKmsKapiMemory *pMemory;

    BUG_ON(nv_gem_src == NULL || nv_gem_src->ops != &nv_gem_nvkms_memory_ops);

    nv_dev_src = to_nv_device(nv_gem_src->base.dev);
+    nv_nvkms_memory_src = to_nv_nvkms_memory_const(nv_gem_src);

    if ((nv_nvkms_memory =
            nv_drm_calloc(1, sizeof(*nv_nvkms_memory))) == NULL) {
--- a/kernel-open/nvidia-drm/nvidia-drm-helper.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-helper.c
@@ -45,7 +45,8 @@

 /*
 * The inclusion of drm_framebuffer.h was removed from drm_crtc.h by commit
- * 720cf96d8fec ("drm: Drop drm_framebuffer.h from drm_crtc.h") in v6.0.
+ * 720cf96d8fecde29b72e1101f8a567a0ce99594f ("drm: Drop drm_framebuffer.h from
+ * drm_crtc.h") in linux-next, expected in v5.19-rc7.
 *
 * We only need drm_framebuffer.h for drm_framebuffer_put(), and it is always
 * present (v4.9+) when drm_framebuffer_{put,get}() is present (v4.12+), so it
--- a/kernel-open/nvidia-drm/nvidia-drm-helper.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-helper.h
@@ -613,8 +613,8 @@ static inline int nv_drm_format_num_planes(uint32_t format)
 #endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */

 /*
- * DRM_UNLOCKED was removed with commit 2798ffcc1d6a ("drm: Remove locking for
- * legacy ioctls and DRM_UNLOCKED") in v6.8, but it was previously made
+ * DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
+ * locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
 * implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
 * fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
 * commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver
--- a/kernel-open/nvidia-drm/nvidia-drm-ioctl.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-ioctl.h
@@ -52,7 +52,6 @@
 #define DRM_NVIDIA_SEMSURF_FENCE_CREATE             0x15
 #define DRM_NVIDIA_SEMSURF_FENCE_WAIT               0x16
 #define DRM_NVIDIA_SEMSURF_FENCE_ATTACH             0x17
-#define DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID           0x18

 #define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY                           \
    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY),      \
@@ -158,11 +157,6 @@
              DRM_NVIDIA_SEMSURF_FENCE_ATTACH),                         \
              struct drm_nvidia_semsurf_fence_attach_params)

-#define DRM_IOCTL_NVIDIA_GET_DRM_FILE_UNIQUE_ID                         \
-    DRM_IOWR((DRM_COMMAND_BASE +                                        \
-              DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID),                       \
-              struct drm_nvidia_get_drm_file_unique_id_params)
-
 struct drm_nvidia_gem_import_nvkms_memory_params {
    uint64_t mem_size;           /* IN */

@@ -391,8 +385,4 @@ struct drm_nvidia_semsurf_fence_attach_params {
    uint64_t wait_value;            /* IN Semaphore value to reach before signal */
 };

-struct drm_nvidia_get_drm_file_unique_id_params {
-    uint64_t id;                    /* OUT Unique ID of the DRM file */
-};
-
 #endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
--- a/kernel-open/nvidia-drm/nvidia-drm-linux.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-linux.c
@@ -34,7 +34,7 @@ MODULE_PARM_DESC(
    "Enable atomic kernel modesetting (1 = enable, 0 = disable (default))");
 module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);

-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
+#if defined(NV_DRM_FBDEV_AVAILABLE)
 MODULE_PARM_DESC(
    fbdev,
    "Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");
--- a/kernel-open/nvidia-drm/nvidia-drm-modeset.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-modeset.c
@@ -42,16 +42,6 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>

-#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
-#include <linux/nvhost.h>
-#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)            
-#include <linux/host1x-next.h>
-#endif
-
-#if defined(NV_DRM_FENCE_AVAILABLE)
-#include "nvidia-dma-fence-helper.h"
-#endif
-
 struct nv_drm_atomic_state {
    struct NvKmsKapiRequestedModeSetConfig config;
    struct drm_atomic_state base;
@@ -156,159 +146,6 @@ static int __nv_drm_put_back_post_fence_fd(
    return ret;
 }

-#if defined(NV_DRM_FENCE_AVAILABLE)
-struct nv_drm_plane_fence_cb_data {
-    nv_dma_fence_cb_t dma_fence_cb;
-    struct nv_drm_device *nv_dev;
-    NvU32 semaphore_index;
-};
-
-static void
-__nv_drm_plane_fence_cb(
-    nv_dma_fence_t *fence,
-    nv_dma_fence_cb_t *cb_data
-)
-{
-    struct nv_drm_plane_fence_cb_data *fence_data =
-        container_of(cb_data, typeof(*fence_data), dma_fence_cb);
-    struct nv_drm_device *nv_dev = fence_data->nv_dev;
-
-    nv_dma_fence_put(fence);
-    nvKms->signalDisplaySemaphore(nv_dev->pDevice, fence_data->semaphore_index);
-    nv_drm_free(fence_data);
-}
-
-static int __nv_drm_convert_in_fences(
-    struct nv_drm_device *nv_dev,
-    struct drm_atomic_state *state,
-    struct drm_crtc *crtc,
-    struct drm_crtc_state *crtc_state)
-{
-    struct drm_plane *plane = NULL;
-    struct drm_plane_state *plane_state = NULL;
-    struct nv_drm_plane *nv_plane = NULL;
-    struct NvKmsKapiLayerRequestedConfig *plane_req_config = NULL;
-    struct NvKmsKapiHeadRequestedConfig *head_req_config =
-        &to_nv_crtc_state(crtc_state)->req_config;
-    struct nv_drm_plane_fence_cb_data *fence_data;
-    uint32_t semaphore_index;
-    int ret, i;
-
-    if (!crtc_state->active) {
-        return 0;
-    }
-
-    nv_drm_for_each_new_plane_in_state(state, plane, plane_state, i) {
-        if ((plane->type == DRM_PLANE_TYPE_CURSOR) ||
-            (plane_state->crtc != crtc) ||
-            (plane_state->fence == NULL)) {
-            continue;
-        }
-
-        nv_plane = to_nv_plane(plane);
-        plane_req_config =
-            &head_req_config->layerRequestedConfig[nv_plane->layer_idx];
-
-        if (nv_dev->supportsSyncpts) {
-#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
-#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
-            int ret =
-                nvhost_dma_fence_unpack(
-                    plane_state->fence,
-                    &plane_req_config->config.syncParams.u.syncpt.preSyncptId,
-                    &plane_req_config->config.syncParams.u.syncpt.preSyncptValue);
-            if (ret == 0) {
-                plane_req_config->config.syncParams.preSyncptSpecified = true;
-                continue;
-            }
-#endif
-#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
-            int ret =
-                host1x_fence_extract(
-                    plane_state->fence,
-                    &plane_req_config->config.syncParams.u.syncpt.preSyncptId,
-                    &plane_req_config->config.syncParams.u.syncpt.preSyncptValue);
-            if (ret == 0) {
-                plane_req_config->config.syncParams.preSyncptSpecified = true;
-                continue;
-            }
-#endif
-        }
-
-        /*
-         * Syncpt extraction failed, or syncpts are not supported.
-         * Use general DRM fence support with semaphores instead.
-         */
-        if (plane_req_config->config.syncParams.postSyncptRequested) {
-            // Can't mix Syncpts and semaphores in a given request.
-            return -EINVAL;
-        }
-
-        semaphore_index = nv_drm_next_display_semaphore(nv_dev);
-
-        if (!nvKms->resetDisplaySemaphore(nv_dev->pDevice, semaphore_index)) {
-            NV_DRM_DEV_LOG_ERR(
-                nv_dev,
-                "Failed to initialize semaphore for plane fence");
-            /*
-             * This should only happen if the semaphore pool was somehow
-             * exhausted. Waiting a bit and retrying may help in that case.
-             */
-            return -EAGAIN;
-        }
-
-        plane_req_config->config.syncParams.semaphoreSpecified = true;
-        plane_req_config->config.syncParams.u.semaphore.index = semaphore_index;
-
-        fence_data = nv_drm_calloc(1, sizeof(*fence_data));
-
-        if (!fence_data) {
-            NV_DRM_DEV_LOG_ERR(
-                nv_dev,
-                "Failed to allocate callback data for plane fence");
-            nvKms->cancelDisplaySemaphore(nv_dev->pDevice, semaphore_index);
-            return -ENOMEM;
-        }
-
-        fence_data->nv_dev = nv_dev;
-        fence_data->semaphore_index = semaphore_index;
-
-        ret = nv_dma_fence_add_callback(plane_state->fence,
-                                        &fence_data->dma_fence_cb,
-                                        __nv_drm_plane_fence_cb);
-
-        switch (ret) {
-        case -ENOENT:
-            /* The fence is already signaled */
-            __nv_drm_plane_fence_cb(plane_state->fence,
-                                    &fence_data->dma_fence_cb);
-#if defined(fallthrough)
-            fallthrough;
-#else
-            /* Fallthrough */
-#endif
-        case 0:
-            /*
-             * The plane state's fence reference has either been consumed or
-             * belongs to the outstanding callback now.
-             */
-            plane_state->fence = NULL;
-            break;
-        default:
-            NV_DRM_DEV_LOG_ERR(
-                nv_dev,
-                "Failed plane fence callback registration");
-            /* Fence callback registration failed */
-            nvKms->cancelDisplaySemaphore(nv_dev->pDevice, semaphore_index);
-            nv_drm_free(fence_data);
-            return ret;
-        }
-    }
-
-    return 0;
-}
-#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
-
 static int __nv_drm_get_syncpt_data(
    struct nv_drm_device *nv_dev,
    struct drm_crtc *crtc,
@@ -421,6 +258,11 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
                               commit ? crtc->state : crtc_state;
        struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);

+        requested_config->headRequestedConfig[nv_crtc->head] =
+            to_nv_crtc_state(new_crtc_state)->req_config;
+
+        requested_config->headsMask |= 1 << nv_crtc->head;
+
        if (commit) {
            struct drm_crtc_state *old_crtc_state = crtc_state;
            struct nv_drm_crtc_state *nv_new_crtc_state =
@@ -440,27 +282,7 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,

                nv_new_crtc_state->nv_flip = NULL;
            }
-
-#if defined(NV_DRM_FENCE_AVAILABLE)
-            ret = __nv_drm_convert_in_fences(nv_dev,
-                                             state,
-                                             crtc,
-                                             new_crtc_state);
-
-            if (ret != 0) {
-                return ret;
-            }
-#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
        }
-
-        /*
-         * Do this deep copy after calling __nv_drm_convert_in_fences,
-         * which modifies the new CRTC state's req_config member
-         */
-        requested_config->headRequestedConfig[nv_crtc->head] =
-            to_nv_crtc_state(new_crtc_state)->req_config;
-
-        requested_config->headsMask |= 1 << nv_crtc->head;
    }

    if (commit && nvKms->systemInfo.bAllowWriteCombining) {
@@ -491,10 +313,6 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
        }
    }

-    if (commit && nv_dev->requiresVrrSemaphores && reply_config.vrrFlip) {
-        nvKms->signalVrrSemaphore(nv_dev->pDevice, reply_config.vrrSemaphoreIndex);
-    }
-
    return 0;
 }

@@ -688,6 +506,7 @@ int nv_drm_atomic_commit(struct drm_device *dev,

        goto done;
    }
+    nv_dev->drmMasterChangedSinceLastAtomicCommit = NV_FALSE;

    nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
        struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
@@ -768,9 +587,6 @@ int nv_drm_atomic_commit(struct drm_device *dev,
                NV_DRM_DEV_LOG_ERR(
                    nv_dev,
                    "Flip event timeout on head %u", nv_crtc->head);
-                while (!list_empty(&nv_crtc->flip_list)) {
-                    __nv_drm_handle_flip_event(nv_crtc);
-                }
            }
        }
    }
--- a/kernel-open/nvidia-drm/nvidia-drm-os-interface.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-os-interface.h
@@ -59,14 +59,20 @@ typedef struct nv_timer nv_drm_timer;
 #endif

 #if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
+#define NV_DRM_FBDEV_AVAILABLE
 #define NV_DRM_FBDEV_GENERIC_AVAILABLE
 #endif

+#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
+#define NV_DRM_FBDEV_AVAILABLE
+#define NV_DRM_FBDEV_TTM_AVAILABLE
+#endif
+
 struct page;

 /* Set to true when the atomic modeset feature is enabled. */
 extern bool nv_drm_modeset_module_param;
-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
+#if defined(NV_DRM_FBDEV_AVAILABLE)
 /* Set to true when the nvidia-drm driver should install a framebuffer device */
 extern bool nv_drm_fbdev_module_param;
 #endif
--- a/kernel-open/nvidia-drm/nvidia-drm-priv.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-priv.h
@@ -147,18 +147,22 @@ struct nv_drm_device {
    NvBool hasVideoMemory;

    NvBool supportsSyncpts;
-    NvBool requiresVrrSemaphores;
    NvBool subOwnershipGranted;
    NvBool hasFramebufferConsole;

+    /**
+     * @drmMasterChangedSinceLastAtomicCommit:
+     *
+     * This flag is set in nv_drm_master_set and reset after a completed atomic
+     * commit. It is used to restore or recommit state that is lost by the
+     * NvKms modeset owner change, such as the CRTC color management
+     * properties.
+     */
+    NvBool drmMasterChangedSinceLastAtomicCommit;
+
    struct drm_property *nv_out_fence_property;
    struct drm_property *nv_input_colorspace_property;

-    struct {
-        NvU32 count;
-        NvU32 next_index;
-    } display_semaphores;
-
 #if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
    struct drm_property *nv_hdr_output_metadata_property;
 #endif
@@ -166,19 +170,6 @@ struct nv_drm_device {
    struct nv_drm_device *next;
 };

-static inline NvU32 nv_drm_next_display_semaphore(
-    struct nv_drm_device *nv_dev)
-{
-    NvU32 current_index = nv_dev->display_semaphores.next_index++;
-
-    if (nv_dev->display_semaphores.next_index >=
-        nv_dev->display_semaphores.count) {
-        nv_dev->display_semaphores.next_index = 0;
-    }
-
-    return current_index;
-}
-
 static inline struct nv_drm_device *to_nv_device(
    struct drm_device *dev)
 {
--- a/kernel-open/nvidia-drm/nvidia-drm-sources.mk
+++ b/kernel-open/nvidia-drm/nvidia-drm-sources.mk
@@ -67,6 +67,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_ttm_setup
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
@@ -128,5 +129,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
 NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
-NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
--- a/kernel-open/nvidia-modeset/nv-kthread-q.c
+++ b/kernel-open/nvidia-modeset/nv-kthread-q.c
@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

        // Ran out of attempts - return thread even if its stack may not be
        // allocated on the preferred node
-        if ((i == (attempts - 1)))
+        if (i == (attempts - 1))
            break;

        // Get the NUMA node where the first page of the stack is resident. If
--- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
+++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
@@ -77,10 +77,10 @@ module_param_named(disable_hdmi_frl, disable_hdmi_frl, bool, 0400);
 static bool disable_vrr_memclk_switch = false;
 module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);

-static bool hdmi_deepcolor = true;
+static bool hdmi_deepcolor = false;
 module_param_named(hdmi_deepcolor, hdmi_deepcolor, bool, 0400);

-static bool vblank_sem_control = true;
+static bool vblank_sem_control = false;
 module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);

 static bool opportunistic_display_sync = true;
@@ -139,20 +139,6 @@ NvBool nvkms_opportunistic_display_sync(void)
    return opportunistic_display_sync;
 }

-NvBool nvkms_kernel_supports_syncpts(void)
-{
-/*
- * Note this only checks that the kernel has the prerequisite
- * support for syncpts; callers must also check that the hardware
- * supports syncpts.
- */
-#if (defined(CONFIG_TEGRA_GRHOST) || defined(NV_LINUX_HOST1X_NEXT_H_PRESENT))
-    return NV_TRUE;
-#else
-    return NV_FALSE;
-#endif
-}
-
 #define NVKMS_SYNCPT_STUBS_NEEDED

 /*************************************************************************
@@ -1084,7 +1070,7 @@ static void nvkms_kapi_event_kthread_q_callback(void *arg)
    nvKmsKapiHandleEventQueueChange(device);
 }

-struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
+static struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
                                         struct NvKmsKapiDevice *device,
                                         int *status)
 {
@@ -1136,7 +1122,7 @@ failed:
    return NULL;
 }

-void nvkms_close_pm_locked(struct nvkms_per_open *popen)
+static void nvkms_close_pm_locked(struct nvkms_per_open *popen)
 {
    /*
     * Don't use down_interruptible(): we need to free resources
@@ -1199,7 +1185,7 @@ static void nvkms_close_popen(struct nvkms_per_open *popen)
    }
 }

-int nvkms_ioctl_common
+static int nvkms_ioctl_common
 (
    struct nvkms_per_open *popen,
    NvU32 cmd, NvU64 address, const size_t size
@@ -1248,26 +1234,6 @@ void nvkms_close_from_kapi(struct nvkms_per_open *popen)
    nvkms_close_pm_unlocked(popen);
 }

-NvBool nvkms_ioctl_from_kapi_try_pmlock
-(
-    struct nvkms_per_open *popen,
-    NvU32 cmd, void *params_address, const size_t param_size
-)
-{
-    NvBool ret;
-
-    if (nvkms_read_trylock_pm_lock()) {
-        return NV_FALSE;
-    }
-
-    ret = nvkms_ioctl_common(popen,
-                             cmd,
-                             (NvU64)(NvUPtr)params_address, param_size) == 0;
-    nvkms_read_unlock_pm_lock();
-
-    return ret;
-}
-
 NvBool nvkms_ioctl_from_kapi
 (
    struct nvkms_per_open *popen,
--- a/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h
+++ b/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h
@@ -304,11 +304,6 @@ NvU32 nvkms_enumerate_gpus(nv_gpu_info_t *gpu_info);

 NvBool nvkms_allow_write_combining(void);

-/*!
- * Check if OS supports syncpoints.
- */
-NvBool nvkms_kernel_supports_syncpts(void);
-
 /*!
 * Checks whether the fd is associated with an nvidia character device.
 */
@@ -333,16 +328,6 @@ NvBool nvkms_ioctl_from_kapi
    NvU32 cmd, void *params_address, const size_t params_size
 );

-/*!
- * Like nvkms_ioctl_from_kapi, but return NV_FALSE instead of waiting if the
- * power management read lock cannot be acquired.
- */
-NvBool nvkms_ioctl_from_kapi_try_pmlock
-(
-    struct nvkms_per_open *popen,
-    NvU32 cmd, void *params_address, const size_t params_size
-);
-
 /*!
 * APIs for locking.
 */
--- a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild
+++ b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild
@@ -105,4 +105,3 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
-NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg
--- a/kernel-open/nvidia-peermem/nvidia-peermem.c
+++ b/kernel-open/nvidia-peermem/nvidia-peermem.c
@@ -60,13 +60,6 @@ static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
 module_param(peerdirect_support, int, S_IRUGO);
 MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");

-enum {
-        NV_MEM_PERSISTENT_API_SUPPORT_LEGACY = 0,
-        NV_MEM_PERSISTENT_API_SUPPORT_DEFAULT = 1,
-};
-static int persistent_api_support = NV_MEM_PERSISTENT_API_SUPPORT_DEFAULT;
-module_param(persistent_api_support, int, S_IRUGO);
-MODULE_PARM_DESC(persistent_api_support, "Set level of support for persistent APIs, 0 [legacy] or 1 [default]");

 #define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d ERROR " FMT, __FUNCTION__, __LINE__, ## ARGS)
 #ifdef NV_MEM_DEBUG
@@ -486,8 +479,32 @@ static struct peer_memory_client nv_mem_client_nc = {
    .release        = nv_mem_release,
 };

-static int nv_mem_legacy_client_init(void)
+#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
+
+static int nv_mem_param_conf_check(void)
 {
+    int rc = 0;
+    switch (peerdirect_support) {
+    case NV_MEM_PEERDIRECT_SUPPORT_DEFAULT:
+    case NV_MEM_PEERDIRECT_SUPPORT_LEGACY:
+        break;
+    default:
+        peer_err("invalid peerdirect_support param value %d\n", peerdirect_support);
+        rc = -EINVAL;
+        break;
+    }
+    return rc;
+}
+
+static int __init nv_mem_client_init(void)
+{
+    int rc;
+    rc = nv_mem_param_conf_check();
+    if (rc) {
+        return rc;
+    }
+
+#if defined (NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
    // off by one, to leave space for the trailing '1' which is flagging
    // the new client type
    BUG_ON(strlen(DRV_NAME) > IB_PEER_MEMORY_NAME_MAX-1);
@@ -516,96 +533,19 @@ static int nv_mem_legacy_client_init(void)
                         &mem_invalidate_callback);
    if (!reg_handle) {
        peer_err("nv_mem_client_init -- error while registering traditional client\n");
-        return -EINVAL;
+        rc = -EINVAL;
+        goto out;
    }
-    return 0;
-}

-static int nv_mem_nc_client_init(void)
-{
    // The nc client enables support for persistent pages.
-    if (persistent_api_support == NV_MEM_PERSISTENT_API_SUPPORT_LEGACY)
-    {
-        //
-        // If legacy behavior is forced via module param,
-        // both legacy and persistent clients are registered and are named
-        // "nv_mem"(legacy) and "nv_mem_nc"(persistent).
-        //
-        strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
-    }
-    else
-    {
-        //
-        // With default persistent behavior, the client name shall be "nv_mem"
-        // so that libraries can use the persistent client under the same name.
-        //
-        strcpy(nv_mem_client_nc.name, DRV_NAME);
-    }
-
+    strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
    strcpy(nv_mem_client_nc.version, DRV_VERSION);
    reg_handle_nc = ib_register_peer_memory_client(&nv_mem_client_nc, NULL);
    if (!reg_handle_nc) {
        peer_err("nv_mem_client_init -- error while registering nc client\n");
-        return -EINVAL;
-    }
-    return 0;
-}
-
-#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
-
-static int nv_mem_param_peerdirect_conf_check(void)
-{
-    int rc = 0;
-    switch (peerdirect_support) {
-    case NV_MEM_PEERDIRECT_SUPPORT_DEFAULT:
-    case NV_MEM_PEERDIRECT_SUPPORT_LEGACY:
-        break;
-    default:
-        peer_err("invalid peerdirect_support param value %d\n", peerdirect_support);
        rc = -EINVAL;
-        break;
-    }
-    return rc;
-}
-
-static int nv_mem_param_persistent_api_conf_check(void)
-{
-    int rc = 0;
-    switch (persistent_api_support) {
-    case NV_MEM_PERSISTENT_API_SUPPORT_DEFAULT:
-    case NV_MEM_PERSISTENT_API_SUPPORT_LEGACY:
-        break;
-    default:
-        peer_err("invalid persistent_api_support param value %d\n", persistent_api_support);
-        rc = -EINVAL;
-        break;
-    }
-    return rc;
-}
-
-static int __init nv_mem_client_init(void)
-{
-#if defined (NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
-    int rc;
-    rc = nv_mem_param_peerdirect_conf_check();
-    if (rc) {
-        return rc;
-    }
-
-    rc = nv_mem_param_persistent_api_conf_check();
-    if (rc) {
-        return rc;
-    }
-
-    if (persistent_api_support == NV_MEM_PERSISTENT_API_SUPPORT_LEGACY) {
-        rc = nv_mem_legacy_client_init();
-        if (rc)
-            goto out;
-    }
-
-    rc = nv_mem_nc_client_init();
-    if (rc)
        goto out;
+    }

 out:
    if (rc) {
--- a/kernel-open/nvidia-uvm/clc96f.h
+++ b/kernel-open/nvidia-uvm/clc96f.h
@@ -1,329 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2012-2015 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-
-#ifndef _clc96f_h_
-#define _clc96f_h_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "nvtypes.h"
-
-/* class BLACKWELL_CHANNEL_GPFIFO  */
-/*
- * Documentation for BLACKWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
- * chapter "User Control Registers". It is documented as device NV_UDMA.
- * The GPFIFO format itself is also documented in dev_pbdma.ref,
- * NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
- * chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
- *
- * Note there is no .mfs file for this class.
- */
-#define  BLACKWELL_CHANNEL_GPFIFO_A                           (0x0000C96F)
-
-#define NVC96F_TYPEDEF                             BLACKWELL_CHANNELChannelGPFifoA
-
-/* dma flow control data structure */
-typedef volatile struct Nvc96fControl_struct {
- NvU32 Ignored00[0x23];        /*                                  0000-008b*/
- NvU32 GPPut;                   /* GP FIFO put offset               008c-008f*/
- NvU32 Ignored01[0x5c];
-} Nvc96fControl, BlackwellAControlGPFifo;
-
-/* fields and values */
-#define NVC96F_NUMBER_OF_SUBCHANNELS                               (8)
-#define NVC96F_SET_OBJECT                                          (0x00000000)
-#define NVC96F_SET_OBJECT_NVCLASS                                         15:0
-#define NVC96F_SET_OBJECT_ENGINE                                         20:16
-#define NVC96F_SET_OBJECT_ENGINE_SW                                 0x0000001f
-#define NVC96F_NOP                                                 (0x00000008)
-#define NVC96F_NOP_HANDLE                                                 31:0
-#define NVC96F_NON_STALL_INTERRUPT                                 (0x00000020)
-#define NVC96F_NON_STALL_INTERRUPT_HANDLE                                 31:0
-#define NVC96F_FB_FLUSH                                            (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
-#define NVC96F_FB_FLUSH_HANDLE                                            31:0
-// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
-// specifying the page address for a targeted TLB invalidate and the uTLB for
-// a targeted REPLAY_CANCEL for UVM.
-// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
-// rearranged fields.
-#define NVC96F_MEM_OP_A                                            (0x00000028)
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID        5:0  // only relevant for REPLAY_CANCEL_TARGETED
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE                   5:0  // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID               10:6  // only relevant for REPLAY_CANCEL_TARGETED
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE                         7:6  // only relevant for invalidates with NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating  link TLB only, or non-link TLB only or all TLBs
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS                  0
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS                 1
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS             2
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD                    3
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID                8:0  // only relevant for REPLAY_CANCEL_VA_GLOBAL
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR                         11:11
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN                 0x00000001
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS                0x00000000
-#define NVC96F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO                    31:12
-#define NVC96F_MEM_OP_B                                            (0x0000002c)
-#define NVC96F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI                     31:0
-#define NVC96F_MEM_OP_C                                            (0x00000030)
-#define NVC96F_MEM_OP_C_MEMBAR_TYPE                                        2:0
-#define NVC96F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR                      0x00000000
-#define NVC96F_MEM_OP_C_MEMBAR_TYPE_MEMBAR                          0x00000001
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB                                 0:0
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE                      0x00000000
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL                      0x00000001  // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC                                 1:1
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE                   0x00000000
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE                  0x00000001
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY                              4:2  // only relevant if GPC ENABLE
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE                  0x00000000
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START                 0x00000001
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL         0x00000002
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED       0x00000003
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL         0x00000004
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL      0x00000005
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE                            6:5  // only relevant if GPC ENABLE
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE                0x00000000
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY            0x00000001
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE           0x00000002
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE                         9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ                 0
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE                1
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG        2
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD               3
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK          4
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL           5
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC     6
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL                  7
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL                    9:7  // Invalidate affects this level and all below
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL         0x00000000  // Invalidate tlb caches at all levels of the page table
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY    0x00000001
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0  0x00000002
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1  0x00000003
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2  0x00000004
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3  0x00000005
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4  0x00000006
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5  0x00000007
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE                          11:10  // only relevant if PDB_ONE
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM             0x00000000
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT    0x00000002
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
-#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO                       31:12  // only relevant if PDB_ONE
-#define NVC96F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG            19:0
-// MEM_OP_D MUST be preceded by MEM_OPs A-C.
-#define NVC96F_MEM_OP_D                                            (0x00000034)
-#define NVC96F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI                        26:0  // only relevant if PDB_ONE
-#define NVC96F_MEM_OP_D_OPERATION                                        31:27
-#define NVC96F_MEM_OP_D_OPERATION_MEMBAR                            0x00000005
-#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE                0x00000009
-#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED       0x0000000a
-#define NVC96F_MEM_OP_D_OPERATION_MMU_OPERATION                     0x0000000b
-#define NVC96F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE             0x0000000d
-#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE              0x0000000e
-// CLEAN_LINES is an alias for Tegra/GPU IP usage
-#define NVC96F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES         0x0000000e
-#define NVC96F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS                 0x0000000f
-#define NVC96F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY                    0x00000010
-#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_NCOH_INVALIDATE         0x00000011
-#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_COH_INVALIDATE          0x00000012
-#define NVC96F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS     0x00000015
-#define NVC96F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR                0x00000016
-#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE                            1:0
-#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC                0x00000000
-#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC                0x00000001
-#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL                 0x00000002
-#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED            0x00000003
-#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE                   2:2
-#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC       0x00000000
-#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC       0x00000001
-#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK                   6:3
-#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE                               23:20
-#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED                 0x00000000
-#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP   0x00000001
-#define NVC96F_SEM_ADDR_LO                                         (0x0000005c)
-#define NVC96F_SEM_ADDR_LO_OFFSET                                         31:2
-#define NVC96F_SEM_ADDR_HI                                         (0x00000060)
-#define NVC96F_SEM_ADDR_HI_OFFSET                                         24:0
-#define NVC96F_SEM_PAYLOAD_LO                                      (0x00000064)
-#define NVC96F_SEM_PAYLOAD_LO_PAYLOAD                                     31:0
-#define NVC96F_SEM_PAYLOAD_HI                                      (0x00000068)
-#define NVC96F_SEM_PAYLOAD_HI_PAYLOAD                                     31:0
-#define NVC96F_SEM_EXECUTE                                         (0x0000006c)
-#define NVC96F_SEM_EXECUTE_OPERATION                                       2:0
-#define NVC96F_SEM_EXECUTE_OPERATION_ACQUIRE                        0x00000000
-#define NVC96F_SEM_EXECUTE_OPERATION_RELEASE                        0x00000001
-#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ                 0x00000002
-#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ                   0x00000003
-#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_AND                        0x00000004
-#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_NOR                        0x00000005
-#define NVC96F_SEM_EXECUTE_OPERATION_REDUCTION                      0x00000006
-#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG                            12:12
-#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS                   0x00000000
-#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN                    0x00000001
-#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK                               18:18
-#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_DIS                      0x00000000
-#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_EN                       0x00000001
-#define NVC96F_SEM_EXECUTE_RELEASE_WFI                                   20:20
-#define NVC96F_SEM_EXECUTE_RELEASE_WFI_DIS                          0x00000000
-#define NVC96F_SEM_EXECUTE_RELEASE_WFI_EN                           0x00000001
-#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE                                  24:24
-#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT                       0x00000000
-#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT                       0x00000001
-#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP                             25:25
-#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS                    0x00000000
-#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN                     0x00000001
-#define NVC96F_SEM_EXECUTE_REDUCTION                                     30:27
-#define NVC96F_SEM_EXECUTE_REDUCTION_IMIN                           0x00000000
-#define NVC96F_SEM_EXECUTE_REDUCTION_IMAX                           0x00000001
-#define NVC96F_SEM_EXECUTE_REDUCTION_IXOR                           0x00000002
-#define NVC96F_SEM_EXECUTE_REDUCTION_IAND                           0x00000003
-#define NVC96F_SEM_EXECUTE_REDUCTION_IOR                            0x00000004
-#define NVC96F_SEM_EXECUTE_REDUCTION_IADD                           0x00000005
-#define NVC96F_SEM_EXECUTE_REDUCTION_INC                            0x00000006
-#define NVC96F_SEM_EXECUTE_REDUCTION_DEC                            0x00000007
-#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT                              31:31
-#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED                  0x00000000
-#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED                0x00000001
-#define NVC96F_WFI                                                 (0x00000078)
-#define NVC96F_WFI_SCOPE                                                   0:0
-#define NVC96F_WFI_SCOPE_CURRENT_SCG_TYPE                           0x00000000
-#define NVC96F_WFI_SCOPE_CURRENT_VEID                               0x00000000
-#define NVC96F_WFI_SCOPE_ALL                                        0x00000001
-#define NVC96F_YIELD                                               (0x00000080)
-#define NVC96F_YIELD_OP                                                    1:0
-#define NVC96F_YIELD_OP_NOP                                         0x00000000
-#define NVC96F_YIELD_OP_TSG                                         0x00000003
-#define NVC96F_CLEAR_FAULTED                                       (0x00000084)
-// Note: RM provides the HANDLE as an opaque value; the internal detail fields
-// are intentionally not exposed to the driver through these defines.
-#define NVC96F_CLEAR_FAULTED_HANDLE                                       30:0
-#define NVC96F_CLEAR_FAULTED_TYPE                                        31:31
-#define NVC96F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED                     0x00000000
-#define NVC96F_CLEAR_FAULTED_TYPE_ENG_FAULTED                       0x00000001
-
-
-/* GPFIFO entry format */
-#define NVC96F_GP_ENTRY__SIZE                                          8
-#define NVC96F_GP_ENTRY0_FETCH                                       0:0
-#define NVC96F_GP_ENTRY0_FETCH_UNCONDITIONAL                  0x00000000
-#define NVC96F_GP_ENTRY0_FETCH_CONDITIONAL                    0x00000001
-#define NVC96F_GP_ENTRY0_GET                                        31:2
-#define NVC96F_GP_ENTRY0_OPERAND                                    31:0
-#define NVC96F_GP_ENTRY0_PB_EXTENDED_BASE_OPERAND                   24:8
-#define NVC96F_GP_ENTRY1_GET_HI                                      7:0
-#define NVC96F_GP_ENTRY1_LEVEL                                       9:9
-#define NVC96F_GP_ENTRY1_LEVEL_MAIN                           0x00000000
-#define NVC96F_GP_ENTRY1_LEVEL_SUBROUTINE                     0x00000001
-#define NVC96F_GP_ENTRY1_LENGTH                                    30:10
-#define NVC96F_GP_ENTRY1_SYNC                                      31:31
-#define NVC96F_GP_ENTRY1_SYNC_PROCEED                         0x00000000
-#define NVC96F_GP_ENTRY1_SYNC_WAIT                            0x00000001
-#define NVC96F_GP_ENTRY1_OPCODE                                      7:0
-#define NVC96F_GP_ENTRY1_OPCODE_NOP                           0x00000000
-#define NVC96F_GP_ENTRY1_OPCODE_ILLEGAL                       0x00000001
-#define NVC96F_GP_ENTRY1_OPCODE_GP_CRC                        0x00000002
-#define NVC96F_GP_ENTRY1_OPCODE_PB_CRC                        0x00000003
-#define NVC96F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE  0x00000004
-
-/* dma method formats */
-#define NVC96F_DMA_METHOD_ADDRESS_OLD                              12:2
-#define NVC96F_DMA_METHOD_ADDRESS                                  11:0
-#define NVC96F_DMA_SUBDEVICE_MASK                                  15:4
-#define NVC96F_DMA_METHOD_SUBCHANNEL                               15:13
-#define NVC96F_DMA_TERT_OP                                         17:16
-#define NVC96F_DMA_TERT_OP_GRP0_INC_METHOD                         (0x00000000)
-#define NVC96F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK                   (0x00000001)
-#define NVC96F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK                 (0x00000002)
-#define NVC96F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK                   (0x00000003)
-#define NVC96F_DMA_TERT_OP_GRP2_NON_INC_METHOD                     (0x00000000)
-#define NVC96F_DMA_METHOD_COUNT_OLD                                28:18
-#define NVC96F_DMA_METHOD_COUNT                                    28:16
-#define NVC96F_DMA_IMMD_DATA                                       28:16
-#define NVC96F_DMA_SEC_OP                                          31:29
-#define NVC96F_DMA_SEC_OP_GRP0_USE_TERT                            (0x00000000)
-#define NVC96F_DMA_SEC_OP_INC_METHOD                               (0x00000001)
-#define NVC96F_DMA_SEC_OP_GRP2_USE_TERT                            (0x00000002)
-#define NVC96F_DMA_SEC_OP_NON_INC_METHOD                           (0x00000003)
-#define NVC96F_DMA_SEC_OP_IMMD_DATA_METHOD                         (0x00000004)
-#define NVC96F_DMA_SEC_OP_ONE_INC                                  (0x00000005)
-#define NVC96F_DMA_SEC_OP_RESERVED6                                (0x00000006)
-#define NVC96F_DMA_SEC_OP_END_PB_SEGMENT                           (0x00000007)
-/* dma incrementing method format */
-#define NVC96F_DMA_INCR_ADDRESS                                    11:0
-#define NVC96F_DMA_INCR_SUBCHANNEL                                 15:13
-#define NVC96F_DMA_INCR_COUNT                                      28:16
-#define NVC96F_DMA_INCR_OPCODE                                     31:29
-#define NVC96F_DMA_INCR_OPCODE_VALUE                               (0x00000001)
-#define NVC96F_DMA_INCR_DATA                                       31:0
-/* dma non-incrementing method format */
-#define NVC96F_DMA_NONINCR_ADDRESS                                 11:0
-#define NVC96F_DMA_NONINCR_SUBCHANNEL                              15:13
-#define NVC96F_DMA_NONINCR_COUNT                                   28:16
-#define NVC96F_DMA_NONINCR_OPCODE                                  31:29
-#define NVC96F_DMA_NONINCR_OPCODE_VALUE                            (0x00000003)
-#define NVC96F_DMA_NONINCR_DATA                                    31:0
-/* dma increment-once method format */
-#define NVC96F_DMA_ONEINCR_ADDRESS                                 11:0
-#define NVC96F_DMA_ONEINCR_SUBCHANNEL                              15:13
-#define NVC96F_DMA_ONEINCR_COUNT                                   28:16
-#define NVC96F_DMA_ONEINCR_OPCODE                                  31:29
-#define NVC96F_DMA_ONEINCR_OPCODE_VALUE                            (0x00000005)
-#define NVC96F_DMA_ONEINCR_DATA                                    31:0
-/* dma no-operation format */
-#define NVC96F_DMA_NOP                                             (0x00000000)
-/* dma immediate-data format */
-#define NVC96F_DMA_IMMD_ADDRESS                                    11:0
-#define NVC96F_DMA_IMMD_SUBCHANNEL                                 15:13
-#define NVC96F_DMA_IMMD_DATA                                       28:16
-#define NVC96F_DMA_IMMD_OPCODE                                     31:29
-#define NVC96F_DMA_IMMD_OPCODE_VALUE                               (0x00000004)
-/* dma set sub-device mask format */
-#define NVC96F_DMA_SET_SUBDEVICE_MASK_VALUE                        15:4
-#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE                       31:16
-#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000001)
-/* dma store sub-device mask format */
-#define NVC96F_DMA_STORE_SUBDEVICE_MASK_VALUE                      15:4
-#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE                     31:16
-#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE               (0x00000002)
-/* dma use sub-device mask format */
-#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE                       31:16
-#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000003)
-/* dma end-segment format */
-#define NVC96F_DMA_ENDSEG_OPCODE                                   31:29
-#define NVC96F_DMA_ENDSEG_OPCODE_VALUE                             (0x00000007)
-/* dma legacy incrementing/non-incrementing formats */
-#define NVC96F_DMA_ADDRESS                                         12:2
-#define NVC96F_DMA_SUBCH                                           15:13
-#define NVC96F_DMA_OPCODE3                                         17:16
-#define NVC96F_DMA_OPCODE3_NONE                                    (0x00000000)
-#define NVC96F_DMA_COUNT                                           28:18
-#define NVC96F_DMA_OPCODE                                          31:29
-#define NVC96F_DMA_OPCODE_METHOD                                   (0x00000000)
-#define NVC96F_DMA_OPCODE_NONINC_METHOD                            (0x00000002)
-#define NVC96F_DMA_DATA                                            31:0
-
-#ifdef __cplusplus
-};     /* extern "C" */
-#endif
-
-#endif /* _clc96f_h_ */
--- a/kernel-open/nvidia-uvm/clc9b5.h
+++ b/kernel-open/nvidia-uvm/clc9b5.h
@@ -1,460 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 1993-2004 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-
-
-#include "nvtypes.h"
-
-#ifndef _clc9b5_h_
-#define _clc9b5_h_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define BLACKWELL_DMA_COPY_A                                                            (0x0000C9B5)
-
-typedef volatile struct _clc9b5_tag0 {
-    NvV32 Reserved00[0x40];
-    NvV32 Nop;                                                                  // 0x00000100 - 0x00000103
-    NvV32 Reserved01[0xF];
-    NvV32 PmTrigger;                                                            // 0x00000140 - 0x00000143
-    NvV32 Reserved02[0x36];
-    NvV32 SetMonitoredFenceType;                                                // 0x0000021C - 0x0000021F
-    NvV32 SetMonitoredFenceSignalAddrBaseUpper;                                 // 0x00000220 - 0x00000223
-    NvV32 SetMonitoredFenceSignalAddrBaseLower;                                 // 0x00000224 - 0x00000227
-    NvV32 Reserved03[0x6];
-    NvV32 SetSemaphoreA;                                                        // 0x00000240 - 0x00000243
-    NvV32 SetSemaphoreB;                                                        // 0x00000244 - 0x00000247
-    NvV32 SetSemaphorePayload;                                                  // 0x00000248 - 0x0000024B
-    NvV32 SetSemaphorePayloadUpper;                                             // 0x0000024C - 0x0000024F
-    NvV32 Reserved04[0x1];
-    NvV32 SetRenderEnableA;                                                     // 0x00000254 - 0x00000257
-    NvV32 SetRenderEnableB;                                                     // 0x00000258 - 0x0000025B
-    NvV32 SetRenderEnableC;                                                     // 0x0000025C - 0x0000025F
-    NvV32 SetSrcPhysMode;                                                       // 0x00000260 - 0x00000263
-    NvV32 SetDstPhysMode;                                                       // 0x00000264 - 0x00000267
-    NvV32 Reserved05[0x26];
-    NvV32 LaunchDma;                                                            // 0x00000300 - 0x00000303
-    NvV32 Reserved06[0x3F];
-    NvV32 OffsetInUpper;                                                        // 0x00000400 - 0x00000403
-    NvV32 OffsetInLower;                                                        // 0x00000404 - 0x00000407
-    NvV32 OffsetOutUpper;                                                       // 0x00000408 - 0x0000040B
-    NvV32 OffsetOutLower;                                                       // 0x0000040C - 0x0000040F
-    NvV32 PitchIn;                                                              // 0x00000410 - 0x00000413
-    NvV32 PitchOut;                                                             // 0x00000414 - 0x00000417
-    NvV32 LineLengthIn;                                                         // 0x00000418 - 0x0000041B
-    NvV32 LineCount;                                                            // 0x0000041C - 0x0000041F
-    NvV32 Reserved07[0x38];
-    NvV32 SetSecureCopyMode;                                                    // 0x00000500 - 0x00000503
-    NvV32 SetDecryptIv0;                                                        // 0x00000504 - 0x00000507
-    NvV32 SetDecryptIv1;                                                        // 0x00000508 - 0x0000050B
-    NvV32 SetDecryptIv2;                                                        // 0x0000050C - 0x0000050F
-    NvV32 Reserved_SetAESCounter;                                               // 0x00000510 - 0x00000513
-    NvV32 SetDecryptAuthTagCompareAddrUpper;                                    // 0x00000514 - 0x00000517
-    NvV32 SetDecryptAuthTagCompareAddrLower;                                    // 0x00000518 - 0x0000051B
-    NvV32 Reserved08[0x5];
-    NvV32 SetEncryptAuthTagAddrUpper;                                           // 0x00000530 - 0x00000533
-    NvV32 SetEncryptAuthTagAddrLower;                                           // 0x00000534 - 0x00000537
-    NvV32 SetEncryptIvAddrUpper;                                                // 0x00000538 - 0x0000053B
-    NvV32 SetEncryptIvAddrLower;                                                // 0x0000053C - 0x0000053F
-    NvV32 Reserved09[0x10];
-    NvV32 SetCompressionParameters;                                             // 0x00000580 - 0x00000583
-    NvV32 SetDecompressOutLength;                                               // 0x00000584 - 0x00000587
-    NvV32 SetDecompressOutLengthAddrUpper;                                      // 0x00000588 - 0x0000058B
-    NvV32 SetDecompressOutLengthAddrLower;                                      // 0x0000058C - 0x0000058F
-    NvV32 SetDecompressChecksum;                                                // 0x00000590 - 0x00000593
-    NvV32 Reserved10[0x5A];
-    NvV32 SetMemoryScrubParameters;                                             // 0x000006FC - 0x000006FF
-    NvV32 SetRemapConstA;                                                       // 0x00000700 - 0x00000703
-    NvV32 SetRemapConstB;                                                       // 0x00000704 - 0x00000707
-    NvV32 SetRemapComponents;                                                   // 0x00000708 - 0x0000070B
-    NvV32 SetDstBlockSize;                                                      // 0x0000070C - 0x0000070F
-    NvV32 SetDstWidth;                                                          // 0x00000710 - 0x00000713
-    NvV32 SetDstHeight;                                                         // 0x00000714 - 0x00000717
-    NvV32 SetDstDepth;                                                          // 0x00000718 - 0x0000071B
-    NvV32 SetDstLayer;                                                          // 0x0000071C - 0x0000071F
-    NvV32 SetDstOrigin;                                                         // 0x00000720 - 0x00000723
-    NvV32 Reserved11[0x1];
-    NvV32 SetSrcBlockSize;                                                      // 0x00000728 - 0x0000072B
-    NvV32 SetSrcWidth;                                                          // 0x0000072C - 0x0000072F
-    NvV32 SetSrcHeight;                                                         // 0x00000730 - 0x00000733
-    NvV32 SetSrcDepth;                                                          // 0x00000734 - 0x00000737
-    NvV32 SetSrcLayer;                                                          // 0x00000738 - 0x0000073B
-    NvV32 SetSrcOrigin;                                                         // 0x0000073C - 0x0000073F
-    NvV32 Reserved12[0x1];
-    NvV32 SrcOriginX;                                                           // 0x00000744 - 0x00000747
-    NvV32 SrcOriginY;                                                           // 0x00000748 - 0x0000074B
-    NvV32 DstOriginX;                                                           // 0x0000074C - 0x0000074F
-    NvV32 DstOriginY;                                                           // 0x00000750 - 0x00000753
-    NvV32 Reserved13[0x270];
-    NvV32 PmTriggerEnd;                                                         // 0x00001114 - 0x00001117
-    NvV32 Reserved14[0x3BA];
-} blackwell_dma_copy_aControlPio;
-
-#define NVC9B5_NOP                                                              (0x00000100)
-#define NVC9B5_NOP_PARAMETER                                                    31:0
-#define NVC9B5_PM_TRIGGER                                                       (0x00000140)
-#define NVC9B5_PM_TRIGGER_V                                                     31:0
-#define NVC9B5_SET_MONITORED_FENCE_TYPE                                         (0x0000021C)
-#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE                                    0:0
-#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE                    (0x00000000)
-#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT                (0x00000001)
-#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER                       (0x00000220)
-#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER                 24:0
-#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER                       (0x00000224)
-#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER                 31:0
-#define NVC9B5_SET_SEMAPHORE_A                                                  (0x00000240)
-#define NVC9B5_SET_SEMAPHORE_A_UPPER                                            24:0
-#define NVC9B5_SET_SEMAPHORE_B                                                  (0x00000244)
-#define NVC9B5_SET_SEMAPHORE_B_LOWER                                            31:0
-#define NVC9B5_SET_SEMAPHORE_PAYLOAD                                            (0x00000248)
-#define NVC9B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD                                    31:0
-#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER                                      (0x0000024C)
-#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD                              31:0
-#define NVC9B5_SET_RENDER_ENABLE_A                                              (0x00000254)
-#define NVC9B5_SET_RENDER_ENABLE_A_UPPER                                        24:0
-#define NVC9B5_SET_RENDER_ENABLE_B                                              (0x00000258)
-#define NVC9B5_SET_RENDER_ENABLE_B_LOWER                                        31:0
-#define NVC9B5_SET_RENDER_ENABLE_C                                              (0x0000025C)
-#define NVC9B5_SET_RENDER_ENABLE_C_MODE                                         2:0
-#define NVC9B5_SET_RENDER_ENABLE_C_MODE_FALSE                                   (0x00000000)
-#define NVC9B5_SET_RENDER_ENABLE_C_MODE_TRUE                                    (0x00000001)
-#define NVC9B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL                             (0x00000002)
-#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL                         (0x00000003)
-#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL                     (0x00000004)
-#define NVC9B5_SET_SRC_PHYS_MODE                                                (0x00000260)
-#define NVC9B5_SET_SRC_PHYS_MODE_TARGET                                         1:0
-#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB                                (0x00000000)
-#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM                         (0x00000001)
-#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM                      (0x00000002)
-#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM                                 (0x00000003)
-#define NVC9B5_SET_SRC_PHYS_MODE_BASIC_KIND                                     5:2
-#define NVC9B5_SET_SRC_PHYS_MODE_PEER_ID                                        8:6
-#define NVC9B5_SET_SRC_PHYS_MODE_FLA                                            9:9
-#define NVC9B5_SET_DST_PHYS_MODE                                                (0x00000264)
-#define NVC9B5_SET_DST_PHYS_MODE_TARGET                                         1:0
-#define NVC9B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB                                (0x00000000)
-#define NVC9B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM                         (0x00000001)
-#define NVC9B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM                      (0x00000002)
-#define NVC9B5_SET_DST_PHYS_MODE_TARGET_PEERMEM                                 (0x00000003)
-#define NVC9B5_SET_DST_PHYS_MODE_BASIC_KIND                                     5:2
-#define NVC9B5_SET_DST_PHYS_MODE_PEER_ID                                        8:6
-#define NVC9B5_SET_DST_PHYS_MODE_FLA                                            9:9
-#define NVC9B5_LAUNCH_DMA                                                       (0x00000300)
-#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE                                    1:0
-#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE                               (0x00000000)
-#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED                          (0x00000001)
-#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED                      (0x00000002)
-#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE                                          2:2
-#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE                                    (0x00000000)
-#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE                                     (0x00000001)
-#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE                                            25:25
-#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_SYS                                        (0x00000000)
-#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_GL                                         (0x00000001)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE                                        4:3
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE                                   (0x00000000)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP         (0x00000001)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP       (0x00000002)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE             (0x00000001)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE            (0x00000002)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE     (0x00000003)
-#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE                                        6:5
-#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE                                   (0x00000000)
-#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING                               (0x00000001)
-#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING                           (0x00000002)
-#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT                                     7:7
-#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR                         (0x00000000)
-#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH                               (0x00000001)
-#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT                                     8:8
-#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR                         (0x00000000)
-#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH                               (0x00000001)
-#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE                                     9:9
-#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE                               (0x00000000)
-#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE                                (0x00000001)
-#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE                                          10:10
-#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_FALSE                                    (0x00000000)
-#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_TRUE                                     (0x00000001)
-#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE                                    11:11
-#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_FALSE                              (0x00000000)
-#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_TRUE                               (0x00000001)
-#define NVC9B5_LAUNCH_DMA_SRC_TYPE                                              12:12
-#define NVC9B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL                                      (0x00000000)
-#define NVC9B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL                                     (0x00000001)
-#define NVC9B5_LAUNCH_DMA_DST_TYPE                                              13:13
-#define NVC9B5_LAUNCH_DMA_DST_TYPE_VIRTUAL                                      (0x00000000)
-#define NVC9B5_LAUNCH_DMA_DST_TYPE_PHYSICAL                                     (0x00000001)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION                                   17:14
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN                              (0x00000000)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX                              (0x00000001)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR                              (0x00000002)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND                              (0x00000003)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR                               (0x00000004)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD                              (0x00000005)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC                               (0x00000006)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC                               (0x00000007)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA                          (0x00000008)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB                          (0x00000009)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD                              (0x0000000A)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN                              (0x0000000B)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX                              (0x0000000C)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC                          (0x0000000D)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD                          (0x0000000E)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE                          (0x0000000F)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN                              18:18
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED                       (0x00000000)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED                     (0x00000001)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE                            19:19
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE                      (0x00000000)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE                       (0x00000001)
-#define NVC9B5_LAUNCH_DMA_COPY_TYPE                                             21:20
-#define NVC9B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT                                   (0x00000000)
-#define NVC9B5_LAUNCH_DMA_COPY_TYPE_DEFAULT                                     (0x00000000)
-#define NVC9B5_LAUNCH_DMA_COPY_TYPE_SECURE                                      (0x00000001)
-#define NVC9B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT                             (0x00000002)
-#define NVC9B5_LAUNCH_DMA_COPY_TYPE_RESERVED                                    (0x00000003)
-#define NVC9B5_LAUNCH_DMA_VPRMODE                                               22:22
-#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_NONE                                      (0x00000000)
-#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                                   (0x00000001)
-#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE                                   23:23
-#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE                             (0x00000000)
-#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE                              (0x00000001)
-#define NVC9B5_LAUNCH_DMA_RESERVED_START_OF_COPY                                24:24
-#define NVC9B5_LAUNCH_DMA_DISABLE_PLC                                           26:26
-#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_FALSE                                     (0x00000000)
-#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_TRUE                                      (0x00000001)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE                                27:27
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD                       (0x00000000)
-#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD                       (0x00000001)
-#define NVC9B5_LAUNCH_DMA_RESERVED_ERR_CODE                                     31:28
-#define NVC9B5_OFFSET_IN_UPPER                                                  (0x00000400)
-#define NVC9B5_OFFSET_IN_UPPER_UPPER                                            24:0
-#define NVC9B5_OFFSET_IN_LOWER                                                  (0x00000404)
-#define NVC9B5_OFFSET_IN_LOWER_VALUE                                            31:0
-#define NVC9B5_OFFSET_OUT_UPPER                                                 (0x00000408)
-#define NVC9B5_OFFSET_OUT_UPPER_UPPER                                           24:0
-#define NVC9B5_OFFSET_OUT_LOWER                                                 (0x0000040C)
-#define NVC9B5_OFFSET_OUT_LOWER_VALUE                                           31:0
-#define NVC9B5_PITCH_IN                                                         (0x00000410)
-#define NVC9B5_PITCH_IN_VALUE                                                   31:0
-#define NVC9B5_PITCH_OUT                                                        (0x00000414)
-#define NVC9B5_PITCH_OUT_VALUE                                                  31:0
-#define NVC9B5_LINE_LENGTH_IN                                                   (0x00000418)
-#define NVC9B5_LINE_LENGTH_IN_VALUE                                             31:0
-#define NVC9B5_LINE_COUNT                                                       (0x0000041C)
-#define NVC9B5_LINE_COUNT_VALUE                                                 31:0
-#define NVC9B5_SET_SECURE_COPY_MODE                                             (0x00000500)
-#define NVC9B5_SET_SECURE_COPY_MODE_MODE                                        0:0
-#define NVC9B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT                                (0x00000000)
-#define NVC9B5_SET_SECURE_COPY_MODE_MODE_DECRYPT                                (0x00000001)
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET                         20:19
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB                (0x00000000)
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM         (0x00000001)
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM      (0x00000002)
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM                 (0x00000003)
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID                        23:21
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA                            24:24
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET                         26:25
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB                (0x00000000)
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM         (0x00000001)
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM      (0x00000002)
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM                 (0x00000003)
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID                        29:27
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA                            30:30
-#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY                        31:31
-#define NVC9B5_SET_DECRYPT_IV0                                                  (0x00000504)
-#define NVC9B5_SET_DECRYPT_IV0_VALUE                                            31:0
-#define NVC9B5_SET_DECRYPT_IV1                                                  (0x00000508)
-#define NVC9B5_SET_DECRYPT_IV1_VALUE                                            31:0
-#define NVC9B5_SET_DECRYPT_IV2                                                  (0x0000050C)
-#define NVC9B5_SET_DECRYPT_IV2_VALUE                                            31:0
-#define NVC9B5_RESERVED_SET_AESCOUNTER                                          (0x00000510)
-#define NVC9B5_RESERVED_SET_AESCOUNTER_VALUE                                    31:0
-#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER                          (0x00000514)
-#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER                    24:0
-#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER                          (0x00000518)
-#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER_LOWER                    31:0
-#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER                                  (0x00000530)
-#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER_UPPER                            24:0
-#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER                                  (0x00000534)
-#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER_LOWER                            31:0
-#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER                                        (0x00000538)
-#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER_UPPER                                  24:0
-#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER                                        (0x0000053C)
-#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER_LOWER                                  31:0
-#define NVC9B5_SET_COMPRESSION_PARAMETERS                                       (0x00000580)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION                             0:0
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_DECOMPRESS                  (0x00000000)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_COMPRESS                    (0x00000001)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO                                  3:1
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY                           (0x00000000)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_DATA_ONLY                    (0x00000001)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK                        (0x00000002)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK_CHECKSUM               (0x00000003)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_DEFLATE                          (0x00000004)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY_WITH_LONG_FETCH           (0x00000005)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM                             29:28
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_NONE                        (0x00000000)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_ADLER32                     (0x00000001)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_CRC32                       (0x00000002)
-#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_SNAPPY_CRC                  (0x00000003)
-#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH                                        (0x00000584)
-#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_V                                      31:0
-#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER                             (0x00000588)
-#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER_UPPER                       24:0
-#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER                             (0x0000058C)
-#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER_LOWER                       31:0
-#define NVC9B5_SET_DECOMPRESS_CHECKSUM                                          (0x00000590)
-#define NVC9B5_SET_DECOMPRESS_CHECKSUM_V                                        31:0
-#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS                                      (0x000006FC)
-#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE                          0:0
-#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE                    (0x00000000)
-#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE                     (0x00000001)
-#define NVC9B5_SET_REMAP_CONST_A                                                (0x00000700)
-#define NVC9B5_SET_REMAP_CONST_A_V                                              31:0
-#define NVC9B5_SET_REMAP_CONST_B                                                (0x00000704)
-#define NVC9B5_SET_REMAP_CONST_B_V                                              31:0
-#define NVC9B5_SET_REMAP_COMPONENTS                                             (0x00000708)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_X                                       2:0
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_X                                 (0x00000000)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y                                 (0x00000001)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z                                 (0x00000002)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_W                                 (0x00000003)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_A                               (0x00000004)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_B                               (0x00000005)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE                              (0x00000006)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y                                       6:4
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X                                 (0x00000000)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y                                 (0x00000001)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z                                 (0x00000002)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W                                 (0x00000003)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A                               (0x00000004)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B                               (0x00000005)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE                              (0x00000006)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z                                       10:8
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X                                 (0x00000000)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y                                 (0x00000001)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z                                 (0x00000002)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W                                 (0x00000003)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A                               (0x00000004)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B                               (0x00000005)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE                              (0x00000006)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_W                                       14:12
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_X                                 (0x00000000)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y                                 (0x00000001)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z                                 (0x00000002)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_W                                 (0x00000003)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_A                               (0x00000004)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_B                               (0x00000005)
-#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE                              (0x00000006)
-#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE                              17:16
-#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE                          (0x00000000)
-#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO                          (0x00000001)
-#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE                        (0x00000002)
-#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR                         (0x00000003)
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS                          21:20
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE                      (0x00000000)
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO                      (0x00000001)
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE                    (0x00000002)
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR                     (0x00000003)
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS                          25:24
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE                      (0x00000000)
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO                      (0x00000001)
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE                    (0x00000002)
-#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR                     (0x00000003)
-#define NVC9B5_SET_DST_BLOCK_SIZE                                               (0x0000070C)
-#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH                                         3:0
-#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
-#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT                                        7:4
-#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
-#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
-#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
-#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
-#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
-#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
-#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH                                         11:8
-#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
-#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
-#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
-#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
-#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
-#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
-#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT                                    15:12
-#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
-#define NVC9B5_SET_DST_WIDTH                                                    (0x00000710)
-#define NVC9B5_SET_DST_WIDTH_V                                                  31:0
-#define NVC9B5_SET_DST_HEIGHT                                                   (0x00000714)
-#define NVC9B5_SET_DST_HEIGHT_V                                                 31:0
-#define NVC9B5_SET_DST_DEPTH                                                    (0x00000718)
-#define NVC9B5_SET_DST_DEPTH_V                                                  31:0
-#define NVC9B5_SET_DST_LAYER                                                    (0x0000071C)
-#define NVC9B5_SET_DST_LAYER_V                                                  31:0
-#define NVC9B5_SET_DST_ORIGIN                                                   (0x00000720)
-#define NVC9B5_SET_DST_ORIGIN_X                                                 15:0
-#define NVC9B5_SET_DST_ORIGIN_Y                                                 31:16
-#define NVC9B5_SET_SRC_BLOCK_SIZE                                               (0x00000728)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH                                         3:0
-#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT                                        7:4
-#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH                                         11:8
-#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
-#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT                                    15:12
-#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
-#define NVC9B5_SET_SRC_WIDTH                                                    (0x0000072C)
-#define NVC9B5_SET_SRC_WIDTH_V                                                  31:0
-#define NVC9B5_SET_SRC_HEIGHT                                                   (0x00000730)
-#define NVC9B5_SET_SRC_HEIGHT_V                                                 31:0
-#define NVC9B5_SET_SRC_DEPTH                                                    (0x00000734)
-#define NVC9B5_SET_SRC_DEPTH_V                                                  31:0
-#define NVC9B5_SET_SRC_LAYER                                                    (0x00000738)
-#define NVC9B5_SET_SRC_LAYER_V                                                  31:0
-#define NVC9B5_SET_SRC_ORIGIN                                                   (0x0000073C)
-#define NVC9B5_SET_SRC_ORIGIN_X                                                 15:0
-#define NVC9B5_SET_SRC_ORIGIN_Y                                                 31:16
-#define NVC9B5_SRC_ORIGIN_X                                                     (0x00000744)
-#define NVC9B5_SRC_ORIGIN_X_VALUE                                               31:0
-#define NVC9B5_SRC_ORIGIN_Y                                                     (0x00000748)
-#define NVC9B5_SRC_ORIGIN_Y_VALUE                                               31:0
-#define NVC9B5_DST_ORIGIN_X                                                     (0x0000074C)
-#define NVC9B5_DST_ORIGIN_X_VALUE                                               31:0
-#define NVC9B5_DST_ORIGIN_Y                                                     (0x00000750)
-#define NVC9B5_DST_ORIGIN_Y_VALUE                                               31:0
-#define NVC9B5_PM_TRIGGER_END                                                   (0x00001114)
-#define NVC9B5_PM_TRIGGER_END_V                                                 31:0
-
-#ifdef __cplusplus
-};     /* extern "C" */
-#endif
-#endif // _clc9b5_h
-
--- a/kernel-open/nvidia-uvm/ctrl2080mc.h
+++ b/kernel-open/nvidia-uvm/ctrl2080mc.h
@@ -34,7 +34,6 @@
 #define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100                (0x00000170)
 #define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100                (0x00000180)
 #define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100                (0x00000190)
-#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100                (0x000001A0)

 /* valid ARCHITECTURE_GP10x implementation values */
 #define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100              (0x00000000)
--- a/kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_fault.h
+++ b/kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_fault.h
@@ -1,546 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2003-2016 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-
-#ifndef __gb100_dev_fault_h__
-#define __gb100_dev_fault_h__
-/* This file is autogenerated.  Do not edit */
-#define NV_PFAULT                                              /* ----G */
-#define NV_PFAULT_MMU_ENG_ID_GRAPHICS          384 /*       */
-#define NV_PFAULT_MMU_ENG_ID_DISPLAY           1 /*       */
-#define NV_PFAULT_MMU_ENG_ID_GSP               2 /*       */
-#define NV_PFAULT_MMU_ENG_ID_IFB               55 /*       */
-#define NV_PFAULT_MMU_ENG_ID_FLA               4 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1              256 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2              320 /*       */
-#define NV_PFAULT_MMU_ENG_ID_SEC               6 /*       */
-#define NV_PFAULT_MMU_ENG_ID_FSP               7 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF              10 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF0             10 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF1             11 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF2             12 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF3             13 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF4             14 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF5             15 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF6             16 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF7             17 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF8             18 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PERF9             19 /*       */
-#define NV_PFAULT_MMU_ENG_ID_GSPLITE          20 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVDEC             28 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVDEC0            28 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVDEC1            29 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVDEC2            30 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVDEC3            31 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVDEC4            32 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVDEC5            33 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVDEC6            34 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVDEC7            35 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVJPG0            36 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVJPG1            37 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVJPG2            38 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVJPG3            39 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVJPG4            40 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVJPG5            41 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVJPG6            42 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVJPG7            43 /*       */
-#define NV_PFAULT_MMU_ENG_ID_GRCOPY            65 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE0               65 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE1               66 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE2               67 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE3               68 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE4               69 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE5               70 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE6               71 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE7               72 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE8               73 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE9               74 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE10               75 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE11               76 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE12               77 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE13               78 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE14               79 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE15               80 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE16               81 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE17               82 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE18               83 /*       */
-#define NV_PFAULT_MMU_ENG_ID_CE19               84 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PWR_PMU           5 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PTP               3 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVENC0            44 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVENC1            45 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVENC2            46 /*       */
-#define NV_PFAULT_MMU_ENG_ID_NVENC3            47 /*       */
-#define NV_PFAULT_MMU_ENG_ID_OFA0              48 /*       */
-#define NV_PFAULT_MMU_ENG_ID_PHYSICAL          56 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST0             85 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST1             86 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST2             87 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST3             88 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST4             89 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST5             90 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST6             91 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST7             92 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST8             93 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST9             94 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST10            95 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST11            96 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST12            97 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST13            98 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST14            99 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST15            100 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST16            101 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST17            102 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST18            103 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST19            104 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST20            105 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST21            106 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST22            107 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST23            108 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST24            109 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST25            110 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST26            111 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST27            112 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST28            113 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST29            114 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST30            115 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST31            116 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST32            117 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST33            118 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST34            119 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST35            120 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST36            121 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST37            122 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST38            123 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST39            124 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST40            125 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST41            126 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST42            127 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST43            128 /*       */
-#define NV_PFAULT_MMU_ENG_ID_HOST44            129 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN0          256  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN1          257  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN2          258  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN3          259  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN4          260  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN5          261  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN6          262  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN7          263  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN8          264  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN9          265  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN10         266 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN11         267 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN12         268 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN13         269 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN14         270 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN15         271 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN16         272 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN17         273 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN18         274 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN19         275 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN20         276 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN21         277 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN22         278 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN23         279 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN24         280 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN25         281 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN26         282 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN27         283 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN28         284 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN29         285 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN30         286 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN31         287 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN32         288 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN33         289 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN34         290 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN35         291 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN36         292 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN37         293 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN38         294 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN39         295 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN40         296 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN41         297 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN42         298 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN43         299 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN44         300 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN45         301 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN46         302 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN47         303 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN48         304 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN49         305 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN50         306 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN51         307 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN52         308 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN53         309 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN54         310 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN55         311 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN56         312 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN57         313 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN58         314 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN59         315 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN60         316 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN61         317 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN62         318 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR1_FN63         319 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN0          320  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN1          321  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN2          322  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN3          323  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN4          324  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN5          325  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN6          326  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN7          327  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN8          328  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN9          329  /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN10         330 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN11         331 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN12         332 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN13         333 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN14         334 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN15         335 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN16         336 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN17         337 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN18         338 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN19         339 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN20         340 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN21         341 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN22         342 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN23         343 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN24         344 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN25         345 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN26         346 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN27         347 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN28         348 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN29         349 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN30         350 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN31         351 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN32         352 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN33         353 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN34         354 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN35         355 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN36         356 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN37         357 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN38         358 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN39         359 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN40         360 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN41         361 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN42         362 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN43         363 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN44         364 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN45         365 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN46         366 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN47         367 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN48         368 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN49         369 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN50         370 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN51         371 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN52         372 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN53         373 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN54         374 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN55         375 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN56         376 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN57         377 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN58         378 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN59         379 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN60         380 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN61         381 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN62         382 /*       */
-#define NV_PFAULT_MMU_ENG_ID_BAR2_FN63         383 /*       */
-#define NV_PFAULT_FAULT_TYPE                             4:0 /*       */
-#define NV_PFAULT_FAULT_TYPE_PDE                  0x00000000 /*       */
-#define NV_PFAULT_FAULT_TYPE_PDE_SIZE             0x00000001 /*       */
-#define NV_PFAULT_FAULT_TYPE_PTE                  0x00000002 /*       */
-#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION   0x00000003 /*       */
-#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK   0x00000004 /*       */
-#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION       0x00000005 /*       */
-#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION         0x00000006 /*       */
-#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION         0x00000007 /*       */
-#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /*       */
-#define NV_PFAULT_FAULT_TYPE_WORK_CREATION        0x00000009 /*       */
-#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /*       */
-#define NV_PFAULT_FAULT_TYPE_CC_VIOLATION         0x0000000b /*       */
-#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND     0x0000000c /*       */
-#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION     0x0000000d /*       */
-#define NV_PFAULT_FAULT_TYPE_POISONED             0x0000000e /*       */
-#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION     0x0000000f /*       */
-#define NV_PFAULT_CLIENT                       14:8 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_0        0x00000000 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_1        0x00000001 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_2        0x00000002 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_3        0x00000003 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_4        0x00000004 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_5        0x00000005 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_6        0x00000006 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_7        0x00000007 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_0        0x00000008 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_1        0x00000009 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_2        0x0000000A /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_3        0x0000000B /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_4        0x0000000C /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_5        0x0000000D /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_6        0x0000000E /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_7        0x0000000F /*       */
-#define NV_PFAULT_CLIENT_GPC_RAST        0x00000010 /*       */
-#define NV_PFAULT_CLIENT_GPC_GCC         0x00000011 /*       */
-#define NV_PFAULT_CLIENT_GPC_GPCCS       0x00000012 /*       */
-#define NV_PFAULT_CLIENT_GPC_PROP_0      0x00000013 /*       */
-#define NV_PFAULT_CLIENT_GPC_PROP_1      0x00000014 /*       */
-#define NV_PFAULT_CLIENT_GPC_PROP_2      0x00000015 /*       */
-#define NV_PFAULT_CLIENT_GPC_PROP_3      0x00000016 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_8        0x00000021 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_9        0x00000022 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_10       0x00000023 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_11       0x00000024 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_12       0x00000025 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_13       0x00000026 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_14       0x00000027 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_15       0x00000028 /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_0     0x00000029 /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_1     0x0000002A /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_2     0x0000002B /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_3     0x0000002C /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_4     0x0000002D /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_5     0x0000002E /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_6     0x0000002F /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_7     0x00000030 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_8        0x00000031 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_9        0x00000032 /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_8     0x00000033 /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_9     0x00000034 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_16       0x00000035 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_17       0x00000036 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_18       0x00000037 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_19       0x00000038 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_10       0x00000039 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_11       0x0000003A /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_10    0x0000003B /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_11    0x0000003C /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_20       0x0000003D /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_21       0x0000003E /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_22       0x0000003F /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_23       0x00000040 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_12       0x00000041 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_13       0x00000042 /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_12    0x00000043 /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_13    0x00000044 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_24       0x00000045 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_25       0x00000046 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_26       0x00000047 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_27       0x00000048 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_14       0x00000049 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_15       0x0000004A /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_14    0x0000004B /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_15    0x0000004C /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_28       0x0000004D /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_29       0x0000004E /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_30       0x0000004F /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_31       0x00000050 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_16       0x00000051 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_17       0x00000052 /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_16    0x00000053 /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_17    0x00000054 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_32       0x00000055 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_33       0x00000056 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_34       0x00000057 /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_35       0x00000058 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_18       0x00000059 /*       */
-#define NV_PFAULT_CLIENT_GPC_PE_19       0x0000005A /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_18    0x0000005B /*       */
-#define NV_PFAULT_CLIENT_GPC_TPCCS_19    0x0000005C /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_36       0x0000005D /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_37       0x0000005E /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_38       0x0000005F /*       */
-#define NV_PFAULT_CLIENT_GPC_T1_39       0x00000060 /*       */
-#define NV_PFAULT_CLIENT_GPC_ROP_0       0x00000070 /*       */
-#define NV_PFAULT_CLIENT_GPC_ROP_1       0x00000071 /*       */
-#define NV_PFAULT_CLIENT_GPC_ROP_2       0x00000072 /*       */
-#define NV_PFAULT_CLIENT_GPC_ROP_3       0x00000073 /*       */
-#define NV_PFAULT_CLIENT_GPC_GPM          0x00000017 /*       */
-#define NV_PFAULT_CLIENT_HUB_VIP         0x00000000 /*       */
-#define NV_PFAULT_CLIENT_HUB_CE0         0x00000001 /*       */
-#define NV_PFAULT_CLIENT_HUB_CE1         0x00000002 /*       */
-#define NV_PFAULT_CLIENT_HUB_DNISO       0x00000003 /*       */
-#define NV_PFAULT_CLIENT_HUB_DISPNISO    0x00000003 /*       */
-#define NV_PFAULT_CLIENT_HUB_FE0         0x00000004 /*       */
-#define NV_PFAULT_CLIENT_HUB_FE          0x00000004 /*       */
-#define NV_PFAULT_CLIENT_HUB_FECS0       0x00000005 /*       */
-#define NV_PFAULT_CLIENT_HUB_FECS        0x00000005 /*       */
-#define NV_PFAULT_CLIENT_HUB_HOST        0x00000006 /*       */
-#define NV_PFAULT_CLIENT_HUB_HOST_CPU    0x00000007 /*       */
-#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /*       */
-#define NV_PFAULT_CLIENT_HUB_ISO         0x00000009 /*       */
-#define NV_PFAULT_CLIENT_HUB_MMU         0x0000000A /*       */
-#define NV_PFAULT_CLIENT_HUB_NVDEC0      0x0000000B /*       */
-#define NV_PFAULT_CLIENT_HUB_NVDEC       0x0000000B /*       */
-#define NV_PFAULT_CLIENT_HUB_CE3         0x0000000C /*       */
-#define NV_PFAULT_CLIENT_HUB_NVENC1      0x0000000D /*       */
-#define NV_PFAULT_CLIENT_HUB_NISO        0x0000000E /*       */
-#define NV_PFAULT_CLIENT_HUB_ACTRS       0x0000000E /*       */
-#define NV_PFAULT_CLIENT_HUB_P2P         0x0000000F /*       */
-#define NV_PFAULT_CLIENT_HUB_PD          0x00000010 /*       */
-#define NV_PFAULT_CLIENT_HUB_PD0         0x00000010 /*       */
-#define NV_PFAULT_CLIENT_HUB_PERF0       0x00000011 /*       */
-#define NV_PFAULT_CLIENT_HUB_PERF        0x00000011 /*       */
-#define NV_PFAULT_CLIENT_HUB_PMU         0x00000012 /*       */
-#define NV_PFAULT_CLIENT_HUB_RASTERTWOD  0x00000013 /*       */
-#define NV_PFAULT_CLIENT_HUB_RASTERTWOD0 0x00000013 /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC         0x00000014 /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC0        0x00000014 /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC_NB      0x00000015 /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC_NB0     0x00000015 /*       */
-#define NV_PFAULT_CLIENT_HUB_SEC         0x00000016 /*       */
-#define NV_PFAULT_CLIENT_HUB_SSYNC       0x00000017 /*       */
-#define NV_PFAULT_CLIENT_HUB_SSYNC0      0x00000017 /*       */
-#define NV_PFAULT_CLIENT_HUB_GRCOPY      0x00000018 /*       */
-#define NV_PFAULT_CLIENT_HUB_CE2         0x00000018 /*       */
-#define NV_PFAULT_CLIENT_HUB_XV          0x00000019 /*       */
-#define NV_PFAULT_CLIENT_HUB_MMU_NB      0x0000001A /*       */
-#define NV_PFAULT_CLIENT_HUB_NVENC0      0x0000001B /*       */
-#define NV_PFAULT_CLIENT_HUB_NVENC       0x0000001B /*       */
-#define NV_PFAULT_CLIENT_HUB_DFALCON     0x0000001C /*       */
-#define NV_PFAULT_CLIENT_HUB_SKED0       0x0000001D /*       */
-#define NV_PFAULT_CLIENT_HUB_SKED        0x0000001D /*       */
-#define NV_PFAULT_CLIENT_HUB_PD1         0x0000001E /*       */
-#define NV_PFAULT_CLIENT_HUB_DONT_CARE   0x0000001F /*       */
-#define NV_PFAULT_CLIENT_HUB_HSCE0       0x00000020 /*       */
-#define NV_PFAULT_CLIENT_HUB_HSCE1       0x00000021 /*       */
-#define NV_PFAULT_CLIENT_HUB_HSCE2       0x00000022 /*       */
-#define NV_PFAULT_CLIENT_HUB_HSCE3       0x00000023 /*       */
-#define NV_PFAULT_CLIENT_HUB_HSCE4       0x00000024 /*       */
-#define NV_PFAULT_CLIENT_HUB_HSCE5       0x00000025 /*       */
-#define NV_PFAULT_CLIENT_HUB_HSCE6       0x00000026 /*       */
-#define NV_PFAULT_CLIENT_HUB_HSCE7       0x00000027 /*       */
-#define NV_PFAULT_CLIENT_HUB_SSYNC1      0x00000028 /*       */
-#define NV_PFAULT_CLIENT_HUB_SSYNC2      0x00000029 /*       */
-#define NV_PFAULT_CLIENT_HUB_HSHUB       0x0000002A /*       */
-#define NV_PFAULT_CLIENT_HUB_PTP_X0      0x0000002B /*       */
-#define NV_PFAULT_CLIENT_HUB_PTP_X1      0x0000002C /*       */
-#define NV_PFAULT_CLIENT_HUB_PTP_X2      0x0000002D /*       */
-#define NV_PFAULT_CLIENT_HUB_PTP_X3      0x0000002E /*       */
-#define NV_PFAULT_CLIENT_HUB_PTP_X4      0x0000002F /*       */
-#define NV_PFAULT_CLIENT_HUB_PTP_X5      0x00000030 /*       */
-#define NV_PFAULT_CLIENT_HUB_PTP_X6      0x00000031 /*       */
-#define NV_PFAULT_CLIENT_HUB_PTP_X7      0x00000032 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVENC2      0x00000033 /*       */
-#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /*       */
-#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /*       */
-#define NV_PFAULT_CLIENT_HUB_SSYNC3      0x00000036 /*       */
-#define NV_PFAULT_CLIENT_HUB_FBFALCON    0x00000037 /*       */
-#define NV_PFAULT_CLIENT_HUB_CE_SHIM     0x00000038 /*       */
-#define NV_PFAULT_CLIENT_HUB_CE_SHIM0    0x00000038 /*       */
-#define NV_PFAULT_CLIENT_HUB_GSP         0x00000039 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVDEC1      0x0000003A /*       */
-#define NV_PFAULT_CLIENT_HUB_NVDEC2      0x0000003B /*       */
-#define NV_PFAULT_CLIENT_HUB_NVJPG0      0x0000003C /*       */
-#define NV_PFAULT_CLIENT_HUB_NVDEC3      0x0000003D /*       */
-#define NV_PFAULT_CLIENT_HUB_NVDEC4      0x0000003E /*       */
-#define NV_PFAULT_CLIENT_HUB_OFA0        0x0000003F /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC1        0x00000040 /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC_NB1     0x00000041 /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC2        0x00000042 /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC_NB2     0x00000043 /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC3        0x00000044 /*       */
-#define NV_PFAULT_CLIENT_HUB_SCC_NB3     0x00000045 /*       */
-#define NV_PFAULT_CLIENT_HUB_RASTERTWOD1 0x00000046 /*       */
-#define NV_PFAULT_CLIENT_HUB_RASTERTWOD2 0x00000047 /*       */
-#define NV_PFAULT_CLIENT_HUB_RASTERTWOD3 0x00000048 /*       */
-#define NV_PFAULT_CLIENT_HUB_GSPLITE1    0x00000049 /*       */
-#define NV_PFAULT_CLIENT_HUB_GSPLITE2    0x0000004A /*       */
-#define NV_PFAULT_CLIENT_HUB_GSPLITE3    0x0000004B /*       */
-#define NV_PFAULT_CLIENT_HUB_PD2         0x0000004C /*       */
-#define NV_PFAULT_CLIENT_HUB_PD3         0x0000004D /*       */
-#define NV_PFAULT_CLIENT_HUB_FE1         0x0000004E /*       */
-#define NV_PFAULT_CLIENT_HUB_FE2         0x0000004F /*       */
-#define NV_PFAULT_CLIENT_HUB_FE3         0x00000050 /*       */
-#define NV_PFAULT_CLIENT_HUB_FE4         0x00000051 /*       */
-#define NV_PFAULT_CLIENT_HUB_FE5         0x00000052 /*       */
-#define NV_PFAULT_CLIENT_HUB_FE6         0x00000053 /*       */
-#define NV_PFAULT_CLIENT_HUB_FE7         0x00000054 /*       */
-#define NV_PFAULT_CLIENT_HUB_FECS1       0x00000055 /*       */
-#define NV_PFAULT_CLIENT_HUB_FECS2       0x00000056 /*       */
-#define NV_PFAULT_CLIENT_HUB_FECS3       0x00000057 /*       */
-#define NV_PFAULT_CLIENT_HUB_FECS4       0x00000058 /*       */
-#define NV_PFAULT_CLIENT_HUB_FECS5       0x00000059 /*       */
-#define NV_PFAULT_CLIENT_HUB_FECS6       0x0000005A /*       */
-#define NV_PFAULT_CLIENT_HUB_FECS7       0x0000005B /*       */
-#define NV_PFAULT_CLIENT_HUB_SKED1       0x0000005C /*       */
-#define NV_PFAULT_CLIENT_HUB_SKED2       0x0000005D /*       */
-#define NV_PFAULT_CLIENT_HUB_SKED3       0x0000005E /*       */
-#define NV_PFAULT_CLIENT_HUB_SKED4       0x0000005F /*       */
-#define NV_PFAULT_CLIENT_HUB_SKED5       0x00000060 /*       */
-#define NV_PFAULT_CLIENT_HUB_SKED6       0x00000061 /*       */
-#define NV_PFAULT_CLIENT_HUB_SKED7       0x00000062 /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC          0x00000063 /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC0         0x00000063 /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC1         0x00000064 /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC2         0x00000065 /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC3         0x00000066 /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC4         0x00000067 /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC5         0x00000068 /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC6         0x00000069 /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC7         0x0000006a /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC8         0x0000006b /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC9         0x0000006c /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC10        0x0000006d /*       */
-#define NV_PFAULT_CLIENT_HUB_ESC11        0x0000006e /*       */
-#define NV_PFAULT_CLIENT_HUB_NVDEC5      0x0000006F /*       */
-#define NV_PFAULT_CLIENT_HUB_NVDEC6      0x00000070 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVDEC7      0x00000071 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVJPG1      0x00000072 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVJPG2      0x00000073 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVJPG3      0x00000074 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVJPG4      0x00000075 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVJPG5      0x00000076 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVJPG6      0x00000077 /*       */
-#define NV_PFAULT_CLIENT_HUB_NVJPG7      0x00000078 /*       */
-#define NV_PFAULT_CLIENT_HUB_FSP         0x00000079 /*       */
-#define NV_PFAULT_CLIENT_HUB_BSI         0x0000007A /*       */
-#define NV_PFAULT_CLIENT_HUB_GSPLITE     0x0000007B /*       */
-#define NV_PFAULT_CLIENT_HUB_GSPLITE0    0x0000007B /*       */
-#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER2 0x0000007C /*       */
-#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER3 0x0000007D /*       */
-#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER4 0x0000007E /*       */
-#define NV_PFAULT_CLIENT_HUB_NVENC3      0x0000007F /*       */
-#define NV_PFAULT_ACCESS_TYPE                 19:16 /*       */
-#define NV_PFAULT_ACCESS_TYPE_READ       0x00000000 /*       */
-#define NV_PFAULT_ACCESS_TYPE_WRITE      0x00000001 /*       */
-#define NV_PFAULT_ACCESS_TYPE_ATOMIC     0x00000002 /*       */
-#define NV_PFAULT_ACCESS_TYPE_PREFETCH   0x00000003 /*       */
-#define NV_PFAULT_ACCESS_TYPE_VIRT_READ          0x00000000 /*       */
-#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE         0x00000001 /*       */
-#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC        0x00000002 /*       */
-#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /*       */
-#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH      0x00000003 /*       */
-#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK   0x00000004 /*       */
-#define NV_PFAULT_ACCESS_TYPE_PHYS_READ          0x00000008 /*       */
-#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE         0x00000009 /*       */
-#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC        0x0000000a /*       */
-#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH      0x0000000b /*       */
-#define NV_PFAULT_MMU_CLIENT_TYPE             20:20 /*       */
-#define NV_PFAULT_MMU_CLIENT_TYPE_GPC    0x00000000 /*       */
-#define NV_PFAULT_MMU_CLIENT_TYPE_HUB    0x00000001 /*       */
-#define NV_PFAULT_GPC_ID                      28:24 /*       */
-#define NV_PFAULT_PROTECTED_MODE              29:29 /*       */
-#define NV_PFAULT_REPLAYABLE_FAULT_EN         30:30 /*       */
-#define NV_PFAULT_VALID                       31:31 /*       */
-#endif // __gb100_dev_fault_h__
--- a/kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_mmu.h
+++ b/kernel-open/nvidia-uvm/hwref/blackwell/gb100/dev_mmu.h
@@ -1,560 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2003-2016 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-
-#ifndef __gb100_dev_mmu_h__
-#define __gb100_dev_mmu_h__
-/* This file is autogenerated.  Do not edit */
-#define NV_MMU_PDE                                                      /* ----G */
-#define NV_MMU_PDE_APERTURE_BIG                       (0*32+1):(0*32+0) /* RWXVF */
-#define NV_MMU_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_PDE_SIZE                               (0*32+3):(0*32+2) /* RWXVF */
-#define NV_MMU_PDE_SIZE_FULL                                 0x00000000 /* RW--V */
-#define NV_MMU_PDE_SIZE_HALF                                 0x00000001 /* RW--V */
-#define NV_MMU_PDE_SIZE_QUARTER                              0x00000002 /* RW--V */
-#define NV_MMU_PDE_SIZE_EIGHTH                               0x00000003 /* RW--V */
-#define NV_MMU_PDE_ADDRESS_BIG_SYS                   (0*32+31):(0*32+4) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_BIG_VID                   (0*32+31-3):(0*32+4) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER             (0*32+31):(0*32+32-3) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0                    0x00000000 /* RW--V */
-#define NV_MMU_PDE_APERTURE_SMALL                     (1*32+1):(1*32+0) /* RWXVF */
-#define NV_MMU_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_PDE_VOL_SMALL                          (1*32+2):(1*32+2) /* RWXVF */
-#define NV_MMU_PDE_VOL_SMALL_TRUE                            0x00000001 /* RW--V */
-#define NV_MMU_PDE_VOL_SMALL_FALSE                           0x00000000 /* RW--V */
-#define NV_MMU_PDE_VOL_BIG                            (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_PDE_VOL_BIG_TRUE                              0x00000001 /* RW--V */
-#define NV_MMU_PDE_VOL_BIG_FALSE                             0x00000000 /* RW--V */
-#define NV_MMU_PDE_ADDRESS_SMALL_SYS                 (1*32+31):(1*32+4) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_SMALL_VID                 (1*32+31-3):(1*32+4) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER           (1*32+31):(1*32+32-3) /* RWXVF */
-#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0                  0x00000000 /* RW--V */
-#define NV_MMU_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_PDE__SIZE                                              8
-#define NV_MMU_PTE                                                      /* ----G */
-#define NV_MMU_PTE_VALID                              (0*32+0):(0*32+0) /* RWXVF */
-#define NV_MMU_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_PTE_PRIVILEGE                          (0*32+1):(0*32+1) /* RWXVF */
-#define NV_MMU_PTE_PRIVILEGE_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_PTE_PRIVILEGE_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_PTE_READ_ONLY                          (0*32+2):(0*32+2) /* RWXVF */
-#define NV_MMU_PTE_READ_ONLY_TRUE                                  0x1  /* RW--V */
-#define NV_MMU_PTE_READ_ONLY_FALSE                                 0x0  /* RW--V */
-#define NV_MMU_PTE_ENCRYPTED                          (0*32+3):(0*32+3) /* RWXVF */
-#define NV_MMU_PTE_ENCRYPTED_TRUE                            0x00000001 /* R---V */
-#define NV_MMU_PTE_ENCRYPTED_FALSE                           0x00000000 /* R---V */
-#define NV_MMU_PTE_ADDRESS_SYS                      (0*32+31):(0*32+4) /* RWXVF */
-#define NV_MMU_PTE_ADDRESS_VID                      (0*32+31-3):(0*32+4) /* RWXVF */
-#define NV_MMU_PTE_ADDRESS_VID_PEER                (0*32+31):(0*32+32-3) /* RWXVF */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_0                       0x00000000 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_1                       0x00000001 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_2                       0x00000002 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_3                       0x00000003 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_4                       0x00000004 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_5                       0x00000005 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_6                       0x00000006 /* RW--V */
-#define NV_MMU_PTE_ADDRESS_VID_PEER_7                       0x00000007 /* RW--V */
-#define NV_MMU_PTE_VOL                                (1*32+0):(1*32+0) /* RWXVF */
-#define NV_MMU_PTE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_PTE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_PTE_APERTURE                           (1*32+2):(1*32+1) /* RWXVF */
-#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_PTE_LOCK                               (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_PTE_LOCK_TRUE                                        0x1 /* RW--V */
-#define NV_MMU_PTE_LOCK_FALSE                                       0x0 /* RW--V */
-#define NV_MMU_PTE_ATOMIC_DISABLE                     (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE                              0x1 /* RW--V */
-#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE                             0x0 /* RW--V */
-#define NV_MMU_PTE_COMPTAGLINE                      (1*32+20+11):(1*32+12) /* RWXVF */
-#define NV_MMU_PTE_READ_DISABLE                     (1*32+30):(1*32+30) /* RWXVF */
-#define NV_MMU_PTE_READ_DISABLE_TRUE                               0x1  /* RW--V */
-#define NV_MMU_PTE_READ_DISABLE_FALSE                              0x0  /* RW--V */
-#define NV_MMU_PTE_WRITE_DISABLE                    (1*32+31):(1*32+31) /* RWXVF */
-#define NV_MMU_PTE_WRITE_DISABLE_TRUE                              0x1  /* RW--V */
-#define NV_MMU_PTE_WRITE_DISABLE_FALSE                             0x0  /* RW--V */
-#define NV_MMU_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_PTE__SIZE                                             8
-#define NV_MMU_PTE_COMPTAGS_NONE                                    0x0 /*       */
-#define NV_MMU_PTE_COMPTAGS_1                                       0x1 /*       */
-#define NV_MMU_PTE_COMPTAGS_2                                       0x2 /*       */
-#define NV_MMU_PTE_KIND                              (1*32+7):(1*32+4) /* RWXVF */
-#define NV_MMU_PTE_KIND_INVALID                       0x07 /* R---V */
-#define NV_MMU_PTE_KIND_PITCH                         0x00 /* R---V */
-#define NV_MMU_PTE_KIND_GENERIC_MEMORY                                                  0x6 /* R---V */
-#define NV_MMU_PTE_KIND_Z16                                                             0x1 /* R---V */
-#define NV_MMU_PTE_KIND_S8                                                              0x2 /* R---V */
-#define NV_MMU_PTE_KIND_S8Z24                                                           0x3 /* R---V */
-#define NV_MMU_PTE_KIND_ZF32_X24S8                                                      0x4 /* R---V */
-#define NV_MMU_PTE_KIND_Z24S8                                                           0x5 /* R---V */
-#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE                                     0x8 /* R---V */
-#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC                         0x9 /* R---V */
-#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC                                     0xA /* R---V */
-#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC                                    0xB /* R---V */
-#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC                                  0xC /* R---V */
-#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC                             0xD /* R---V */
-#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC                                  0xE /* R---V */
-#define NV_MMU_PTE_KIND_SMSKED_MESSAGE                                                  0xF /* R---V */
-#define NV_MMU_VER1_PDE                                                      /* ----G */
-#define NV_MMU_VER1_PDE_APERTURE_BIG                       (0*32+1):(0*32+0) /* RWXVF */
-#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_VER1_PDE_SIZE                               (0*32+3):(0*32+2) /* RWXVF */
-#define NV_MMU_VER1_PDE_SIZE_FULL                                 0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_SIZE_HALF                                 0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_SIZE_QUARTER                              0x00000002 /* RW--V */
-#define NV_MMU_VER1_PDE_SIZE_EIGHTH                               0x00000003 /* RW--V */
-#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS                   (0*32+31):(0*32+4) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID                   (0*32+31-3):(0*32+4) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER             (0*32+31):(0*32+32-3) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0                    0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL                     (1*32+1):(1*32+0) /* RWXVF */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_VER1_PDE_VOL_SMALL                          (1*32+2):(1*32+2) /* RWXVF */
-#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE                            0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE                           0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_VOL_BIG                            (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_VER1_PDE_VOL_BIG_TRUE                              0x00000001 /* RW--V */
-#define NV_MMU_VER1_PDE_VOL_BIG_FALSE                             0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS                 (1*32+31):(1*32+4) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID                 (1*32+31-3):(1*32+4) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER           (1*32+31):(1*32+32-3) /* RWXVF */
-#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0                  0x00000000 /* RW--V */
-#define NV_MMU_VER1_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER1_PDE__SIZE                                              8
-#define NV_MMU_VER1_PTE                                                      /* ----G */
-#define NV_MMU_VER1_PTE_VALID                              (0*32+0):(0*32+0) /* RWXVF */
-#define NV_MMU_VER1_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER1_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER1_PTE_PRIVILEGE                          (0*32+1):(0*32+1) /* RWXVF */
-#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_VER1_PTE_READ_ONLY                          (0*32+2):(0*32+2) /* RWXVF */
-#define NV_MMU_VER1_PTE_READ_ONLY_TRUE                                  0x1  /* RW--V */
-#define NV_MMU_VER1_PTE_READ_ONLY_FALSE                                 0x0  /* RW--V */
-#define NV_MMU_VER1_PTE_ENCRYPTED                          (0*32+3):(0*32+3) /* RWXVF */
-#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE                            0x00000001 /* R---V */
-#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE                           0x00000000 /* R---V */
-#define NV_MMU_VER1_PTE_ADDRESS_SYS                      (0*32+31):(0*32+4) /* RWXVF */
-#define NV_MMU_VER1_PTE_ADDRESS_VID                      (0*32+31-3):(0*32+4) /* RWXVF */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER                (0*32+31):(0*32+32-3) /* RWXVF */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0                       0x00000000 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1                       0x00000001 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2                       0x00000002 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3                       0x00000003 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4                       0x00000004 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5                       0x00000005 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6                       0x00000006 /* RW--V */
-#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7                       0x00000007 /* RW--V */
-#define NV_MMU_VER1_PTE_VOL                                (1*32+0):(1*32+0) /* RWXVF */
-#define NV_MMU_VER1_PTE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_VER1_PTE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_VER1_PTE_APERTURE                           (1*32+2):(1*32+1) /* RWXVF */
-#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER1_PTE_ATOMIC_DISABLE                     (1*32+3):(1*32+3) /* RWXVF */
-#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE                              0x1 /* RW--V */
-#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE                             0x0 /* RW--V */
-#define NV_MMU_VER1_PTE_COMPTAGLINE                      (1*32+20+11):(1*32+12) /* RWXVF */
-#define NV_MMU_VER1_PTE_KIND                              (1*32+11):(1*32+4) /* RWXVF */
-#define NV_MMU_VER1_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER1_PTE__SIZE                                             8
-#define NV_MMU_VER1_PTE_COMPTAGS_NONE                                    0x0 /*       */
-#define NV_MMU_VER1_PTE_COMPTAGS_1                                       0x1 /*       */
-#define NV_MMU_VER1_PTE_COMPTAGS_2                                       0x2 /*       */
-#define NV_MMU_NEW_PDE                                                      /* ----G */
-#define NV_MMU_NEW_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_NEW_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_NEW_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_NEW_PDE_IS_PDE                                           0:0 /* RWXVF */
-#define NV_MMU_NEW_PDE_IS_PDE_TRUE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_PDE_IS_PDE_FALSE                                     0x1 /* RW--V */
-#define NV_MMU_NEW_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_NEW_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_NEW_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_PDE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_NEW_PDE_APERTURE_INVALID                          0x00000000 /* RW--V */
-#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY                     0x00000001 /* RW--V */
-#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_NEW_PDE_VOL                                              3:3 /* RWXVF */
-#define NV_MMU_NEW_PDE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_NEW_PDE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_NEW_PDE_NO_ATS                                            5:5 /* RWXVF */
-#define NV_MMU_NEW_PDE_NO_ATS_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_NEW_PDE_NO_ATS_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_PDE_ADDRESS_SYS                                     53:8 /* RWXVF */
-#define NV_MMU_NEW_PDE_ADDRESS_VID             (35-3):8 /* RWXVF */
-#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER       35:(36-3) /* RWXVF */
-#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0                        0x00000000 /* RW--V */
-#define NV_MMU_NEW_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_NEW_PDE__SIZE                                              8
-#define NV_MMU_NEW_DUAL_PDE                                                      /* ----G */
-#define NV_MMU_NEW_DUAL_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_IS_PDE                                           0:0 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE                                     0x1 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG                                     2:1 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VOL_BIG                                          3:3 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE                              0x00000001 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE                             0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_NO_ATS                                       5:5 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE                                  0x1 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE                                 0x0 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS                                 53:(8-4) /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID         (35-3):(8-4) /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER   35:(36-3) /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0                    0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL                                 66:65 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL                                      67:67 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE                            0x00000001 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE                           0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS                             117:72 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID      (99-3):72 /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0                  0x00000000 /* RW--V */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /*       */
-#define NV_MMU_NEW_DUAL_PDE__SIZE                                             16
-#define NV_MMU_NEW_PTE                                                      /* ----G */
-#define NV_MMU_NEW_PTE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_NEW_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_NEW_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_NEW_PTE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_NEW_PTE_VOL                                              3:3 /* RWXVF */
-#define NV_MMU_NEW_PTE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_NEW_PTE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_NEW_PTE_ENCRYPTED                                        4:4 /* RWXVF */
-#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE                            0x00000001 /* R---V */
-#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE                           0x00000000 /* R---V */
-#define NV_MMU_NEW_PTE_PRIVILEGE                                        5:5 /* RWXVF */
-#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_NEW_PTE_READ_ONLY                                        6:6 /* RWXVF */
-#define NV_MMU_NEW_PTE_READ_ONLY_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_NEW_PTE_READ_ONLY_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_NEW_PTE_ATOMIC_DISABLE                                   7:7 /* RWXVF */
-#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE                              0x1 /* RW--V */
-#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE                             0x0 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_SYS                                     53:8 /* RWXVF */
-#define NV_MMU_NEW_PTE_ADDRESS_VID             (35-3):8 /* RWXVF */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER       35:(36-3) /* RWXVF */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0                        0x00000000 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1                        0x00000001 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2                        0x00000002 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3                        0x00000003 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4                        0x00000004 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5                        0x00000005 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6                        0x00000006 /* RW--V */
-#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7                        0x00000007 /* RW--V */
-#define NV_MMU_NEW_PTE_COMPTAGLINE   (20+35):36 /* RWXVF */
-#define NV_MMU_NEW_PTE_KIND                                           63:56 /* RWXVF */
-#define NV_MMU_NEW_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_NEW_PTE__SIZE                                              8
-#define NV_MMU_VER2_PDE                                                      /* ----G */
-#define NV_MMU_VER2_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_VER2_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER2_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER2_PDE_IS_PDE                                           0:0 /* RWXVF */
-#define NV_MMU_VER2_PDE_IS_PDE_TRUE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_PDE_IS_PDE_FALSE                                     0x1 /* RW--V */
-#define NV_MMU_VER2_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER2_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER2_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_PDE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_VER2_PDE_APERTURE_INVALID                          0x00000000 /* RW--V */
-#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY                     0x00000001 /* RW--V */
-#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER2_PDE_VOL                                              3:3 /* RWXVF */
-#define NV_MMU_VER2_PDE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_VER2_PDE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_VER2_PDE_NO_ATS                                           5:5 /* RWXVF */
-#define NV_MMU_VER2_PDE_NO_ATS_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER2_PDE_NO_ATS_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER2_PDE_ADDRESS_SYS                                     53:8 /* RWXVF */
-#define NV_MMU_VER2_PDE_ADDRESS_VID             (35-3):8 /* RWXVF */
-#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER       35:(36-3) /* RWXVF */
-#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0                        0x00000000 /* RW--V */
-#define NV_MMU_VER2_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER2_PDE__SIZE                                              8
-#define NV_MMU_VER2_DUAL_PDE                                                      /* ----G */
-#define NV_MMU_VER2_DUAL_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_IS_PDE                                           0:0 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE                                     0x1 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG                                     2:1 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VOL_BIG                                          3:3 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE                              0x00000001 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE                             0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_NO_ATS                                      5:5 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE                                 0x1 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE                                0x0 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS                                 53:(8-4) /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID         (35-3):(8-4) /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER   35:(36-3) /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0                    0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL                                 66:65 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL                                      67:67 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE                            0x00000001 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE                           0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS                             117:72 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID      (99-3):72 /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0                  0x00000000 /* RW--V */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /*       */
-#define NV_MMU_VER2_DUAL_PDE__SIZE                                             16
-#define NV_MMU_VER2_PTE                                                      /* ----G */
-#define NV_MMU_VER2_PTE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER2_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER2_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER2_PTE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER2_PTE_VOL                                              3:3 /* RWXVF */
-#define NV_MMU_VER2_PTE_VOL_TRUE                                  0x00000001 /* RW--V */
-#define NV_MMU_VER2_PTE_VOL_FALSE                                 0x00000000 /* RW--V */
-#define NV_MMU_VER2_PTE_ENCRYPTED                                        4:4 /* RWXVF */
-#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE                            0x00000001 /* R---V */
-#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE                           0x00000000 /* R---V */
-#define NV_MMU_VER2_PTE_PRIVILEGE                                        5:5 /* RWXVF */
-#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_VER2_PTE_READ_ONLY                                        6:6 /* RWXVF */
-#define NV_MMU_VER2_PTE_READ_ONLY_TRUE                                   0x1 /* RW--V */
-#define NV_MMU_VER2_PTE_READ_ONLY_FALSE                                  0x0 /* RW--V */
-#define NV_MMU_VER2_PTE_ATOMIC_DISABLE                                   7:7 /* RWXVF */
-#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE                              0x1 /* RW--V */
-#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE                             0x0 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_SYS                                     53:8 /* RWXVF */
-#define NV_MMU_VER2_PTE_ADDRESS_VID             (35-3):8 /* RWXVF */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER       35:(36-3) /* RWXVF */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0                        0x00000000 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1                        0x00000001 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2                        0x00000002 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3                        0x00000003 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4                        0x00000004 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5                        0x00000005 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6                        0x00000006 /* RW--V */
-#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7                        0x00000007 /* RW--V */
-#define NV_MMU_VER2_PTE_COMPTAGLINE   (20+35):36 /* RWXVF */
-#define NV_MMU_VER2_PTE_KIND                                           63:56 /* RWXVF */
-#define NV_MMU_VER2_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER2_PTE__SIZE                                              8
-#define NV_MMU_VER3_PDE                                                      /* ----G */
-#define NV_MMU_VER3_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_VER3_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER3_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER3_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER3_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER3_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER3_PDE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_VER3_PDE_APERTURE_INVALID                          0x00000000 /* RW--V */
-#define NV_MMU_VER3_PDE_APERTURE_VIDEO_MEMORY                     0x00000001 /* RW--V */
-#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF                                                                        5:3 /* RWXVF */
-#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED               0x00000000 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED                                        0x00000000 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED                                             0x00000000 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED              0x00000001 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED                                      0x00000001 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_ALLOWED                                              0x00000001 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED       0x00000002 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED                                    0x00000002 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED                                         0x00000002 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED      0x00000003 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED                                  0x00000003 /* RW--V */
-#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_NOT_ALLOWED                                          0x00000003 /* RW--V */
-#define NV_MMU_VER3_PDE_ADDRESS                                             51:12 /* RWXVF */
-#define NV_MMU_VER3_PDE_ADDRESS_SHIFT                                  0x0000000c /*       */
-#define NV_MMU_VER3_PDE__SIZE                                              8
-#define NV_MMU_VER3_DUAL_PDE                                                      /* ----G */
-#define NV_MMU_VER3_DUAL_PDE_IS_PTE                                           0:0 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_IS_PTE_TRUE                                      0x1 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_IS_PTE_FALSE                                     0x0 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG                                     2:1 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_INVALID                      0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY                 0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY       0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY   0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG                                                                        5:3 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED               0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED                                        0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED                                             0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED              0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED                                      0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_ALLOWED                                              0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED       0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED                                    0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED                                         0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED      0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED                                  0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_NOT_ALLOWED                                          0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG                                     51:8 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL                                 66:65 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_INVALID                    0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY               0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY     0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL                                                                      69:67 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED               0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED                                        0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED                                             0x00000000 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED              0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED                                      0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_ALLOWED                                              0x00000001 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED       0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED                                    0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED                                         0x00000002 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED      0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED                                  0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_NOT_ALLOWED                                          0x00000003 /* RW--V */
-#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SMALL                                 115:76 /* RWXVF */
-#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /*       */
-#define NV_MMU_VER3_DUAL_PDE__SIZE                                             16
-#define NV_MMU_VER3_PTE                                                      /* ----G */
-#define NV_MMU_VER3_PTE_VALID                                            0:0 /* RWXVF */
-#define NV_MMU_VER3_PTE_VALID_TRUE                                       0x1 /* RW--V */
-#define NV_MMU_VER3_PTE_VALID_FALSE                                      0x0 /* RW--V */
-#define NV_MMU_VER3_PTE_APERTURE                                         2:1 /* RWXVF */
-#define NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY                     0x00000000 /* RW--V */
-#define NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY                      0x00000001 /* RW--V */
-#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY           0x00000002 /* RW--V */
-#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY       0x00000003 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF                                                                        7:3 /* RWXVF */
-#define NV_MMU_VER3_PTE_PCF_INVALID                                                         0x00000000 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_SPARSE                                                          0x00000001 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_MAPPING_NOWHERE                                                 0x00000002 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_NO_VALID_4KB_PAGE                                               0x00000003 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE                                    0x00000000 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE                                  0x00000001 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACE                                  0x00000002 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACE                                0x00000003 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACE                                    0x00000004 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACE                                   0x00000005 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACE                                  0x00000006 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACE                                0x00000007 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE                                 0x00000008 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE                               0x00000009 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE                               0x0000000A /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE                             0x0000000B /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE                                 0x0000000C /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE                               0x0000000D /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE                               0x0000000E /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE                             0x0000000F /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD                                    0x00000010 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD                                  0x00000011 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACD                                  0x00000012 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACD                                0x00000013 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACD                                    0x00000014 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACD                                  0x00000015 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACD                                  0x00000016 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACD                                0x00000017 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD                                 0x00000018 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD                               0x00000019 /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACD                               0x0000001A /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACD                             0x0000001B /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD                                 0x0000001C /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD                               0x0000001D /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACD                               0x0000001E /* RW--V */
-#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD                             0x0000001F /* RW--V */
-#define NV_MMU_VER3_PTE_KIND                                           11:8 /* RWXVF */
-#define NV_MMU_VER3_PTE_ADDRESS                                         51:12 /* RWXVF */
-#define NV_MMU_VER3_PTE_ADDRESS_SYS                                     51:12 /* RWXVF */
-#define NV_MMU_VER3_PTE_ADDRESS_PEER                                    51:12 /* RWXVF */
-#define NV_MMU_VER3_PTE_ADDRESS_VID                                     39:12 /* RWXVF */
-#define NV_MMU_VER3_PTE_PEER_ID                63:(64-3) /* RWXVF */
-#define NV_MMU_VER3_PTE_PEER_ID_0                                 0x00000000 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_1                                 0x00000001 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_2                                 0x00000002 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_3                                 0x00000003 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_4                                 0x00000004 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_5                                 0x00000005 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_6                                 0x00000006 /* RW--V */
-#define NV_MMU_VER3_PTE_PEER_ID_7                                 0x00000007 /* RW--V */
-#define NV_MMU_VER3_PTE_ADDRESS_SHIFT                             0x0000000c /*       */
-#define NV_MMU_VER3_PTE__SIZE                                              8
-#define NV_MMU_CLIENT                                             /* ----G */
-#define NV_MMU_CLIENT_KIND                                    2:0 /* RWXVF */
-#define NV_MMU_CLIENT_KIND_Z16                                0x1 /* R---V */
-#define NV_MMU_CLIENT_KIND_S8                                 0x2 /* R---V */
-#define NV_MMU_CLIENT_KIND_S8Z24                              0x3 /* R---V */
-#define NV_MMU_CLIENT_KIND_ZF32_X24S8                         0x4 /* R---V */
-#define NV_MMU_CLIENT_KIND_Z24S8                              0x5 /* R---V */
-#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY                     0x6 /* R---V */
-#define NV_MMU_CLIENT_KIND_INVALID                            0x7 /* R---V */
-#endif // __gb100_dev_mmu_h__
--- a/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c
+++ b/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016 NVIDIA Corporation
+    Copyright (c) 2016-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -81,7 +81,7 @@
 #define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS)

 // This exists in order to have a function to place a breakpoint on:
-void on_nvq_assert(void)
+static void on_nvq_assert(void)
 {
    (void)NULL;
 }
--- a/kernel-open/nvidia-uvm/nv-kthread-q.c
+++ b/kernel-open/nvidia-uvm/nv-kthread-q.c
@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

        // Ran out of attempts - return thread even if its stack may not be
        // allocated on the preferred node
-        if ((i == (attempts - 1)))
+        if (i == (attempts - 1))
            break;

        // Get the NUMA node where the first page of the stack is resident. If
--- a/kernel-open/nvidia-uvm/nvidia-uvm-sources.Kbuild
+++ b/kernel-open/nvidia-uvm/nvidia-uvm-sources.Kbuild
@@ -6,10 +6,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
@@ -76,7 +72,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_host.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_fault_buffer.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
--- a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
+++ b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
@@ -114,7 +114,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
 NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
 NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
 NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
-NV_CONFTEST_TYPE_COMPILE_TESTS += fault_flag_remote_present

 NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
 NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
--- a/kernel-open/nvidia-uvm/uvm.h
+++ b/kernel-open/nvidia-uvm/uvm.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2013-2024 NVIDIA Corporation
+    Copyright (c) 2013-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -58,7 +58,7 @@
 #ifndef _UVM_H_
 #define _UVM_H_

-#define UVM_API_LATEST_REVISION 12
+#define UVM_API_LATEST_REVISION 11

 #if !defined(UVM_API_REVISION)
 #error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
@@ -167,7 +167,7 @@ NV_STATUS UvmSetDriverVersion(NvU32 major, NvU32 changelist);
 //
 // Error codes:
 //     NV_ERR_NOT_SUPPORTED:
-//         The kernel is not able to support UVM. This could be because
+//         The Linux kernel is not able to support UVM. This could be because
 //         the kernel is too old, or because it lacks a feature that UVM
 //         requires. The kernel log will have details.
 //
@@ -3473,7 +3473,8 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
 //

 #if UVM_API_REV_IS_AT_MOST(10)
-// This is deprecated and replaced by sizeof(UvmToolsEventControlData).
+// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
+// sizeof(UvmToolsEventControlData_V2).
 NvLength UvmToolsGetEventControlSize(void);

 // This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
@@ -3496,7 +3497,9 @@ NvLength UvmToolsGetNumberOfCounters(void);
 //
 //     version: (INPUT)
 //         Requested version for events or counters.
-//         See UvmToolsEventQueueVersion.
+//         See UvmEventEntry_V1 and UvmEventEntry_V2.
+//         UvmToolsEventControlData_V2::version records the entry version that
+//         will be generated.
 //
 //     event_buffer: (INPUT)
 //         User allocated buffer. Must be page-aligned. Must be large enough to
@@ -3509,7 +3512,8 @@ NvLength UvmToolsGetNumberOfCounters(void);
 //
 //     event_control (INPUT)
 //         User allocated buffer. Must be page-aligned. Must be large enough to
-//         hold UvmToolsEventControlData (although single page-size allocation
+//         hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1, or
+//         UvmToolsEventControlData_V2 otherwise (a single page-size allocation
 //         should be more than enough). Gets pinned until queue is destroyed.
 //
 //     queue: (OUTPUT)
@@ -3520,16 +3524,10 @@ NvLength UvmToolsGetNumberOfCounters(void);
 //         Session handle does not refer to a valid session
 //
 //     NV_ERR_INVALID_ARGUMENT:
-//         The version is not UvmToolsEventQueueVersion_V1 or
-//         UvmToolsEventQueueVersion_V2.
+//         The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
 //         One of the parameters: event_buffer, event_buffer_size, event_control
 //         is not valid
 //
-//     NV_ERR_NOT_SUPPORTED:
-//         The requested version queue could not be created
-//         (i.e., the UVM kernel driver is older and doesn't support
-//         UvmToolsEventQueueVersion_V2).
-//
 //     NV_ERR_INSUFFICIENT_RESOURCES:
 //         There could be multiple reasons for this error. One would be that
 //         it's not possible to allocate a queue of requested size. Another
@@ -3982,51 +3980,57 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle  session,
 //     version: (INPUT)
 //         Requested version for the UUID table returned. The version must
 //         match the requested version of the event queue created with
-//         UvmToolsCreateEventQueue(). See UvmToolsEventQueueVersion.
-//         If the version of the event queue does not match the version of the
-//         UUID table, the behavior is undefined.
+//         UvmToolsCreateEventQueue().
+//         See UvmEventEntry_V1 and UvmEventEntry_V2.
 //
 //     table: (OUTPUT)
 //         Array of processor UUIDs, including the CPU's UUID which is always
-//         at index zero. The number of elements in the array must be greater
-//         or equal to UVM_MAX_PROCESSORS_V1 if the version is
-//         UvmToolsEventQueueVersion_V1 and UVM_MAX_PROCESSORS if the version is
-//         UvmToolsEventQueueVersion_V2.
-//         The srcIndex and dstIndex fields of the UvmEventMigrationInfo struct
-//         index this array. Unused indices will have a UUID of zero.
-//         If version is UvmToolsEventQueueVersion_V1 then the reported UUID
-//         will be that of the corresponding physical GPU, even if multiple SMC
-//         partitions are registered under that physical GPU. If version is
-//         UvmToolsEventQueueVersion_V2 then the reported UUID will be the GPU
-//         instance UUID if SMC is enabled, otherwise it will be the UUID of
-//         the physical GPU.
+//         at index zero. The srcIndex and dstIndex fields of the
+//         UvmEventMigrationInfo struct index this array. Unused indices will
+//         have a UUID of zero. Version UvmEventEntry_V1 always reports the
+//         UUID of the physical GPU and only supports a single SMC partition
+//         registered per process. Version UvmEventEntry_V2 supports multiple
+//         SMC partitions registered per process: it reports the physical GPU
+//         UUID if the GPU is not SMC capable or SMC is not enabled, and the
+//         GPU instance UUID for SMC partitions.
+//         The table pointer can be NULL in which case, the size of the table
+//         needed to hold all the UUIDs is returned in 'count'.
+//
+//     table_size: (INPUT)
+//         The size of the table in number of array elements. This can be
+//         zero if the table pointer is NULL.
+//
+//     count: (OUTPUT)
+//         On output, it is set by UVM to the number of table entries needed
+//         to hold all the UUIDs, including any gaps in the table due to
+//         unregistered GPUs.
 //
 // Error codes:
 //     NV_ERR_INVALID_ADDRESS:
-//         writing to table failed.
+//         writing to table failed or the count pointer was invalid.
 //
 //     NV_ERR_INVALID_ARGUMENT:
-//         The version is not UvmToolsEventQueueVersion_V1 or
-//         UvmToolsEventQueueVersion_V2.
+//         The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
+//         The count pointer is NULL.
+//         See UvmToolsEventQueueVersion.
 //
-//     NV_ERR_NOT_SUPPORTED:
-//         The kernel is not able to support the requested version
-//         (i.e., the UVM kernel driver is older and doesn't support
-//         UvmToolsEventQueueVersion_V2).
+//     NV_WARN_MISMATCHED_TARGET:
+//         The kernel returned a table suitable for UvmEventEntry_V1 events
+//         (i.e., the kernel is older and doesn't support UvmEventEntry_V2).
 //
 //     NV_ERR_NO_MEMORY:
 //         Internal memory allocation failed.
 //------------------------------------------------------------------------------
-#if UVM_API_REV_IS_AT_MOST(11)
+#if UVM_API_REV_IS_AT_MOST(10)
+NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle  session,
+                                        NvProcessorUuid       *table,
+                                        NvLength              *count);
+#else
 NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle      session,
                                        UvmToolsEventQueueVersion  version,
                                        NvProcessorUuid           *table,
                                        NvLength                   table_size,
                                        NvLength                  *count);
-#else
-NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle     session,
-                                        UvmToolsEventQueueVersion version,
-                                        NvProcessorUuid          *table);
 #endif

 //------------------------------------------------------------------------------
--- a/kernel-open/nvidia-uvm/uvm_ampere_fault_buffer.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere_fault_buffer.c
@@ -1,75 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2024 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-#include "uvm_linux.h"
-#include "uvm_global.h"
-#include "uvm_gpu.h"
-#include "uvm_hal.h"
-#include "hwref/ampere/ga100/dev_fault.h"
-
-static bool client_id_ce(NvU16 client_id)
-{
-    if (client_id >= NV_PFAULT_CLIENT_HUB_HSCE0 && client_id <= NV_PFAULT_CLIENT_HUB_HSCE9)
-        return true;
-
-    if (client_id >= NV_PFAULT_CLIENT_HUB_HSCE10 && client_id <= NV_PFAULT_CLIENT_HUB_HSCE15)
-        return true;
-
-    switch (client_id) {
-        case NV_PFAULT_CLIENT_HUB_CE0:
-        case NV_PFAULT_CLIENT_HUB_CE1:
-        case NV_PFAULT_CLIENT_HUB_CE2:
-            return true;
-    }
-
-    return false;
-}
-
-uvm_mmu_engine_type_t uvm_hal_ampere_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
-                                                                      uvm_fault_client_type_t client_type,
-                                                                      NvU16 client_id)
-{
-    // Servicing CE and Host (HUB clients) faults.
-    if (client_type == UVM_FAULT_CLIENT_TYPE_HUB) {
-        if (client_id_ce(client_id)) {
-            UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9);
-
-            return UVM_MMU_ENGINE_TYPE_CE;
-        }
-
-        if (client_id == NV_PFAULT_CLIENT_HUB_HOST || client_id == NV_PFAULT_CLIENT_HUB_ESC) {
-            UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST31);
-
-            return UVM_MMU_ENGINE_TYPE_HOST;
-        }
-    }
-
-    // We shouldn't be servicing faults from any other engines other than GR.
-    UVM_ASSERT_MSG(client_id <= NV_PFAULT_CLIENT_GPC_ROP_3, "Unexpected client ID: 0x%x\n", client_id);
-    UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS && mmu_engine_id < NV_PFAULT_MMU_ENG_ID_BAR1,
-                   "Unexpected engine ID: 0x%x\n",
-                   mmu_engine_id);
-    UVM_ASSERT(client_type == UVM_FAULT_CLIENT_TYPE_GPC);
-
-    return UVM_MMU_ENGINE_TYPE_GRAPHICS;
-}
--- a/kernel-open/nvidia-uvm/uvm_ampere_host.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere_host.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2018-2024 NVIDIA Corporation
+    Copyright (c) 2018-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -205,18 +205,17 @@ void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
                     CLEAR_FAULTED_B, HWVALUE(C076, CLEAR_FAULTED_B, INST_HI, instance_ptr_hi));
 }

-// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
+// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
 void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
-                                           uvm_gpu_phys_address_t pdb,
-                                           NvU32 depth,
-                                           uvm_membar_t membar)
+                                            uvm_gpu_phys_address_t pdb,
+                                            NvU32 depth,
+                                            uvm_membar_t membar)
 {
    NvU32 aperture_value;
    NvU32 page_table_level;
    NvU32 pdb_lo;
    NvU32 pdb_hi;
    NvU32 ack_value = 0;
-    NvU32 sysmembar_value = 0;

    UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);

@@ -231,8 +230,8 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
    pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
    pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);

-    // PDE3 is the highest level on Pascal-Ampere, see the comment in
-    // uvm_pascal_mmu.c for details.
+    // PDE3 is the highest level on Pascal, see the comment in uvm_pascal_mmu.c
+    // for details.
    UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
    page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;

@@ -243,12 +242,7 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
        ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
    }

-    if (membar == UVM_MEMBAR_SYS)
-        sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
-    else
-        sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
-
-    NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
+    NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
                               HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
                     MEM_OP_B, 0,
                     MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
@@ -261,18 +255,16 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
                     MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
                               HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));

-    // GPU membar still requires an explicit membar method.
-    if (membar == UVM_MEMBAR_GPU)
-        uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
+    uvm_hal_tlb_invalidate_membar(push, membar);
 }

-// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
+// Copy from Volta, this version sets TLB_INVALIDATE_INVAL_SCOPE.
 void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
-                                           NvU64 page_size,
+                                           NvU32 page_size,
                                           uvm_membar_t membar)
 {
    NvU32 aperture_value;
@@ -280,7 +272,6 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
    NvU32 pdb_lo;
    NvU32 pdb_hi;
    NvU32 ack_value = 0;
-    NvU32 sysmembar_value = 0;
    NvU32 va_lo;
    NvU32 va_hi;
    NvU64 end;
@@ -290,9 +281,9 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
    NvU32 log2_invalidation_size;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

-    UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
    UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);

    // The invalidation size must be a power-of-two number of pages containing
@@ -334,7 +325,7 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
    pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
    pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);

-    // PDE3 is the highest level on Pascal-Ampere, see the comment in
+    // PDE3 is the highest level on Pascal-Ampere, see the comment in
    // uvm_pascal_mmu.c for details.
    UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
    page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
@@ -346,15 +337,10 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
        ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
    }

-    if (membar == UVM_MEMBAR_SYS)
-        sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
-    else
-        sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
-
    NV_PUSH_4U(C56F, MEM_OP_A, HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
-                               HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
-                               sysmembar_value |
-                               HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
+                               HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
+                               HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
+                               HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
                     MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
                     MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
                               HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
@@ -366,23 +352,21 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
                     MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
                               HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));

-    // GPU membar still requires an explicit membar method.
-    if (membar == UVM_MEMBAR_GPU)
-        gpu->parent->host_hal->membar_gpu(push);
+    uvm_hal_tlb_invalidate_membar(push, membar);
 }

-// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
+// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
 void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params)
 {
    NvU32 ack_value = 0;
-    NvU32 sysmembar_value = 0;
    NvU32 invalidate_gpc_value = 0;
    NvU32 aperture_value = 0;
    NvU32 pdb_lo = 0;
    NvU32 pdb_hi = 0;
    NvU32 page_table_level = 0;
+    uvm_membar_t membar;

    UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
    if (pdb.aperture == UVM_APERTURE_VID)
@@ -397,7 +381,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
    pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);

    if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
-        // PDE3 is the highest level on Pascal-Ampere, see the comment in
+        // PDE3 is the highest level on Pascal, see the comment in
        // uvm_pascal_mmu.c for details.
        page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde3, params->page_table_level) - 1;
    }
@@ -409,11 +393,6 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
        ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
    }

-    if (params->membar == UvmInvalidateTlbMemBarSys)
-        sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
-    else
-        sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
-
    if (params->disable_gpc_invalidate)
        invalidate_gpc_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
    else
@@ -424,9 +403,9 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,

        NvU32 va_lo = va & HWMASK(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
        NvU32 va_hi = va >> HWSIZE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
-        NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
-                                   HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
-                                   HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
+        NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
+                                   HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
+                                   HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
                         MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
                         MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
                                   HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
@@ -439,7 +418,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
                                   HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
    }
    else {
-        NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
+        NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
                                   HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
                         MEM_OP_B, 0,
                         MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
@@ -453,7 +432,12 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
                                   HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
    }

-    // GPU membar still requires an explicit membar method.
-    if (params->membar == UvmInvalidateTlbMemBarLocal)
-        uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
+    if (params->membar == UvmInvalidateTlbMemBarSys)
+        membar = UVM_MEMBAR_SYS;
+    else if (params->membar == UvmInvalidateTlbMemBarLocal)
+        membar = UVM_MEMBAR_GPU;
+    else
+        membar = UVM_MEMBAR_NONE;
+
+    uvm_hal_tlb_invalidate_membar(push, membar);
 }
--- a/kernel-open/nvidia-uvm/uvm_ampere_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere_mmu.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2018-2024 NVIDIA Corporation
+    Copyright (c) 2018-2020 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -36,7 +36,22 @@
 #include "uvm_ampere_fault_buffer.h"
 #include "hwref/ampere/ga100/dev_fault.h"

-static NvU32 page_table_depth_ampere(NvU64 page_size)
+uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id)
+{
+    if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST31)
+        return UVM_MMU_ENGINE_TYPE_HOST;
+
+    if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9)
+        return UVM_MMU_ENGINE_TYPE_CE;
+
+    // We shouldn't be servicing faults from any other engines
+    UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS && mmu_engine_id < NV_PFAULT_MMU_ENG_ID_BAR1,
+                   "Unexpected engine ID: 0x%x\n", mmu_engine_id);
+
+    return UVM_MMU_ENGINE_TYPE_GRAPHICS;
+}
+
+static NvU32 page_table_depth_ampere(NvU32 page_size)
 {
    // The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
    if (page_size == UVM_PAGE_SIZE_2M)
@@ -47,14 +62,14 @@ static NvU32 page_table_depth_ampere(NvU64 page_size)
        return 4;
 }

-static NvU64 page_sizes_ampere(void)
+static NvU32 page_sizes_ampere(void)
 {
    return UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
 }

 static uvm_mmu_mode_hal_t ampere_mmu_mode_hal;

-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size)
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size)
 {
    static bool initialized = false;

--- a/kernel-open/nvidia-uvm/uvm_ats.c
+++ b/kernel-open/nvidia-uvm/uvm_ats.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2018-2024 NVIDIA Corporation
+    Copyright (c) 2018-2021 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
--- a/kernel-open/nvidia-uvm/uvm_ats.h
+++ b/kernel-open/nvidia-uvm/uvm_ats.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2018-2024 NVIDIA Corporation
+    Copyright (c) 2018-2021 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -29,9 +29,10 @@
 #include "uvm_ats_ibm.h"
 #include "nv_uvm_types.h"
 #include "uvm_lock.h"
-#include "uvm_ats_sva.h"

-#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
+    #include "uvm_ats_sva.h"
+
+    #define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())

 typedef struct
 {
--- a/kernel-open/nvidia-uvm/uvm_blackwell.c
+++ b/kernel-open/nvidia-uvm/uvm_blackwell.c
@@ -1,105 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2022-2023 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-#include "uvm_global.h"
-#include "uvm_hal.h"
-#include "uvm_gpu.h"
-#include "uvm_mem.h"
-#include "uvm_blackwell_fault_buffer.h"
-
-void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
-{
-    parent_gpu->tlb_batch.va_invalidate_supported = true;
-
-    parent_gpu->tlb_batch.va_range_invalidate_supported = true;
-
-    // TODO: Bug 1767241: Run benchmarks to figure out a good number
-    parent_gpu->tlb_batch.max_ranges = 8;
-
-    parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);
-
-    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
-                                                          parent_gpu->utlb_per_gpc_count;
-    {
-        uvm_fault_buffer_entry_t *dummy;
-        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
-                                                                           (sizeof(dummy->fault_source.utlb_id) * 8)));
-    }
-
-    // A single top level PDE on Blackwell covers 64 PB and that's the minimum
-    // size that can be used.
-    parent_gpu->rm_va_base = 0;
-    parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;
-
-    parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
-    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
-
-    // See uvm_mmu.h for mapping placement
-    parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);
-
-    // TODO: Bug 3953852: Set this to true pending Blackwell changes
-    parent_gpu->ce_phys_vidmem_write_supported = !uvm_parent_gpu_is_coherent(parent_gpu);
-
-    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
-
-    // All GR context buffers may be mapped to 57b wide VAs. All "compute" units
-    // accessing GR context buffers support the 57-bit VA range.
-    parent_gpu->max_channel_va = 1ull << 57;
-
-    parent_gpu->max_host_va = 1ull << 57;
-
-    // Blackwell can map sysmem with any page size
-    parent_gpu->can_map_sysmem_with_large_pages = true;
-
-    // Prefetch instructions will generate faults
-    parent_gpu->prefetch_fault_supported = true;
-
-    // Blackwell can place GPFIFO in vidmem
-    parent_gpu->gpfifo_in_vidmem_supported = true;
-
-    parent_gpu->replayable_faults_supported = true;
-
-    parent_gpu->non_replayable_faults_supported = true;
-
-    parent_gpu->access_counters_supported = true;
-
-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
-    parent_gpu->fault_cancel_va_supported = true;
-
-    parent_gpu->scoped_atomics_supported = true;
-
-    parent_gpu->has_clear_faulted_channel_sw_method = true;
-
-    parent_gpu->has_clear_faulted_channel_method = false;
-
-    parent_gpu->smc.supported = true;
-
-    parent_gpu->sparse_mappings_supported = true;
-
-    parent_gpu->map_remap_larger_page_promotion = false;
-
-    parent_gpu->plc_supported = true;
-
-    parent_gpu->no_ats_range_required = true;
-}
--- a/kernel-open/nvidia-uvm/uvm_blackwell_fault_buffer.c
+++ b/kernel-open/nvidia-uvm/uvm_blackwell_fault_buffer.c
@@ -1,122 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2023-2024 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-#include "uvm_linux.h"
-#include "uvm_global.h"
-#include "uvm_gpu.h"
-#include "uvm_hal.h"
-#include "uvm_hal_types.h"
-#include "hwref/blackwell/gb100/dev_fault.h"
-#include "clc369.h"
-
-// NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE fault type is deprecated on
-// Blackwell.
-uvm_fault_type_t uvm_hal_blackwell_fault_buffer_get_fault_type(const NvU32 *fault_entry)
-{
-    NvU32 hw_fault_type_value = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, FAULT_TYPE);
-
-    switch (hw_fault_type_value) {
-        case NV_PFAULT_FAULT_TYPE_PDE:
-            return UVM_FAULT_TYPE_INVALID_PDE;
-        case NV_PFAULT_FAULT_TYPE_PTE:
-            return UVM_FAULT_TYPE_INVALID_PTE;
-        case NV_PFAULT_FAULT_TYPE_RO_VIOLATION:
-            return UVM_FAULT_TYPE_WRITE;
-        case NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION:
-            return UVM_FAULT_TYPE_ATOMIC;
-        case NV_PFAULT_FAULT_TYPE_WO_VIOLATION:
-            return UVM_FAULT_TYPE_READ;
-
-        case NV_PFAULT_FAULT_TYPE_PDE_SIZE:
-            return UVM_FAULT_TYPE_PDE_SIZE;
-        case NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION:
-            return UVM_FAULT_TYPE_VA_LIMIT_VIOLATION;
-        case NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK:
-            return UVM_FAULT_TYPE_UNBOUND_INST_BLOCK;
-        case NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION:
-            return UVM_FAULT_TYPE_PRIV_VIOLATION;
-        case NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION:
-            return UVM_FAULT_TYPE_PITCH_MASK_VIOLATION;
-        case NV_PFAULT_FAULT_TYPE_WORK_CREATION:
-            return UVM_FAULT_TYPE_WORK_CREATION;
-        case NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE:
-            return UVM_FAULT_TYPE_UNSUPPORTED_APERTURE;
-        case NV_PFAULT_FAULT_TYPE_CC_VIOLATION:
-            return UVM_FAULT_TYPE_CC_VIOLATION;
-        case NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND:
-            return UVM_FAULT_TYPE_UNSUPPORTED_KIND;
-        case NV_PFAULT_FAULT_TYPE_REGION_VIOLATION:
-            return UVM_FAULT_TYPE_REGION_VIOLATION;
-        case NV_PFAULT_FAULT_TYPE_POISONED:
-            return UVM_FAULT_TYPE_POISONED;
-    }
-
-    UVM_ASSERT_MSG(false, "Invalid fault type value: %d\n", hw_fault_type_value);
-
-    return UVM_FAULT_TYPE_COUNT;
-}
-
-static bool client_id_ce(NvU16 client_id)
-{
-    if (client_id >= NV_PFAULT_CLIENT_HUB_HSCE0 && client_id <= NV_PFAULT_CLIENT_HUB_HSCE7)
-        return true;
-
-    switch (client_id) {
-        case NV_PFAULT_CLIENT_HUB_CE0:
-        case NV_PFAULT_CLIENT_HUB_CE1:
-        case NV_PFAULT_CLIENT_HUB_CE2:
-        case NV_PFAULT_CLIENT_HUB_CE3:
-            return true;
-    }
-
-    return false;
-}
-
-uvm_mmu_engine_type_t uvm_hal_blackwell_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
-                                                                         uvm_fault_client_type_t client_type,
-                                                                         NvU16 client_id)
-{
-    // Servicing CE and Host (HUB clients) faults.
-    if (client_type == UVM_FAULT_CLIENT_TYPE_HUB) {
-        if (client_id_ce(client_id)) {
-            UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE19);
-
-            return UVM_MMU_ENGINE_TYPE_CE;
-        }
-
-        if (client_id == NV_PFAULT_CLIENT_HUB_HOST ||
-            (client_id >= NV_PFAULT_CLIENT_HUB_ESC0 && client_id <= NV_PFAULT_CLIENT_HUB_ESC11)) {
-            UVM_ASSERT((mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44) ||
-                       (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS));
-
-            return UVM_MMU_ENGINE_TYPE_HOST;
-        }
-    }
-
-    // We shouldn't be servicing faults from any other engines other than GR.
-    UVM_ASSERT_MSG(client_id <= NV_PFAULT_CLIENT_GPC_ROP_3, "Unexpected client ID: 0x%x\n", client_id);
-    UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS, "Unexpected engine ID: 0x%x\n", mmu_engine_id);
-    UVM_ASSERT(client_type == UVM_FAULT_CLIENT_TYPE_GPC);
-
-    return UVM_MMU_ENGINE_TYPE_GRAPHICS;
-}
--- a/kernel-open/nvidia-uvm/uvm_blackwell_fault_buffer.h
+++ b/kernel-open/nvidia-uvm/uvm_blackwell_fault_buffer.h
@@ -1,92 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2022 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-#ifndef __UVM_HAL_BLACKWELL_FAULT_BUFFER_H__
-#define __UVM_HAL_BLACKWELL_FAULT_BUFFER_H__
-
-#include "nvtypes.h"
-#include "uvm_common.h"
-#include "uvm_gpu.h"
-
-// There are up to 10 TPCs per GPC in Blackwell, and there are 2 LTP uTLBs per
-// TPC. Besides, there is one active RGG uTLB per GPC. Each TPC has a number of
-// clients that can make requests to its uTLBs: 1xTPCCS, 1xPE, 2xT1. Requests
-// from these units are routed as follows to the 2 LTP uTLBs:
-//
-// --------                    ---------
-// | T1_0 | -----------------> | uTLB0 |
-// --------                    ---------
-//
-// --------                    ---------
-// | T1_1 | -----------------> | uTLB1 |
-// --------          --------> ---------
-//                   |             ^
-// -------           |             |
-// | PE  | -----------             |
-// -------                         |
-//                                 |
-// ---------                       |
-// | TPCCS | -----------------------
-// ---------
-//
-//
-// The client ids are local to their GPC and the id mapping is linear across
-// TPCs: TPC_n has TPCCS_n, PE_n, T1_p, and T1_q, where p=2*n and q=p+1.
-//
-// NV_PFAULT_CLIENT_GPC_LTP_UTLB_n and NV_PFAULT_CLIENT_GPC_RGG_UTLB enums can
-// be ignored. These will never be reported in a fault message, and should
-// never be used in an invalidate. Therefore, we define our own values.
-typedef enum {
-    UVM_BLACKWELL_GPC_UTLB_ID_RGG = 0,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP0 = 1,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP1 = 2,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP2 = 3,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP3 = 4,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP4 = 5,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP5 = 6,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP6 = 7,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP7 = 8,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP8 = 9,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP9 = 10,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP10 = 11,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP11 = 12,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP12 = 13,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP13 = 14,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP14 = 15,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP15 = 16,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP16 = 17,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP17 = 18,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP18 = 19,
-    UVM_BLACKWELL_GPC_UTLB_ID_LTP19 = 20,
-
-    UVM_BLACKWELL_GPC_UTLB_COUNT,
-} uvm_blackwell_gpc_utlb_id_t;
-
-static NvU32 uvm_blackwell_get_utlbs_per_gpc(uvm_parent_gpu_t *parent_gpu)
-{
-    NvU32 utlbs = parent_gpu->rm_info.maxTpcPerGpcCount * 2 + 1;
-    UVM_ASSERT(utlbs <= UVM_BLACKWELL_GPC_UTLB_COUNT);
-    return utlbs;
-}
-
-#endif
--- a/kernel-open/nvidia-uvm/uvm_blackwell_host.c
+++ b/kernel-open/nvidia-uvm/uvm_blackwell_host.c
@@ -1,256 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2024 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-#include "uvm_hal.h"
-#include "uvm_push.h"
-#include "uvm_push_macros.h"
-#include "clc96f.h"
-
-// TODO: Bug 3210931: Rename HOST references and files to ESCHED.
-
-void uvm_hal_blackwell_host_tlb_invalidate_all(uvm_push_t *push,
-                                               uvm_gpu_phys_address_t pdb,
-                                               NvU32 depth,
-                                               uvm_membar_t membar)
-{
-    NvU32 aperture_value;
-    NvU32 page_table_level;
-    NvU32 pdb_lo;
-    NvU32 pdb_hi;
-    NvU32 ack_value = 0;
-    NvU32 sysmembar_value = 0;
-
-    UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
-
-    if (pdb.aperture == UVM_APERTURE_VID)
-        aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
-    else
-        aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
-
-    UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
-    pdb.address >>= 12;
-
-    pdb_lo = pdb.address & HWMASK(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
-    pdb_hi = pdb.address >> HWSIZE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
-
-    // PDE4 is the highest level on Blackwell, see the comment in
-    // uvm_blackwell_mmu.c for details.
-    UVM_ASSERT_MSG(depth < NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4, "depth %u", depth);
-    page_table_level = NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 - depth;
-
-    if (membar != UVM_MEMBAR_NONE)
-        ack_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
-
-    if (membar == UVM_MEMBAR_SYS)
-        sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
-    else
-        sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
-
-    NV_PUSH_4U(C96F, MEM_OP_A, sysmembar_value |
-                               HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
-                     MEM_OP_B, 0,
-                     MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
-                               HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
-                               HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
-                               HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
-                               HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
-                               aperture_value |
-                               ack_value,
-                     MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
-                               HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
-}
-
-void uvm_hal_blackwell_host_tlb_invalidate_va(uvm_push_t *push,
-                                              uvm_gpu_phys_address_t pdb,
-                                              NvU32 depth,
-                                              NvU64 base,
-                                              NvU64 size,
-                                              NvU64 page_size,
-                                              uvm_membar_t membar)
-{
-    NvU32 aperture_value;
-    NvU32 page_table_level;
-    NvU32 pdb_lo;
-    NvU32 pdb_hi;
-    NvU32 ack_value = 0;
-    NvU32 sysmembar_value = 0;
-    NvU32 va_lo;
-    NvU32 va_hi;
-    NvU64 end;
-    NvU64 actual_base;
-    NvU64 actual_size;
-    NvU64 actual_end;
-    NvU32 log2_invalidation_size;
-    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
-
-    UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
-    UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
-
-    // The invalidation size must be a power-of-two number of pages containing
-    // the passed interval
-    end = base + size - 1;
-    log2_invalidation_size = __fls((unsigned long)(end ^ base)) + 1;
-
-    if (log2_invalidation_size == 64) {
-        // Invalidate everything
-        gpu->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
-        return;
-    }
-
-    // The hardware aligns the target address down to the invalidation size.
-    actual_size = 1ULL << log2_invalidation_size;
-    actual_base = UVM_ALIGN_DOWN(base, actual_size);
-    actual_end = actual_base + actual_size - 1;
-    UVM_ASSERT(actual_end >= end);
-
-    // The invalidation size field expects log2(invalidation size in 4K), not
-    // log2(invalidation size in bytes)
-    log2_invalidation_size -= 12;
-
-    // Address to invalidate, as a multiple of 4K.
-    base >>= 12;
-    va_lo = base & HWMASK(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
-    va_hi = base >> HWSIZE(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
-
-    UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
-
-    if (pdb.aperture == UVM_APERTURE_VID)
-        aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
-    else
-        aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
-
-    UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
-    pdb.address >>= 12;
-
-    pdb_lo = pdb.address & HWMASK(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
-    pdb_hi = pdb.address >> HWSIZE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
-
-    // PDE4 is the highest level on Blackwell, see the comment in
-    // uvm_blackwell_mmu.c for details.
-    UVM_ASSERT_MSG(depth < NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4, "depth %u", depth);
-    page_table_level = NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 - depth;
-
-    if (membar != UVM_MEMBAR_NONE)
-        ack_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
-
-    if (membar == UVM_MEMBAR_SYS)
-        sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
-    else
-        sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
-
-    NV_PUSH_4U(C96F, MEM_OP_A, HWVALUE(C96F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
-                               sysmembar_value |
-                               HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
-                               HWVALUE(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
-                     MEM_OP_B, HWVALUE(C96F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
-                     MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
-                               HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
-                               HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
-                               HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
-                               HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
-                               aperture_value |
-                               ack_value,
-                     MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
-                               HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
-}
-
-void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
-                                                uvm_gpu_phys_address_t pdb,
-                                                UVM_TEST_INVALIDATE_TLB_PARAMS *params)
-{
-    NvU32 ack_value = 0;
-    NvU32 sysmembar_value = 0;
-    NvU32 invalidate_gpc_value = 0;
-    NvU32 aperture_value = 0;
-    NvU32 pdb_lo = 0;
-    NvU32 pdb_hi = 0;
-    NvU32 page_table_level = 0;
-
-    UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
-    if (pdb.aperture == UVM_APERTURE_VID)
-        aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
-    else
-        aperture_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
-
-    UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
-    pdb.address >>= 12;
-
-    pdb_lo = pdb.address & HWMASK(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
-    pdb_hi = pdb.address >> HWSIZE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
-
-    if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
-        // PDE4 is the highest level on Blackwell, see the comment in
-        // uvm_blackwell_mmu.c for details.
-        page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde4, params->page_table_level) - 1;
-    }
-
-    if (params->membar != UvmInvalidateTlbMemBarNone)
-        ack_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
-
-    if (params->membar == UvmInvalidateTlbMemBarSys)
-        sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
-    else
-        sysmembar_value = HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
-
-    if (params->disable_gpc_invalidate)
-        invalidate_gpc_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
-    else
-        invalidate_gpc_value = HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE);
-
-    if (params->target_va_mode == UvmTargetVaModeTargeted) {
-        NvU64 va = params->va >> 12;
-
-        NvU32 va_lo = va & HWMASK(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
-        NvU32 va_hi = va >> HWSIZE(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
-
-        NV_PUSH_4U(C96F, MEM_OP_A, sysmembar_value |
-                                   HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
-                                   HWVALUE(C96F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
-                         MEM_OP_B, HWVALUE(C96F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
-                         MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
-                                   HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
-                                   HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
-                                   HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
-                                   invalidate_gpc_value |
-                                   aperture_value |
-                                   ack_value,
-                         MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
-                                   HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
-    }
-    else {
-        NV_PUSH_4U(C96F, MEM_OP_A, sysmembar_value |
-                                   HWCONST(C96F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
-                         MEM_OP_B, 0,
-                         MEM_OP_C, HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
-                                   HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
-                                   HWCONST(C96F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
-                                   HWVALUE(C96F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
-                                   invalidate_gpc_value |
-                                   aperture_value |
-                                   ack_value,
-                         MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
-                                   HWVALUE(C96F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
-    }
-}
--- a/kernel-open/nvidia-uvm/uvm_blackwell_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_blackwell_mmu.c
@@ -1,165 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2022-2024 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-// On Blackwell, the UVM page tree 'depth' maps to hardware as follows:
-//
-// UVM depth   HW level                            VA bits
-// 0           PDE4                                56:56
-// 1           PDE3                                55:47
-// 2           PDE2 (or 256G PTE)                  46:38
-// 3           PDE1 (or 512M PTE)                  37:29
-// 4           PDE0 (dual 64K/4K PDE, or 2M PTE)   28:21
-// 5           PTE_64K / PTE_4K                    20:16 / 20:12
-
-#include "uvm_types.h"
-#include "uvm_global.h"
-#include "uvm_hal.h"
-#include "uvm_hal_types.h"
-#include "uvm_blackwell_fault_buffer.h"
-#include "hwref/blackwell/gb100/dev_fault.h"
-#include "hwref/blackwell/gb100/dev_mmu.h"
-
-static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;
-
-static NvU32 page_table_depth_blackwell(NvU64 page_size)
-{
-    switch (page_size) {
-        case UVM_PAGE_SIZE_2M:
-            return 4;
-        case UVM_PAGE_SIZE_512M:
-            return 3;
-        case UVM_PAGE_SIZE_256G:
-            return 2;
-        default:
-            return 5;
-    }
-}
-
-static NvU64 page_sizes_blackwell(void)
-{
-    return UVM_PAGE_SIZE_256G | UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
-}
-
-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size)
-{
-    static bool initialized = false;
-
-    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
-
-    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
-    // 128K big page size for Pascal+ GPUs
-    if (big_page_size == UVM_PAGE_SIZE_128K)
-        return NULL;
-
-    if (!initialized) {
-        uvm_mmu_mode_hal_t *hopper_mmu_mode_hal = uvm_hal_mmu_mode_hopper(big_page_size);
-        UVM_ASSERT(hopper_mmu_mode_hal);
-
-        // The assumption made is that arch_hal->mmu_mode_hal() will be called
-        // under the global lock the first time, so check it here.
-        uvm_assert_mutex_locked(&g_uvm_global.global_lock);
-
-        blackwell_mmu_mode_hal = *hopper_mmu_mode_hal;
-        blackwell_mmu_mode_hal.page_table_depth = page_table_depth_blackwell;
-        blackwell_mmu_mode_hal.page_sizes = page_sizes_blackwell;
-
-        initialized = true;
-    }
-
-    return &blackwell_mmu_mode_hal;
-}
-
-NvU16 uvm_hal_blackwell_mmu_client_id_to_utlb_id(NvU16 client_id)
-{
-    switch (client_id) {
-        case NV_PFAULT_CLIENT_GPC_RAST:
-        case NV_PFAULT_CLIENT_GPC_GCC:
-        case NV_PFAULT_CLIENT_GPC_GPCCS:
-            return UVM_BLACKWELL_GPC_UTLB_ID_RGG;
-        case NV_PFAULT_CLIENT_GPC_T1_0:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP0;
-        case NV_PFAULT_CLIENT_GPC_T1_1:
-        case NV_PFAULT_CLIENT_GPC_PE_0:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_0:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP1;
-        case NV_PFAULT_CLIENT_GPC_T1_2:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP2;
-        case NV_PFAULT_CLIENT_GPC_T1_3:
-        case NV_PFAULT_CLIENT_GPC_PE_1:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_1:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP3;
-        case NV_PFAULT_CLIENT_GPC_T1_4:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP4;
-        case NV_PFAULT_CLIENT_GPC_T1_5:
-        case NV_PFAULT_CLIENT_GPC_PE_2:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_2:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP5;
-        case NV_PFAULT_CLIENT_GPC_T1_6:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP6;
-        case NV_PFAULT_CLIENT_GPC_T1_7:
-        case NV_PFAULT_CLIENT_GPC_PE_3:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_3:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP7;
-        case NV_PFAULT_CLIENT_GPC_T1_8:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP8;
-        case NV_PFAULT_CLIENT_GPC_T1_9:
-        case NV_PFAULT_CLIENT_GPC_PE_4:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_4:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP9;
-        case NV_PFAULT_CLIENT_GPC_T1_10:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP10;
-        case NV_PFAULT_CLIENT_GPC_T1_11:
-        case NV_PFAULT_CLIENT_GPC_PE_5:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_5:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP11;
-        case NV_PFAULT_CLIENT_GPC_T1_12:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP12;
-        case NV_PFAULT_CLIENT_GPC_T1_13:
-        case NV_PFAULT_CLIENT_GPC_PE_6:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_6:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP13;
-        case NV_PFAULT_CLIENT_GPC_T1_14:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP14;
-        case NV_PFAULT_CLIENT_GPC_T1_15:
-        case NV_PFAULT_CLIENT_GPC_PE_7:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_7:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP15;
-        case NV_PFAULT_CLIENT_GPC_T1_16:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP16;
-        case NV_PFAULT_CLIENT_GPC_T1_17:
-        case NV_PFAULT_CLIENT_GPC_PE_8:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_8:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP17;
-        case NV_PFAULT_CLIENT_GPC_T1_18:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP18;
-        case NV_PFAULT_CLIENT_GPC_T1_19:
-        case NV_PFAULT_CLIENT_GPC_PE_9:
-        case NV_PFAULT_CLIENT_GPC_TPCCS_9:
-            return UVM_BLACKWELL_GPC_UTLB_ID_LTP19;
-
-        default:
-            UVM_ASSERT_MSG(false, "Invalid client value: 0x%x\n", client_id);
-    }
-
-    return 0;
-}
--- a/kernel-open/nvidia-uvm/uvm_ce_test.c
+++ b/kernel-open/nvidia-uvm/uvm_ce_test.c
@@ -855,6 +855,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
                                      uvm_mem_t *dst_mem,
                                      uvm_mem_t *src_mem,
                                      const UvmCslIv *decrypt_iv,
+                                      NvU32 key_version,
                                      uvm_mem_t *auth_tag_mem,
                                      size_t size,
                                      NvU32 copy_size)
@@ -869,6 +870,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
                                                         dst_plain + i * copy_size,
                                                         src_cipher + i * copy_size,
                                                         decrypt_iv + i,
+                                                         key_version,
                                                         copy_size,
                                                         auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
    }
@@ -879,6 +881,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
                                          uvm_mem_t *dst_mem,
                                          uvm_mem_t *src_mem,
                                          const UvmCslIv *decrypt_iv,
+                                          NvU32 key_version,
                                          uvm_mem_t *auth_tag_mem,
                                          size_t size,
                                          NvU32 copy_size)
@@ -896,6 +899,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
                                                         dst_plain + i * copy_size,
                                                         src_cipher + i * copy_size,
                                                         decrypt_iv + i,
+                                                         key_version,
                                                         copy_size,
                                                         auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
    }
@@ -959,7 +963,7 @@ static void gpu_encrypt(uvm_push_t *push,
                                                          i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                                          dst_cipher);

-        uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
+        uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);

        if (i > 0)
            uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
@@ -1020,6 +1024,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
    size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
    UvmCslIv *decrypt_iv = NULL;
    UvmCslIv *encrypt_iv = NULL;
+    NvU32 key_version;
    uvm_tracker_t tracker;
    size_t src_plain_size;

@@ -1089,6 +1094,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,

    gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);

+    // There shouldn't be any key rotation between the end of the push and the
+    // CPU decryption(s), but it is more robust against test changes to force
+    // decryption to use the saved key.
+    key_version = uvm_channel_pool_key_version(push.channel->pool);
+
    TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);

    TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
@@ -1101,6 +1111,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
                                                dst_plain,
                                                dst_cipher,
                                                decrypt_iv,
+                                                key_version,
                                                auth_tag_mem,
                                                size,
                                                copy_size),
@@ -1111,6 +1122,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
                                                    dst_plain,
                                                    dst_cipher,
                                                    decrypt_iv,
+                                                    key_version,
                                                    auth_tag_mem,
                                                    size,
                                                    copy_size),
--- a/kernel-open/nvidia-uvm/uvm_channel.c
+++ b/kernel-open/nvidia-uvm/uvm_channel.c
--- a/kernel-open/nvidia-uvm/uvm_channel.h
+++ b/kernel-open/nvidia-uvm/uvm_channel.h
@@ -228,21 +228,65 @@ typedef struct
    // variant is required when the thread holding the pool lock must sleep
    // (ex: acquire another mutex) deeper in the call stack, either in UVM or
    // RM.
-    union {
+    union
+    {
        uvm_spinlock_t spinlock;
        uvm_mutex_t mutex;
    };

-    // Secure operations require that uvm_push_begin order matches
-    // uvm_push_end order, because the engine's state is used in its internal
-    // operation and each push may modify this state. push_locks is protected by
-    // the channel pool lock.
-    DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
+    struct
+    {
+        // Secure operations require that uvm_push_begin order matches
+        // uvm_push_end order, because the engine's state is used in its
+        // internal operation and each push may modify this state.
+        // push_locks is protected by the channel pool lock.
+        DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);

-    // Counting semaphore for available and unlocked channels, it must be
-    // acquired before submitting work to a channel when the Confidential
-    // Computing feature is enabled.
-    uvm_semaphore_t push_sem;
+        // Counting semaphore for available and unlocked channels, it must be
+        // acquired before submitting work to a channel when the Confidential
+        // Computing feature is enabled.
+        uvm_semaphore_t push_sem;
+
+        // Per channel buffers in unprotected sysmem.
+        uvm_rm_mem_t *pool_sysmem;
+
+        // Per channel buffers in protected vidmem.
+        uvm_rm_mem_t *pool_vidmem;
+
+       struct
+       {
+            // Current encryption key version, incremented upon key rotation.
+            // While there are separate keys for encryption and decryption, the
+            // two keys are rotated at once, so the versioning applies to both.
+            NvU32 version;
+
+            // Lock used to ensure mutual exclusion during key rotation.
+            uvm_mutex_t mutex;
+
+            // CSL contexts passed to RM for key rotation. This is usually an
+            // array containing the CSL contexts associated with the channels in
+            // the pool. In the case of the WLC pool, the array also includes
+            // CSL contexts associated with LCIC channels.
+            UvmCslContext **csl_contexts;
+
+            // Number of elements in the CSL context array.
+            unsigned num_csl_contexts;
+
+            // Number of bytes encrypted, or decrypted, on the engine associated
+            // with the pool since the last key rotation. Only used during
+            // testing, to force key rotations after a certain encryption size,
+            // see UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD.
+            //
+            // Encryptions on a LCIC pool are accounted for in the paired WLC
+            // pool.
+            //
+            // TODO: Bug 4612912: these accounting variables can be removed once
+            // RM exposes an API to set the key rotation lower threshold.
+            atomic64_t encrypted;
+            atomic64_t decrypted;
+        } key_rotation;
+
+    } conf_computing;
 } uvm_channel_pool_t;

 struct uvm_channel_struct
@@ -322,43 +366,14 @@ struct uvm_channel_struct
        // work launches to match the order of push end-s that triggered them.
        volatile NvU32 gpu_put;

-        // Static pushbuffer for channels with static schedule (WLC/LCIC)
-        uvm_rm_mem_t *static_pb_protected_vidmem;
-
-        // Static pushbuffer staging buffer for WLC
-        uvm_rm_mem_t *static_pb_unprotected_sysmem;
-        void *static_pb_unprotected_sysmem_cpu;
-        void *static_pb_unprotected_sysmem_auth_tag_cpu;
-
-        // The above static locations are required by the WLC (and LCIC)
-        // schedule. Protected sysmem location completes WLC's independence
-        // from the pushbuffer allocator.
+        // Protected sysmem location makes WLC independent from the pushbuffer
+        // allocator. Unprotected sysmem and protected vidmem counterparts
+        // are allocated from the channel pool (sysmem, vidmem).
        void *static_pb_protected_sysmem;

-        // Static tracking semaphore notifier values
-        // Because of LCIC's fixed schedule, the secure semaphore release
-        // mechanism uses two additional static locations for incrementing the
-        // notifier values. See:
-        // . channel_semaphore_secure_release()
-        // . setup_lcic_schedule()
-        // . internal_channel_submit_work_wlc()
-        uvm_rm_mem_t *static_notifier_unprotected_sysmem;
-        NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
-        NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
-        uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
-        uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;
-
-        // Explicit location for push launch tag used by WLC.
-        // Encryption auth tags have to be located in unprotected sysmem.
-        void *launch_auth_tag_cpu;
-        NvU64 launch_auth_tag_gpu_va;
-
        // Used to decrypt the push back to protected sysmem.
        // This happens when profilers register callbacks for migration data.
        uvm_push_crypto_bundle_t *push_crypto_bundles;
-
-        // Accompanying authentication tags for the crypto bundles
-        uvm_rm_mem_t *push_crypto_bundle_auth_tags;
    } conf_computing;

    // RM channel information
@@ -451,6 +466,16 @@ struct uvm_channel_manager_struct
        UVM_BUFFER_LOCATION gpput_loc;
        UVM_BUFFER_LOCATION pushbuffer_loc;
    } conf;
+
+    struct
+    {
+        // Flag indicating that the WLC/LCIC mechanism is ready/setup; should
+        // only be false during (de)initialization.
+        bool wlc_ready;
+
+        // True indicates that key rotation is enabled (UVM-wise).
+        bool key_rotation_enabled;
+    } conf_computing;
 };

 // Create a channel manager for the GPU
@@ -501,6 +526,14 @@ uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel);

 uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel);

+NvU64 uvm_channel_get_static_pb_protected_vidmem_gpu_va(uvm_channel_t *channel);
+
+NvU64 uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(uvm_channel_t *channel);
+
+char* uvm_channel_get_static_pb_unprotected_sysmem_cpu(uvm_channel_t *channel);
+
+char *uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(uvm_channel_t *channel, unsigned tag_index);
+
 static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
 {
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
@@ -532,6 +565,17 @@ static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
    return UVM_CHANNEL_TYPE_MEMOPS;
 }

+// Force key rotation in the engine associated with the given channel pool.
+// Rotation may still not happen if RM cannot acquire the necessary locks (in
+// which case the function returns NV_ERR_STATE_IN_USE).
+//
+// This function should be only invoked in pools in which key rotation is
+// enabled.
+NV_STATUS uvm_channel_pool_rotate_key(uvm_channel_pool_t *pool);
+
+// Retrieve the current encryption key version associated with the channel pool.
+NvU32 uvm_channel_pool_key_version(uvm_channel_pool_t *pool);
+
 // Privileged channels support all the Host and engine methods, while
 // non-privileged channels don't support privileged methods.
 //
@@ -579,12 +623,9 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
 // beginning.
 NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);

-// Check if WLC/LCIC mechanism is ready/setup
-// Should only return false during initialization
 static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
 {
-    return (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] != NULL) &&
-           (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL);
+    return manager->conf_computing.wlc_ready;
 }
 // Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
 // associated with access_channel.
--- a/kernel-open/nvidia-uvm/uvm_channel_test.c
+++ b/kernel-open/nvidia-uvm/uvm_channel_test.c
@@ -793,14 +793,11 @@ done:
 // This test verifies that concurrent pushes using the same channel pool
 // select different channels, when the Confidential Computing feature is
 // enabled.
-static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
+NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
 {
    NV_STATUS status = NV_OK;
-    uvm_channel_pool_t *pool;
-    uvm_push_t *pushes;
-    uvm_gpu_t *gpu;
-    NvU32 i;
-    NvU32 num_pushes;
+    uvm_push_t *pushes = NULL;
+    uvm_gpu_t *gpu = NULL;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;
@@ -810,9 +807,19 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_type_t channel_type;

+        // Key rotation is disabled because this test relies on nested pushes,
+        // which is illegal. If any push other than the first one triggers key
+        // rotation, the test won't complete. This is because key rotation
+        // depends on waiting for ongoing pushes to end, which doesn't happen
+        // if those pushes are ended after the current one begins.
+        uvm_conf_computing_disable_key_rotation(gpu);
+
        for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
-            pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
-            TEST_CHECK_RET(pool != NULL);
+            NvU32 i;
+            NvU32 num_pushes;
+            uvm_channel_pool_t *pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
+
+            TEST_CHECK_GOTO(pool != NULL, error);

            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
@@ -824,7 +831,7 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
            num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);

            pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
-            TEST_CHECK_RET(pushes != NULL);
+            TEST_CHECK_GOTO(pushes != NULL, error);

            for (i = 0; i < num_pushes; i++) {
                uvm_push_t *push = &pushes[i];
@@ -841,19 +848,25 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)

            uvm_kvfree(pushes);
        }
+
+        uvm_conf_computing_enable_key_rotation(gpu);
    }

    uvm_thread_context_lock_enable_tracking();

    return status;
+
 error:
+    if (gpu != NULL)
+        uvm_conf_computing_enable_key_rotation(gpu);
+
    uvm_thread_context_lock_enable_tracking();
    uvm_kvfree(pushes);

    return status;
 }

-static NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
+NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
 {
    uvm_gpu_t *gpu;

@@ -948,7 +961,319 @@ release:
    return NV_OK;
 }

-static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
+// Rotate the key of the given pool up to num_rotations times, retrying when RM
+// reports NV_ERR_STATE_IN_USE. Returns NV_OK if at least one rotation completed
+// (or none was requested), and NV_ERR_STATE_IN_USE if none did.
+static NV_STATUS force_key_rotations(uvm_channel_pool_t *pool, unsigned num_rotations)
+{
+    // Bounds on the retry loop: the worst-case time spent sleeping in a single
+    // call is (max_attempts * sleep_max_us).
+    const unsigned max_attempts = 20;
+    const NvU32 sleep_min_us = 1000;
+    const NvU32 sleep_max_us = 10000;
+    unsigned attempts = 0;
+    unsigned completed = 0;
+
+    if (num_rotations == 0)
+        return NV_OK;
+
+    // Callers may only request a handful of rotations: every rotation attempt
+    // rejected because RM could not take its locks costs a sleep, and a large
+    // request would balloon the test execution time.
+    UVM_ASSERT(num_rotations <= 10);
+
+    while ((attempts < max_attempts) && (completed < num_rotations)) {
+        // The rotation is forced, irrespective of encryption usage.
+        NV_STATUS status = uvm_channel_pool_rotate_key(pool);
+
+        attempts++;
+
+        // NV_ERR_STATE_IN_USE means RM failed to acquire the necessary locks,
+        // so the rotation did not complete. Back off for a bit, then retry.
+        if (status == NV_ERR_STATE_IN_USE) {
+            usleep_range(sleep_min_us, sleep_max_us);
+            continue;
+        }
+
+        // Any other failure ends the function early via TEST_NV_CHECK_RET.
+        TEST_NV_CHECK_RET(status);
+
+        completed++;
+    }
+
+    // The dependent tests still pass when no rotation completes, but then
+    // there is not much value to them. Report an error instead, so that the
+    // maximum number of attempts, or the sleep range, get adjusted until at
+    // least one rotation completes.
+    return (completed > 0) ? NV_OK : NV_ERR_STATE_IN_USE;
+}
+
+static NV_STATUS force_key_rotation(uvm_channel_pool_t *pool)
+{
+    return force_key_rotations(pool, 1); // Convenience wrapper: exactly one rotation.
+}
+
+// Force one key rotation in every pool of the GPU in which rotation is enabled.
+//
+// Rotation may otherwise never happen on certain engines while the UVM tests
+// run. For example, if the MEMOPS channel type is mapped to a CE not shared
+// with any other channel type, the only encryption taking place in that engine
+// comes from semaphore releases (4 bytes each), which is unlikely to exceed
+// even small rotation thresholds.
+static NV_STATUS test_channel_key_rotation_basic(uvm_gpu_t *gpu)
+{
+    uvm_channel_pool_t *pool;
+
+    uvm_for_each_pool(pool, gpu->channel_manager) {
+        // Pools without key rotation are skipped rather than reported.
+        if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
+            TEST_NV_CHECK_RET(force_key_rotation(pool));
+    }
+
+    return NV_OK;
+}
+
+// Interleave forced key rotations (in both copy pools) with encrypted CPU > GPU
+// and GPU > CPU copies, checking that the data survives every round trip.
+static NV_STATUS test_channel_key_rotation_interleave(uvm_gpu_t *gpu)
+{
+    int i;
+    uvm_channel_pool_t *gpu_to_cpu_pool;
+    uvm_channel_pool_t *cpu_to_gpu_pool;
+    NV_STATUS status = NV_OK;
+    size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
+    void *initial_plain_cpu = NULL;
+    void *final_plain_cpu = NULL;
+    uvm_mem_t *plain_gpu = NULL;
+    uvm_gpu_address_t plain_gpu_address;
+
+    cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
+    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));
+
+    gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
+    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));
+
+    initial_plain_cpu = uvm_kvmalloc_zero(size);
+    if (initial_plain_cpu == NULL) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    final_plain_cpu = uvm_kvmalloc_zero(size);
+    if (final_plain_cpu == NULL) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
+    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
+    plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
+
+    memset(initial_plain_cpu, 1, size);
+
+    for (i = 0; i < 5; i++) {
+        TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
+        TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
+
+        TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
+                                                                      plain_gpu_address,
+                                                                      initial_plain_cpu,
+                                                                      size,
+                                                                      NULL,
+                                                                      "CPU > GPU"),
+                           out);
+
+        TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
+        TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
+
+        TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
+                                                                      final_plain_cpu,
+                                                                      plain_gpu_address,
+                                                                      size,
+                                                                      NULL,
+                                                                      "GPU > CPU"),
+                           out);
+
+        TEST_CHECK_GOTO(!memcmp(initial_plain_cpu, final_plain_cpu, size), out);
+        // Clear the destination ahead of the next round trip.
+        memset(final_plain_cpu, 0, size);
+    }
+
+out:
+    uvm_mem_free(plain_gpu);
+    uvm_kvfree(final_plain_cpu);
+    uvm_kvfree(initial_plain_cpu);
+
+    return status;
+}
+
+// Fill the whole vidmem allocation with the byte val, using a CE memset pushed
+// on a GPU_INTERNAL channel, and wait for the push to complete.
+static NV_STATUS memset_vidmem(uvm_mem_t *mem, NvU8 val)
+{
+    uvm_gpu_t *gpu = mem->backing_gpu;
+    uvm_push_t push;
+
+    UVM_ASSERT(uvm_mem_is_vidmem(mem));
+
+    TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));
+
+    gpu->parent->ce_hal->memset_1(&push, uvm_mem_gpu_address_virtual_kernel(mem, gpu), val, mem->size);
+
+    TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
+
+    return NV_OK;
+}
+
+// Variant of uvm_conf_computing_util_memcopy_gpu_to_cpu that lets tests force
+// num_rotations_to_insert key rotations between the end of the push issuing
+// the GPU (CE) encryption, and the CPU decryption of its output.
+static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
+                                              void *dst_plain,
+                                              uvm_gpu_address_t src_gpu_address,
+                                              size_t size,
+                                              unsigned num_rotations_to_insert)
+{
+    NV_STATUS status;
+    uvm_push_t push;
+    uvm_conf_computing_dma_buffer_t *dma_buffer;
+    uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
+    void *src_cipher, *auth_tag;
+    uvm_channel_t *channel;
+
+    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
+    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
+
+    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
+    if (status != NV_OK)
+        return status;
+
+    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Small GPU > CPU encryption");
+    if (status != NV_OK)
+        goto out;
+
+    channel = push.channel;
+    uvm_conf_computing_log_gpu_encryption(channel, size, dma_buffer->decrypt_iv);
+    dma_buffer->key_version[0] = uvm_channel_pool_key_version(channel->pool);
+
+    dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
+    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
+    gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
+
+    status = uvm_push_end_and_wait(&push);
+    if (status != NV_OK)
+        goto out;
+
+    TEST_NV_CHECK_GOTO(force_key_rotations(channel->pool, num_rotations_to_insert), out);
+
+    // If num_rotations_to_insert is not zero, the pool's current key is now
+    // different from the one used during CE encryption. Decryption is
+    // therefore given the key version saved in the DMA buffer beforehand.
+    src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
+    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
+    status = uvm_conf_computing_cpu_decrypt(channel,
+                                            dst_plain,
+                                            src_cipher,
+                                            dma_buffer->decrypt_iv,
+                                            dma_buffer->key_version[0],
+                                            size,
+                                            auth_tag);
+
+ out:
+    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
+    return status;
+}
+
+// Copy 1-filled vidmem to the CPU num_repetitions times, inserting
+// num_rotations_to_insert key rotations ahead of each CPU decryption.
+static NV_STATUS test_channel_key_rotation_cpu_decryption(uvm_gpu_t *gpu,
+                                                          unsigned num_repetitions,
+                                                          unsigned num_rotations_to_insert)
+{
+    unsigned i;
+    uvm_channel_pool_t *gpu_to_cpu_pool;
+    NV_STATUS status = NV_OK;
+    size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
+    NvU8 *plain_cpu = NULL;
+    uvm_mem_t *plain_gpu = NULL;
+    uvm_gpu_address_t plain_gpu_address;
+
+    if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
+        return NV_OK;
+
+    gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
+    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));
+
+    plain_cpu = (NvU8 *) uvm_kvmalloc_zero(size);
+    if (plain_cpu == NULL) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
+    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
+    TEST_NV_CHECK_GOTO(memset_vidmem(plain_gpu, 1), out);
+    plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
+
+    for (i = 0; i < num_repetitions; i++) {
+        unsigned j;
+
+        TEST_NV_CHECK_GOTO(encrypted_memcopy_gpu_to_cpu(gpu,
+                                                        plain_cpu,
+                                                        plain_gpu_address,
+                                                        size,
+                                                        num_rotations_to_insert),
+                          out);
+        // The vidmem source was filled with 1s, so every plain byte must be 1.
+        for (j = 0; j < size; j++)
+            TEST_CHECK_GOTO(plain_cpu[j] == 1, out);
+
+        memset(plain_cpu, 0, size);
+    }
+out:
+    uvm_mem_free(plain_gpu);
+    uvm_kvfree(plain_cpu);
+
+    return status;
+}
+
+// Test that CPU decryptions can use old keys, i.e. previous versions of the
+// keys that are no longer the current key, due to key rotation. Given that SEC2
+// does not expose encryption capabilities, the "decrypt-after-rotation" problem
+// is exclusive to CE encryptions.
+static NV_STATUS test_channel_key_rotation_decrypt_after_key_rotation(uvm_gpu_t *gpu)
+{
+    // Number of key rotations inserted between the GPU encryption and the CPU
+    // decryption. force_key_rotations accepts at most 10 rotations per call.
+    unsigned num_rotations_to_insert = 8;
+
+    TEST_NV_CHECK_RET(test_channel_key_rotation_cpu_decryption(gpu, 1, num_rotations_to_insert));
+
+    return NV_OK;
+}
+
+// Run the key rotation tests on the GPUs of the VA space. The loop stops at
+// the first GPU on which key rotation is disabled.
+static NV_STATUS test_channel_key_rotation(uvm_va_space_t *va_space)
+{
+    uvm_gpu_t *gpu;
+
+    if (!g_uvm_global.conf_computing_enabled)
+        return NV_OK;
+
+    for_each_va_space_gpu(gpu, va_space) {
+        if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
+            break;
+
+        TEST_NV_CHECK_RET(test_channel_key_rotation_basic(gpu));
+        TEST_NV_CHECK_RET(test_channel_key_rotation_interleave(gpu));
+        TEST_NV_CHECK_RET(test_channel_key_rotation_decrypt_after_key_rotation(gpu));
+    }
+
+    return NV_OK;
+}
+
+NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
 {
    uvm_gpu_t *gpu;

@@ -987,7 +1312,7 @@ static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
    return NV_OK;
 }

-static NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
+NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
 {
    uvm_gpu_t *gpu;

@@ -1035,7 +1360,7 @@ static NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
    return NV_OK;
 }

-static NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
+NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
 {
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu;
@@ -1203,6 +1528,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
    if (status != NV_OK)
        goto done;

+    status = test_channel_key_rotation(va_space);
+    if (status != NV_OK)
+        goto done;
+
    // The following tests have side effects, they reset the GPU's
    // channel_manager.
    status = test_channel_pushbuffer_extension_base(va_space);
@@ -1338,6 +1667,126 @@ done:
    return status;
 }

+// CPU_TO_GPU operation of the key rotation stress test: perform
+// params->iterations encrypted CPU > GPU copies of a 1-filled CPU buffer.
+static NV_STATUS channel_stress_key_rotation_cpu_encryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
+{
+    NvU32 i;
+    uvm_channel_pool_t *cpu_to_gpu_pool;
+    NV_STATUS status = NV_OK;
+    size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
+    void *initial_plain_cpu = NULL;
+    uvm_mem_t *plain_gpu = NULL;
+    uvm_gpu_address_t plain_gpu_address;
+
+    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU);
+
+    cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
+    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));
+
+    initial_plain_cpu = uvm_kvmalloc_zero(size);
+    if (initial_plain_cpu == NULL) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
+    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
+    plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
+
+    memset(initial_plain_cpu, 1, size);
+
+    // The same CPU buffer is copied in every iteration; the destination
+    // contents are never read back here.
+    for (i = 0; i < params->iterations; i++) {
+        TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu, plain_gpu_address, initial_plain_cpu,
+                                                                      size, NULL, "CPU > GPU"),
+                           out);
+    }
+
+out:
+    uvm_mem_free(plain_gpu);
+    uvm_kvfree(initial_plain_cpu);
+
+    return status;
+}
+
+// GPU_TO_CPU operation of the key rotation stress test: params->iterations
+// encrypted GPU > CPU copies, none of which inserts a key rotation of its own.
+static NV_STATUS channel_stress_key_rotation_cpu_decryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
+{
+    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU);
+
+    return test_channel_key_rotation_cpu_decryption(gpu, params->iterations, 0);
+}
+
+// ROTATE operation of the key rotation stress test: force params->iterations
+// key rotations, cycling over the CPU_TO_GPU, GPU_TO_CPU and WLC pools.
+static NV_STATUS channel_stress_key_rotation_rotate(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
+{
+    // Channel type whose pool is rotated, indexed by (iteration % 3).
+    static const uvm_channel_type_t rotated_types[] = {
+        UVM_CHANNEL_TYPE_CPU_TO_GPU,
+        UVM_CHANNEL_TYPE_GPU_TO_CPU,
+        UVM_CHANNEL_TYPE_WLC,
+    };
+    NvU32 iteration;
+
+    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE);
+
+    for (iteration = 0; iteration < params->iterations; iteration++) {
+        uvm_channel_type_t type = rotated_types[iteration % 3];
+        uvm_channel_pool_t *pool = gpu->channel_manager->pool_to_use.default_for_type[type];
+        NV_STATUS status;
+
+        // A pool in the cycle without key rotation enabled is an error.
+        if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
+            return NV_ERR_INVALID_STATE;
+
+        status = force_key_rotation(pool);
+        if (status != NV_OK)
+            return status;
+    }
+
+    return NV_OK;
+}
+
+// Run one key rotation stress operation; see the user-level test for the goal.
+static NV_STATUS uvm_test_channel_stress_key_rotation(uvm_va_space_t *va_space, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
+{
+    NV_STATUS status = NV_OK;
+    uvm_test_rng_t rng;
+    uvm_gpu_t *gpu;
+
+    if (!g_uvm_global.conf_computing_enabled)
+        return NV_OK;
+
+    uvm_test_rng_init(&rng, params->seed);
+    uvm_va_space_down_read(va_space);
+
+    // Key rotation should be enabled, or disabled, in all GPUs: pick a random one.
+    gpu = random_va_space_gpu(&rng, va_space);
+
+    if (uvm_conf_computing_is_key_rotation_enabled(gpu)) {
+        switch (params->key_rotation_operation) {
+            case UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU:
+                status = channel_stress_key_rotation_cpu_encryption(gpu, params);
+                break;
+            case UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU:
+                status = channel_stress_key_rotation_cpu_decryption(gpu, params);
+                break;
+            case UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE:
+                status = channel_stress_key_rotation_rotate(gpu, params);
+                break;
+            default:
+                status = NV_ERR_INVALID_PARAMETER;
+        }
+    }
+
+    uvm_va_space_up_read(va_space);
+    return status;
+}
+
 NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
 {
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
@@ -1349,6 +1798,8 @@ NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct
            return uvm_test_channel_stress_update_channels(va_space, params);
        case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
            return uvm_test_channel_noop_push(va_space, params);
+        case UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION:
+            return uvm_test_channel_stress_key_rotation(va_space, params);
        default:
            return NV_ERR_INVALID_PARAMETER;
    }
--- a/kernel-open/nvidia-uvm/uvm_common.h
+++ b/kernel-open/nvidia-uvm/uvm_common.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2013-2023 NVIDIA Corporation
+    Copyright (c) 2013-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -423,7 +423,9 @@ static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *
    UVM_ASSERT(first);
    UVM_ASSERT(outer);

-    if (uvm_platform_uses_canonical_form_address()) {
+    // Maxwell GPUs (num_va_bits == 40b) do not support canonical form address
+    // even when plugged into platforms using it.
+    if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) {
        *first = 1ULL << (num_va_bits - 1);
        *outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
    }
--- a/kernel-open/nvidia-uvm/uvm_conf_computing.c
+++ b/kernel-open/nvidia-uvm/uvm_conf_computing.c
@@ -33,6 +33,15 @@
 #include "nv_uvm_interface.h"
 #include "uvm_va_block.h"

+// Amount of encrypted data on a given engine that triggers key rotation. This
+// is a UVM internal threshold, different from that of RM, and used only during
+// testing.
+//
+// Key rotation is triggered when the total encryption size, or the total
+// decryption size (whatever comes first) reaches this lower threshold on the
+// engine.
+#define UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD (UVM_SIZE_1MB * 8)
+
 // The maximum number of secure operations per push is:
 // UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
 // + 1 (tracking semaphore) =  128 * 1024 / 56 + 1 = 2342
@@ -352,6 +361,19 @@ error:
    return status;
 }

+// The production key rotation defaults are such that key rotations rarely
+// happen. During UVM testing more frequent rotations are triggered by relying
+// on internal encryption usage accounting. When key rotations are triggered by
+// UVM, the driver does not rely on channel key rotation notifiers.
+//
+// TODO: Bug 4612912: UVM should be able to programmatically set the rotation
+// lower threshold. This function, and all the metadata associated with it
+// (per-pool encryption accounting, for example) can be removed at that point.
+static bool key_rotation_is_notifier_driven(void)
+{
+    return !uvm_enable_builtin_tests;
+}
+
 NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
 {
    NV_STATUS status;
@@ -394,17 +416,35 @@ void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
    conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
 }

-void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
+void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv)
 {
    NV_STATUS status;
+    uvm_channel_pool_t *pool;
+
+    if (uvm_channel_is_lcic(channel))
+        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
+    else
+        pool = channel->pool;

    uvm_mutex_lock(&channel->csl.ctx_lock);
+
+    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
+        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, size);
+
+        // Informing RM of an encryption/decryption should not fail
+        UVM_ASSERT(status == NV_OK);
+
+        if (!key_rotation_is_notifier_driven())
+            atomic64_add(size, &pool->conf_computing.key_rotation.encrypted);
+    }
+
    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
-    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);
+
+    uvm_mutex_unlock(&channel->csl.ctx_lock);
 }

 void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
@@ -428,27 +468,46 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
                                    void *auth_tag_buffer)
 {
    NV_STATUS status;
+    uvm_channel_pool_t *pool;

    UVM_ASSERT(size);

+    if (uvm_channel_is_lcic(channel))
+        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
+    else
+        pool = channel->pool;
+
    uvm_mutex_lock(&channel->csl.ctx_lock);
+
    status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
                                      size,
                                      (NvU8 const *) src_plain,
                                      encrypt_iv,
                                      (NvU8 *) dst_cipher,
                                      (NvU8 *) auth_tag_buffer);
-    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);
+
+    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
+        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, size);
+
+        // Informing RM of an encryption/decryption should not fail
+        UVM_ASSERT(status == NV_OK);
+
+        if (!key_rotation_is_notifier_driven())
+            atomic64_add(size, &pool->conf_computing.key_rotation.decrypted);
+    }
+
+    uvm_mutex_unlock(&channel->csl.ctx_lock);
 }

 NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                         void *dst_plain,
                                         const void *src_cipher,
                                         const UvmCslIv *src_iv,
+                                         NvU32 key_version,
                                         size_t size,
                                         const void *auth_tag_buffer)
 {
@@ -469,11 +528,19 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                      size,
                                      (const NvU8 *) src_cipher,
                                      src_iv,
-                                      NV_U32_MAX,
+                                      key_version,
                                      (NvU8 *) dst_plain,
                                      NULL,
                                      0,
                                      (const NvU8 *) auth_tag_buffer);
+
+    if (status != NV_OK) {
+        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, channel %s, GPU %s\n",
+                      nvstatusToString(status),
+                      channel->name,
+                      uvm_gpu_name(uvm_channel_get_gpu(channel)));
+    }
+
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    return status;
@@ -640,3 +707,231 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
 {
    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, true);
 }
+
+void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu)
+{
+    if (!g_uvm_global.conf_computing_enabled)
+        return;
+
+    // Key rotation cannot be enabled on UVM if it is disabled on RM
+    if (!gpu->parent->rm_info.gpuConfComputeCaps.bKeyRotationEnabled)
+        return;
+
+    gpu->channel_manager->conf_computing.key_rotation_enabled = true;
+}
+
+void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
+{
+    if (!g_uvm_global.conf_computing_enabled)
+        return;
+
+    gpu->channel_manager->conf_computing.key_rotation_enabled = false;
+}
+
+bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
+{
+    return gpu->channel_manager->conf_computing.key_rotation_enabled;
+}
+
+bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
+{
+    if (!uvm_conf_computing_is_key_rotation_enabled(pool->manager->gpu))
+        return false;
+
+    // TODO: Bug 4586447: key rotation must be disabled in the SEC2 engine,
+    // because currently the encryption key is shared between UVM and RM, but
+    // UVM is not able to idle SEC2 channels owned by RM.
+    if (uvm_channel_pool_is_sec2(pool))
+        return false;
+
+    // Key rotation happens as part of channel reservation, and LCIC channels
+    // are never reserved directly. Rotation of keys in LCIC channels happens
+    // as the result of key rotation in WLC channels.
+    //
+    // Return false even if there is nothing fundamental prohibiting direct key
+    // rotation on LCIC pools
+    if (uvm_channel_pool_is_lcic(pool))
+        return false;
+
+    return true;
+}
+
+static bool conf_computing_is_key_rotation_pending_use_stats(uvm_channel_pool_t *pool)
+{
+    NvU64 decrypted, encrypted;
+
+    UVM_ASSERT(!key_rotation_is_notifier_driven());
+
+    decrypted = atomic64_read(&pool->conf_computing.key_rotation.decrypted);
+
+    if (decrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
+        return true;
+
+    encrypted = atomic64_read(&pool->conf_computing.key_rotation.encrypted);
+
+    if (encrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
+        return true;
+
+    return false;
+}
+
+static bool conf_computing_is_key_rotation_pending_use_notifier(uvm_channel_pool_t *pool)
+{
+    // If key rotation is pending for the pool's engine, then the key rotation
+    // notifier in any of the engine channels can be used by UVM to detect the
+    // situation. Note that RM doesn't update all the notifiers in a single
+    // atomic operation, so it is possible that the channel read by UVM (the
+    // first one in the pool) indicates that a key rotation is pending, but
+    // another channel in the pool (temporarily) indicates the opposite, or vice
+    // versa.
+    uvm_channel_t *first_channel = pool->channels;
+
+    UVM_ASSERT(key_rotation_is_notifier_driven());
+    UVM_ASSERT(first_channel != NULL);
+
+    return first_channel->channel_info.keyRotationNotifier->status == UVM_KEY_ROTATION_STATUS_PENDING;
+}
+
+bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool)
+{
+    if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
+        return false;
+
+    if (key_rotation_is_notifier_driven())
+        return conf_computing_is_key_rotation_pending_use_notifier(pool);
+    else
+        return conf_computing_is_key_rotation_pending_use_stats(pool);
+}
+
+NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool)
+{
+    NV_STATUS status;
+
+    UVM_ASSERT(uvm_conf_computing_is_key_rotation_enabled_in_pool(pool));
+    UVM_ASSERT(pool->conf_computing.key_rotation.csl_contexts != NULL);
+    UVM_ASSERT(pool->conf_computing.key_rotation.num_csl_contexts > 0);
+
+    // NV_ERR_STATE_IN_USE indicates that RM was not able to acquire the
+    // required locks at this time. This status is not interpreted as an error,
+    // but as a sign for UVM to try again later. This is the same "protocol"
+    // used in IV rotation.
+    status = nvUvmInterfaceCslRotateKey(pool->conf_computing.key_rotation.csl_contexts,
+                                        pool->conf_computing.key_rotation.num_csl_contexts);
+
+    if (status == NV_OK) {
+        pool->conf_computing.key_rotation.version++;
+
+        if (!key_rotation_is_notifier_driven()) {
+            atomic64_set(&pool->conf_computing.key_rotation.decrypted, 0);
+            atomic64_set(&pool->conf_computing.key_rotation.encrypted, 0);
+        }
+    }
+    else if (status != NV_ERR_STATE_IN_USE) {
+        UVM_DBG_PRINT("nvUvmInterfaceCslRotateKey() failed in engine %u: %s\n",
+                      pool->engine_index,
+                      nvstatusToString(status));
+    }
+
+    return status;
+}
+
+__attribute__ ((format(printf, 6, 7)))
+NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
+                                                     uvm_gpu_address_t dst_gpu_address,
+                                                     void *src_plain,
+                                                     size_t size,
+                                                     uvm_tracker_t *tracker,
+                                                     const char *format,
+                                                     ...)
+{
+    NV_STATUS status;
+    uvm_push_t push;
+    uvm_conf_computing_dma_buffer_t *dma_buffer;
+    uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
+    void *dst_cipher, *auth_tag;
+    va_list args;
+
+    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
+    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
+
+    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
+    if (status != NV_OK)
+        return status;
+
+    va_start(args, format);
+    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
+    va_end(args);
+
+    if (status != NV_OK)
+        goto out;
+
+    dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
+    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
+    uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);
+
+    src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
+    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
+    gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
+
+    status = uvm_push_end_and_wait(&push);
+
+out:
+    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
+    return status;
+}
+
+__attribute__ ((format(printf, 6, 7)))
+NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
+                                                     void *dst_plain,
+                                                     uvm_gpu_address_t src_gpu_address,
+                                                     size_t size,
+                                                     uvm_tracker_t *tracker,
+                                                     const char *format,
+                                                     ...)
+{
+    NV_STATUS status;
+    uvm_push_t push;
+    uvm_conf_computing_dma_buffer_t *dma_buffer;
+    uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
+    void *src_cipher, *auth_tag;
+    va_list args;
+
+    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
+    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
+
+    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
+    if (status != NV_OK)
+        return status;
+
+    va_start(args, format);
+    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
+    va_end(args);
+
+    if (status != NV_OK)
+        goto out;
+
+    uvm_conf_computing_log_gpu_encryption(push.channel, size, dma_buffer->decrypt_iv);
+    dma_buffer->key_version[0] = uvm_channel_pool_key_version(push.channel->pool);
+
+    dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
+    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
+    gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
+
+    status = uvm_push_end_and_wait(&push);
+    if (status != NV_OK)
+        goto out;
+
+    src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
+    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
+    status = uvm_conf_computing_cpu_decrypt(push.channel,
+                                            dst_plain,
+                                            src_cipher,
+                                            dma_buffer->decrypt_iv,
+                                            dma_buffer->key_version[0],
+                                            size,
+                                            auth_tag);
+
+ out:
+    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
+    return status;
+}
--- a/kernel-open/nvidia-uvm/uvm_conf_computing.h
+++ b/kernel-open/nvidia-uvm/uvm_conf_computing.h
@@ -87,9 +87,9 @@ typedef struct
    // a free buffer.
    uvm_tracker_t tracker;

-    // When the DMA buffer is used as the destination of a GPU encryption, SEC2
-    // writes the authentication tag here. Later when the buffer is decrypted
-    // on the CPU the authentication tag is used again (read) for CSL to verify
+    // When the DMA buffer is used as the destination of a GPU encryption, the
+    // engine (CE or SEC2) writes the authentication tag here. When the buffer
+    // is decrypted on the CPU the authentication tag is used by CSL to verify
    // the authenticity. The allocation is big enough for one authentication
    // tag per PAGE_SIZE page in the alloc buffer.
    uvm_mem_t *auth_tag;
@@ -98,7 +98,12 @@ typedef struct
    // to the authentication tag. The allocation is big enough for one IV per
    // PAGE_SIZE page in the alloc buffer. The granularity between the decrypt
    // IV and authentication tag must match.
-    UvmCslIv decrypt_iv[(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE)];
+    UvmCslIv decrypt_iv[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];
+
+    // When the DMA buffer is used as the destination of a GPU encryption, the
+    // key version used during GPU encryption of each PAGE_SIZE page can be
+    // saved here, so CPU decryption uses the correct decryption key.
+    NvU32 key_version[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];

    // Bitmap of the encrypted pages in the backing allocation
    uvm_page_mask_t encrypted_page_mask;
@@ -147,7 +152,7 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu);
 void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu);

 // Logs encryption information from the GPU and returns the IV.
-void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv);
+void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv);

 // Acquires next CPU encryption IV and returns it.
 void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv);
@@ -167,10 +172,14 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
 // CPU side decryption helper. Decrypts data from src_cipher and writes the
 // plain text in dst_plain. src_cipher and dst_plain can't overlap. IV obtained
 // from uvm_conf_computing_log_gpu_encryption() needs to be be passed to src_iv.
+//
+// The caller must indicate which key to use for decryption by passing the
+// appropriate key version number.
 NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                         void *dst_plain,
                                         const void *src_cipher,
                                         const UvmCslIv *src_iv,
+                                         NvU32 key_version,
                                         size_t size,
                                         const void *auth_tag_buffer);

@@ -214,4 +223,71 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
 // Check if there are fewer than 'limit' messages available in either direction
 // and rotate if not.
 NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy);
+
+// Rotate the engine key associated with the given channel pool.
+NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool);
+
+// Returns true if key rotation is allowed in the channel pool.
+bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool);
+
+// Returns true if key rotation is pending in the channel pool.
+bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool);
+
+// Enable/disable key rotation in the passed GPU. Note that UVM enablement is
+// dependent on RM enablement: key rotation may still be disabled upon calling
+// this function, if it is disabled in RM. On the other hand, key rotation can
+// be disabled in UVM, even if it is enabled in RM.
+//
+// Enablement/Disablement affects only kernel key rotation in keys owned by UVM.
+// It doesn't affect user key rotation (CUDA, Video...), nor does it affect RM
+// kernel key rotation.
+void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu);
+void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu);
+
+// Returns true if key rotation is enabled on UVM in the given GPU. Key rotation
+// can be enabled on the GPU but disabled on some of its engines (LCEs or SEC2),
+// see uvm_conf_computing_is_key_rotation_enabled_in_pool.
+bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu);
+
+// Launch a synchronous, encrypted copy between CPU and GPU.
+//
+// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
+//
+// The source CPU buffer pointed by src_plain contains the unencrypted (plain
+// text) contents; the function internally performs a CPU-side encryption step
+// before launching the GPU-side CE decryption. The source buffer can be in
+// protected or unprotected sysmem, while the destination buffer must be in
+// protected vidmem.
+//
+// The input tracker, if not NULL, is internally acquired by the push
+// responsible for the encrypted copy.
+__attribute__ ((format(printf, 6, 7)))
+NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
+                                                     uvm_gpu_address_t dst_gpu_address,
+                                                     void *src_plain,
+                                                     size_t size,
+                                                     uvm_tracker_t *tracker,
+                                                     const char *format,
+                                                     ...);
+
+// Launch a synchronous, encrypted copy between GPU and CPU.
+//
+// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
+//
+// The source GPU buffer at src_gpu_address is encrypted by the GPU (CE) into
+// an internal DMA buffer; once that push completes, the function performs a
+// CPU-side decryption step that writes the unencrypted (plain text) contents
+// to the destination CPU buffer pointed by dst_plain. Any failure (DMA buffer
+// allocation, push, or CPU decryption) is returned to the caller.
+//
+// The input tracker, if not NULL, is internally acquired by the push
+// responsible for the encrypted copy.
+__attribute__ ((format(printf, 6, 7)))
+NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
+                                                     void *dst_plain,
+                                                     uvm_gpu_address_t src_gpu_address,
+                                                     size_t size,
+                                                     uvm_tracker_t *tracker,
+                                                     const char *format,
+                                                     ...);
 #endif // __UVM_CONF_COMPUTING_H__
--- a/kernel-open/nvidia-uvm/uvm_get_rm_ptes_test.c
+++ b/kernel-open/nvidia-uvm/uvm_get_rm_ptes_test.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -119,6 +119,10 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
    if (memory_owning_gpu == NULL)
        return NV_ERR_INVALID_DEVICE;

+    // TODO: Bug 1903234: Once RM supports indirect peer mappings, we'll need to
+    //       update this test since the aperture will be SYS. Depending on how
+    //       RM implements things, we might not be able to compare the physical
+    //       addresses either.
    aperture = get_aperture(va_space, memory_owning_gpu, memory_mapping_gpu, memory_info, sli_supported);

    if (is_cacheable(ext_mapping_info, aperture))
--- a/kernel-open/nvidia-uvm/uvm_gpu.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu.c
@@ -81,8 +81,6 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
            return UVM_GPU_LINK_NVLINK_3;
        case UVM_LINK_TYPE_NVLINK_4:
            return UVM_GPU_LINK_NVLINK_4;
-        case UVM_LINK_TYPE_NVLINK_5:
-            return UVM_GPU_LINK_NVLINK_5;
        case UVM_LINK_TYPE_C2C:
            return UVM_GPU_LINK_C2C;
        default:
@@ -220,9 +218,8 @@ static NV_STATUS alloc_and_init_address_space(uvm_gpu_t *gpu)
    if (status != NV_OK)
        return status;

-    UVM_ASSERT(gpu_address_space_info.bigPageSize <= NV_U32_MAX);
-
    gpu->big_page.internal_size = gpu_address_space_info.bigPageSize;
+
    gpu->time.time0_register = gpu_address_space_info.time0Offset;
    gpu->time.time1_register = gpu_address_space_info.time1Offset;

@@ -461,8 +458,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)

 static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
 {
-
-    BUILD_BUG_ON(UVM_GPU_LINK_MAX != 8);
+    BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);

    switch (link_type) {
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
@@ -471,7 +467,6 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
-        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_5);
        UVM_ENUM_STRING_CASE(UVM_GPU_LINK_C2C);
        UVM_ENUM_STRING_DEFAULT();
    }
@@ -1087,6 +1082,9 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
                   gpu->parent->rm_va_size,
                   va_per_entry);

+    UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
+    UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));
+
    tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
    status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
                                                               tree_alloc->addr.address,
@@ -1682,9 +1680,12 @@ static void remove_gpu(uvm_gpu_t *gpu)
    // TODO: Bug 2008200: Add and remove the GPU in a more reasonable spot.
    uvm_conf_computing_gpu_deinit(gpu);

-    // If the parent is not being freed, the following gpu_table_lock is only
-    // needed to protect concurrent uvm_parent_gpu_find_first_valid_gpu() in BH
-    // from the __clear_bit here.
+    // TODO: Bug 2844714: If the parent is not being freed, the following
+    // gpu_table_lock is only needed to protect concurrent
+    // find_first_valid_gpu() in BH from the __clear_bit here. After
+    // find_first_valid_gpu() is removed, gpu_table_lock should only be acquired
+    // and released in the free_parent case.
+    //
    // In the free_parent case, gpu_table_lock protects the top half from the
    // uvm_global_remove_parent_gpu()
    uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
@@ -2255,13 +2256,28 @@ static void set_optimal_p2p_write_ces(const UvmGpuP2PCapsParams *p2p_caps_params
    bool sorted;
    NvU32 ce0, ce1;

-    if (peer_caps->link_type < UVM_GPU_LINK_NVLINK_1)
+    UVM_ASSERT(peer_caps->ref_count);
+    UVM_ASSERT(gpu0->parent->peer_copy_mode == gpu1->parent->peer_copy_mode);
+
+    if (gpu0->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_UNSUPPORTED)
        return;

    sorted = uvm_id_value(gpu0->id) < uvm_id_value(gpu1->id);
    ce0 = p2p_caps_params->optimalNvlinkWriteCEs[sorted ? 0 : 1];
    ce1 = p2p_caps_params->optimalNvlinkWriteCEs[sorted ? 1 : 0];

+    // Indirect peers communicate through the CPU, so the optimal CE
+    // should match the one selected for writing to system memory
+    if (peer_caps->is_indirect_peer) {
+        uvm_channel_pool_t *pool;
+
+        pool = gpu0->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
+        UVM_ASSERT(ce0 == pool->engine_index);
+
+        pool = gpu1->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
+        UVM_ASSERT(ce1 == pool->engine_index);
+    }
+
    uvm_channel_manager_set_p2p_ce(gpu0->channel_manager, gpu1, ce0);
    uvm_channel_manager_set_p2p_ce(gpu1->channel_manager, gpu0, ce1);
 }
@@ -2269,7 +2285,7 @@ static void set_optimal_p2p_write_ces(const UvmGpuP2PCapsParams *p2p_caps_params
 static int nv_procfs_read_gpu_peer_caps(struct seq_file *s, void *v)
 {
    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
-            return -EAGAIN;
+        return -EAGAIN;

    gpu_peer_caps_print((uvm_gpu_t **)s->private, s);

@@ -2351,51 +2367,74 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,

    // check for peer-to-peer compatibility (PCI-E or NvLink).
    peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
-    if (peer_caps->link_type == UVM_GPU_LINK_INVALID || peer_caps->link_type == UVM_GPU_LINK_C2C)
+    if (peer_caps->link_type == UVM_GPU_LINK_INVALID
+        || peer_caps->link_type == UVM_GPU_LINK_C2C
+        )
        return NV_ERR_NOT_SUPPORTED;

    peer_caps->total_link_line_rate_mbyte_per_s = p2p_caps_params->totalLinkLineRateMBps;

    // Initialize peer ids and establish peer mappings
-    // Peer id from min(gpu_id0, gpu_id1) -> max(gpu_id0, gpu_id1)
-    peer_caps->peer_ids[0] = p2p_caps_params->peerIds[0];
+    peer_caps->is_indirect_peer = (p2p_caps_params->indirectAccess == NV_TRUE);

-    // Peer id from max(gpu_id0, gpu_id1) -> min(gpu_id0, gpu_id1)
-    peer_caps->peer_ids[1] = p2p_caps_params->peerIds[1];
+    if (peer_caps->is_indirect_peer) {
+        UVM_ASSERT(gpu0->mem_info.numa.enabled);
+        UVM_ASSERT(gpu1->mem_info.numa.enabled);

-    // Establish peer mappings from each GPU to the other.
-    status = uvm_mmu_create_peer_identity_mappings(gpu0, gpu1);
-    if (status != NV_OK)
-        return status;
+        status = uvm_pmm_gpu_indirect_peer_init(&gpu0->pmm, gpu1);
+        if (status != NV_OK)
+            return status;

-    status = uvm_mmu_create_peer_identity_mappings(gpu1, gpu0);
-    if (status != NV_OK)
-        return status;
+        status = uvm_pmm_gpu_indirect_peer_init(&gpu1->pmm, gpu0);
+        if (status != NV_OK)
+            return status;

-    set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);
+        set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);
+        UVM_ASSERT(peer_caps->total_link_line_rate_mbyte_per_s == 0);
+    }
+    else {
+        // Peer id from min(gpu_id0, gpu_id1) -> max(gpu_id0, gpu_id1)
+        peer_caps->peer_ids[0] = p2p_caps_params->peerIds[0];

-    UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
-    UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
+        // Peer id from max(gpu_id0, gpu_id1) -> min(gpu_id0, gpu_id1)
+        peer_caps->peer_ids[1] = p2p_caps_params->peerIds[1];

-    // In the case of NVLINK peers, this initialization will happen during
-    // add_gpu. As soon as the peer info table is assigned below, the access
-    // counter bottom half could start operating on the GPU being newly
-    // added and inspecting the peer caps, so all of the appropriate
-    // initialization must happen before this point.
-    uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
+        // Establish peer mappings from each GPU to the other. Indirect peers
+        // do not require identity mappings since they use sysmem aperture to
+        // communicate.
+        status = uvm_mmu_create_peer_identity_mappings(gpu0, gpu1);
+        if (status != NV_OK)
+            return status;

-    uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
-    UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
-    gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
+        status = uvm_mmu_create_peer_identity_mappings(gpu1, gpu0);
+        if (status != NV_OK)
+            return status;

-    uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
-    uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
+        set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);

-    uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
-    UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
-    gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
+        UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
+        UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);

-    uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
+        // In the case of NVLINK peers, this initialization will happen during
+        // add_gpu. As soon as the peer info table is assigned below, the access
+        // counter bottom half could start operating on the GPU being newly
+        // added and inspecting the peer caps, so all of the appropriate
+        // initialization must happen before this point.
+        uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
+
+        uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
+        UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
+        gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
+
+        uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
+        uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
+
+        uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
+        UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
+        gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
+
+        uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
+    }

    return init_procfs_peer_files(gpu0, gpu1);
 }
@@ -2463,6 +2502,7 @@ static NV_STATUS enable_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
        goto cleanup;

    // Sanity checks
+    UVM_ASSERT(p2p_caps_params.indirectAccess == NV_FALSE);
    UVM_ASSERT(p2p_caps_params.p2pLink == UVM_LINK_TYPE_PCIE);

    status = init_peer_access(gpu0, gpu1, &p2p_caps_params, peer_caps);
@@ -2492,26 +2532,29 @@ static NV_STATUS enable_nvlink_peer_access(uvm_gpu_t *gpu0,
    UVM_ASSERT(peer_caps->ref_count == 0);
    peer_caps->ref_count = 1;

-    // Create P2P object for direct NVLink peers
-    status = create_p2p_object(gpu0, gpu1, &p2p_handle);
-    if (status != NV_OK) {
-        UVM_ERR_PRINT("failed to create a P2P object with error: %s, for GPU1:%s and GPU2:%s \n",
-                       nvstatusToString(status),
-                       uvm_gpu_name(gpu0),
-                       uvm_gpu_name(gpu1));
-        return status;
+    if (!p2p_caps_params->indirectAccess) {
+        // Create P2P object for direct NVLink peers
+        status = create_p2p_object(gpu0, gpu1, &p2p_handle);
+        if (status != NV_OK) {
+            UVM_ERR_PRINT("failed to create a P2P object with error: %s, for GPU1:%s and GPU2:%s \n",
+                           nvstatusToString(status),
+                           uvm_gpu_name(gpu0),
+                           uvm_gpu_name(gpu1));
+            return status;
+        }
+
+        UVM_ASSERT(p2p_handle != 0);
+
+        // Store the handle in the global table.
+        peer_caps->p2p_handle = p2p_handle;
+
+        // Update p2p caps after p2p object creation as it generates the peer
+        // ids
+        status = get_p2p_caps(gpu0, gpu1, p2p_caps_params);
+        if (status != NV_OK)
+            goto cleanup;
    }

-    UVM_ASSERT(p2p_handle != 0);
-
-    // Store the handle in the global table.
-    peer_caps->p2p_handle = p2p_handle;
-
-    // Update p2p caps after p2p object creation as it generates the peer ids.
-    status = get_p2p_caps(gpu0, gpu1, p2p_caps_params);
-    if (status != NV_OK)
-        goto cleanup;
-
    status = init_peer_access(gpu0, gpu1, p2p_caps_params, peer_caps);
    if (status != NV_OK)
        goto cleanup;
@@ -2546,6 +2589,11 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
        if (p2p_caps_params.p2pLink == UVM_LINK_TYPE_NONE || p2p_caps_params.p2pLink == UVM_LINK_TYPE_PCIE)
            continue;

+        // Indirect peers are only supported when onlined as NUMA nodes, because
+        // we want to use vm_insert_page and dma_map_page.
+        if (p2p_caps_params.indirectAccess && (!gpu->mem_info.numa.enabled || !other_gpu->mem_info.numa.enabled))
+            continue;
+
        status = enable_nvlink_peer_access(gpu, other_gpu, &p2p_caps_params);
        if (status != NV_OK)
            goto cleanup;
@@ -2634,25 +2682,32 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
        deinit_procfs_peer_cap_files(peer_caps);

    p2p_handle = peer_caps->p2p_handle;
-    UVM_ASSERT(p2p_handle);

-    uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
-    uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
+    if (peer_caps->is_indirect_peer) {
+        uvm_pmm_gpu_indirect_peer_destroy(&gpu0->pmm, gpu1);
+        uvm_pmm_gpu_indirect_peer_destroy(&gpu1->pmm, gpu0);
+    }
+    else {
+        UVM_ASSERT(p2p_handle);

-    uvm_rm_locked_call_void(nvUvmInterfaceP2pObjectDestroy(uvm_global_session_handle(), p2p_handle));
+        uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
+        uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);

-    UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
-    UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
+        uvm_rm_locked_call_void(nvUvmInterfaceP2pObjectDestroy(uvm_global_session_handle(), p2p_handle));

-    uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
-    uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
-    gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
-    uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
+        UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
+        UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);

-    uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
-    uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
-    gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
-    uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
+        uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
+        uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
+        gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
+        uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
+
+        uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
+        uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
+        gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
+        uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
+    }

    // Flush the access counter buffer to avoid getting stale notifications for
    // accesses to GPUs to which peer access is being disabled. This is also
@@ -2692,6 +2747,10 @@ static uvm_aperture_t uvm_gpu_peer_caps_aperture(uvm_gpu_peer_t *peer_caps, uvm_
 {
    size_t peer_index;

+    // Indirect peers are accessed as sysmem addresses
+    if (peer_caps->is_indirect_peer)
+        return UVM_APERTURE_SYS;
+
    // MIG instances in the same physical GPU have vidmem addresses
    if (local_gpu->parent == remote_gpu->parent)
        return UVM_APERTURE_VID;
@@ -2742,7 +2801,6 @@ uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_p
    for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
        uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];
        UVM_ASSERT(other_gpu);
-        UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));

        if (uvm_gpus_are_nvswitch_connected(gpu, other_gpu)) {
            // NVSWITCH connected systems use an extended physical address to
@@ -2779,7 +2837,7 @@ static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)

    // Instance pointers must be 4k aligned and they must have either VID or SYS
    // apertures. Compress them as much as we can both to guarantee that the key
-    // fits within 64 bits, and to make the key space as small as possible.
+    // fits within 64 bits, and to make the table as shallow as possible.
    UVM_ASSERT(IS_ALIGNED(instance_ptr.address, UVM_PAGE_SIZE_4K));
    UVM_ASSERT(instance_ptr.aperture == UVM_APERTURE_VID || instance_ptr.aperture == UVM_APERTURE_SYS);

@@ -2796,7 +2854,7 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
    uvm_rb_tree_node_t *channel_tree_node;
    uvm_user_channel_subctx_info_t *channel_subctx_info;
    uvm_user_channel_subctx_info_t *new_channel_subctx_info = NULL;
-    uvm_gpu_va_space_t *gpu_va_space = user_channel->gpu_va_space;
+    uvm_va_space_t *va_space = user_channel->gpu_va_space->va_space;

    if (!user_channel->in_subctx)
        return NV_OK;
@@ -2840,21 +2898,21 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren

    user_channel->subctx_info = channel_subctx_info;

-    // Register the GPU VA space of the channel subcontext info descriptor, or
+    // Register the VA space of the channel subcontext info descriptor, or
    // check that the existing one matches the channel's
    if (channel_subctx_info->subctxs[user_channel->subctx_id].refcount++ > 0) {
-        UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space == gpu_va_space,
-                       "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected GPU VA space 0x%llx but got 0x%llx instead\n",
+        UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].va_space == va_space,
+                       "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected VA space 0x%llx but got 0x%llx instead\n",
                       user_channel->hw_runlist_id,
                       user_channel->hw_channel_id,
                       instance_ptr.address,
                       uvm_aperture_string(instance_ptr.aperture),
                       user_channel->subctx_id,
                       user_channel->tsg.id,
-                       (NvU64)gpu_va_space,
-                       (NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space);
-        UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space != NULL,
-                       "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: GPU VA space is NULL\n",
+                       (NvU64)va_space,
+                       (NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].va_space);
+        UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].va_space != NULL,
+                       "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: VA space is NULL\n",
                       user_channel->hw_runlist_id,
                       user_channel->hw_channel_id,
                       instance_ptr.address,
@@ -2871,17 +2929,17 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
                       user_channel->tsg.id);
    }
    else {
-        UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space == NULL,
-                       "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected GPU VA space NULL but got 0x%llx instead\n",
+        UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].va_space == NULL,
+                       "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected VA space NULL but got 0x%llx instead\n",
                       user_channel->hw_runlist_id,
                       user_channel->hw_channel_id,
                       instance_ptr.address,
                       uvm_aperture_string(instance_ptr.aperture),
                       user_channel->subctx_id,
                       user_channel->tsg.id,
-                       (NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space);
+                       (NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].va_space);

-        channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space = gpu_va_space;
+        channel_subctx_info->subctxs[user_channel->subctx_id].va_space = va_space;
    }

    ++channel_subctx_info->total_refcount;
@@ -2905,7 +2963,7 @@ static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *
                                                              uvm_user_channel_t *user_channel)
 {
    uvm_gpu_phys_address_t instance_ptr = user_channel->instance_ptr.addr;
-    uvm_gpu_va_space_t *gpu_va_space = user_channel->gpu_va_space;
+    uvm_va_space_t *va_space = user_channel->gpu_va_space->va_space;

    uvm_assert_spinlock_locked(&parent_gpu->instance_ptr_table_lock);

@@ -2934,17 +2992,16 @@ static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *
                   user_channel->subctx_id,
                   user_channel->tsg.id);

-    UVM_ASSERT_MSG(user_channel->subctx_info->subctxs[user_channel->subctx_id].gpu_va_space == gpu_va_space,
-                   "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: "
-                   "expected GPU VA space 0x%llx but got 0x%llx instead\n",
+    UVM_ASSERT_MSG(user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space == va_space,
+                   "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected VA space 0x%llx but got 0x%llx instead\n",
                   user_channel->hw_runlist_id,
                   user_channel->hw_channel_id,
                   instance_ptr.address,
                   uvm_aperture_string(instance_ptr.aperture),
                   user_channel->subctx_id,
                   user_channel->tsg.id,
-                   (NvU64)gpu_va_space,
-                   (NvU64)user_channel->subctx_info->subctxs[user_channel->subctx_id].gpu_va_space);
+                   (NvU64)va_space,
+                   (NvU64)user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space);

    UVM_ASSERT_MSG(user_channel->subctx_info->total_refcount > 0,
                   "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: TSG refcount is 0\n",
@@ -2957,7 +3014,7 @@ static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *

    // Decrement VA space refcount. If it gets to zero, unregister the pointer
    if (--user_channel->subctx_info->subctxs[user_channel->subctx_id].refcount == 0)
-        user_channel->subctx_info->subctxs[user_channel->subctx_id].gpu_va_space = NULL;
+        user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space = NULL;

    if (--user_channel->subctx_info->total_refcount == 0) {
        uvm_rb_tree_remove(&parent_gpu->tsg_table, &user_channel->subctx_info->node);
@@ -3040,7 +3097,7 @@ static uvm_user_channel_t *instance_ptr_to_user_channel(uvm_parent_gpu_t *parent
    return get_user_channel(instance_node);
 }

-static uvm_gpu_va_space_t *user_channel_and_subctx_to_gpu_va_space(uvm_user_channel_t *user_channel, NvU32 subctx_id)
+static uvm_va_space_t *user_channel_and_subctx_to_va_space(uvm_user_channel_t *user_channel, NvU32 subctx_id)
 {
    uvm_user_channel_subctx_info_t *channel_subctx_info;

@@ -3068,31 +3125,28 @@ static uvm_gpu_va_space_t *user_channel_and_subctx_to_gpu_va_space(uvm_user_chan
    // uncleanly and work from that subcontext continues running with work from
    // other subcontexts.
    if (channel_subctx_info->subctxs[subctx_id].refcount == 0) {
-        UVM_ASSERT(channel_subctx_info->subctxs[subctx_id].gpu_va_space == NULL);
+        UVM_ASSERT(channel_subctx_info->subctxs[subctx_id].va_space == NULL);
    }
    else {
-        UVM_ASSERT_MSG(channel_subctx_info->subctxs[subctx_id].gpu_va_space,
-                       "instance_ptr {0x%llx:%s} in TSG %u: no GPU VA space for SubCTX %u\n",
+        UVM_ASSERT_MSG(channel_subctx_info->subctxs[subctx_id].va_space,
+                       "instance_ptr {0x%llx:%s} in TSG %u: no VA space for SubCTX %u\n",
                       user_channel->instance_ptr.addr.address,
                       uvm_aperture_string(user_channel->instance_ptr.addr.aperture),
                       user_channel->tsg.id,
                       subctx_id);
    }

-    return channel_subctx_info->subctxs[subctx_id].gpu_va_space;
+    return channel_subctx_info->subctxs[subctx_id].va_space;
 }

 NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
-                                                 const uvm_fault_buffer_entry_t *fault,
-                                                 uvm_va_space_t **out_va_space,
-                                                 uvm_gpu_t **out_gpu)
+                                                 uvm_fault_buffer_entry_t *fault,
+                                                 uvm_va_space_t **out_va_space)
 {
    uvm_user_channel_t *user_channel;
-    uvm_gpu_va_space_t *gpu_va_space;
    NV_STATUS status = NV_OK;

    *out_va_space = NULL;
-    *out_gpu = NULL;

    uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);

@@ -3113,10 +3167,8 @@ NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
        // We can safely access user_channel->gpu_va_space under the
        // instance_ptr_table_lock since gpu_va_space is set to NULL after this
        // function is called in uvm_user_channel_detach
-        gpu_va_space = user_channel->gpu_va_space;
-        UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
-        *out_va_space = gpu_va_space->va_space;
-        *out_gpu = gpu_va_space->gpu;
+        UVM_ASSERT(uvm_gpu_va_space_state(user_channel->gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
+        *out_va_space = user_channel->gpu_va_space->va_space;
    }
    else {
        NvU32 ve_id = fault->fault_source.ve_id;
@@ -3126,17 +3178,12 @@ NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,

        ve_id -= user_channel->smc_engine_ve_id_offset;

-        gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, ve_id);
+        *out_va_space = user_channel_and_subctx_to_va_space(user_channel, ve_id);

        // Instance pointer is valid but the fault targets a non-existent
        // subcontext.
-        if (gpu_va_space) {
-            *out_va_space = gpu_va_space->va_space;
-            *out_gpu = gpu_va_space->gpu;
-        }
-        else {
+        if (!*out_va_space)
            status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
-        }
    }

 exit_unlock:
@@ -3146,16 +3193,13 @@ exit_unlock:
 }

 NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
-                                                          const uvm_access_counter_buffer_entry_t *entry,
-                                                          uvm_va_space_t **out_va_space,
-                                                          uvm_gpu_t **out_gpu)
+                                                          uvm_access_counter_buffer_entry_t *entry,
+                                                          uvm_va_space_t **out_va_space)
 {
    uvm_user_channel_t *user_channel;
-    uvm_gpu_va_space_t *gpu_va_space;
    NV_STATUS status = NV_OK;

    *out_va_space = NULL;
-    *out_gpu = NULL;
    UVM_ASSERT(entry->address.is_virtual);

    uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
@@ -3171,20 +3215,13 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
                       "Access counter packet contains SubCTX %u for channel not in subctx\n",
                       entry->virtual_info.ve_id);

-        gpu_va_space = user_channel->gpu_va_space;
-        UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
-        *out_va_space = gpu_va_space->va_space;
-        *out_gpu = gpu_va_space->gpu;
+        UVM_ASSERT(uvm_gpu_va_space_state(user_channel->gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
+        *out_va_space = user_channel->gpu_va_space->va_space;
    }
    else {
-        gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
-        if (gpu_va_space) {
-            *out_va_space = gpu_va_space->va_space;
-            *out_gpu = gpu_va_space->gpu;
-        }
-        else {
+        *out_va_space = user_channel_and_subctx_to_va_space(user_channel, entry->virtual_info.ve_id);
+        if (!*out_va_space)
            status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
-        }
    }

 exit_unlock:
@@ -3262,10 +3299,7 @@ void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64
    atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
 }

-NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
-                                       struct page *page,
-                                       size_t size,
-                                       NvU64 *dma_address_out)
+NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
 {
    NvU64 dma_addr;

--- a/kernel-open/nvidia-uvm/uvm_gpu.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -279,10 +279,6 @@ struct uvm_fault_service_batch_context_struct
    // pick one to be the target of the cancel sequence.
    uvm_va_space_t *fatal_va_space;

-    // TODO: Bug 3900733: refactor service_fault_batch_for_cancel() to handle
-    // iterating over multiple GPU VA spaces and remove fatal_gpu.
-    uvm_gpu_t *fatal_gpu;
-
    bool has_throttled_faults;

    NvU32 num_invalid_prefetch_faults;
@@ -597,7 +593,6 @@ typedef enum
    UVM_GPU_LINK_NVLINK_2,
    UVM_GPU_LINK_NVLINK_3,
    UVM_GPU_LINK_NVLINK_4,
-    UVM_GPU_LINK_NVLINK_5,
    UVM_GPU_LINK_C2C,
    UVM_GPU_LINK_MAX
 } uvm_gpu_link_type_t;
@@ -967,6 +962,8 @@ struct uvm_parent_gpu_struct
    // Whether CE supports physical addressing mode for writes to vidmem
    bool ce_phys_vidmem_write_supported;

+    // Addressing mode(s) supported for CE transfers between this GPU and its
+    // peers: none, physical only, physical and virtual, etc.
    uvm_gpu_peer_copy_mode_t peer_copy_mode;

    // Virtualization mode of the GPU.
@@ -1270,6 +1267,11 @@ struct uvm_gpu_peer_struct
    // peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
    NvU8 peer_ids[2];

+    // Indirect peers are GPUs which can coherently access each other's memory
+    // over NVLINK, but are routed through the CPU using the SYS aperture rather
+    // than a PEER aperture
+    NvU8 is_indirect_peer : 1;
+
    // The link type between the peer GPUs, currently either PCIe or NVLINK.
    // This field is used to determine the when this peer struct has been
    // initialized (link_type != UVM_GPU_LINK_INVALID). NVLink peers are
@@ -1278,8 +1280,8 @@ struct uvm_gpu_peer_struct
    uvm_gpu_link_type_t link_type;

    // Maximum unidirectional bandwidth between the peers in megabytes per
-    // second, not taking into account the protocols' overhead.
-    // See UvmGpuP2PCapsParams.
+    // second, not taking into account the protocols' overhead. The reported
+    // bandwidth for indirect peers is zero. See UvmGpuP2PCapsParams.
    NvU32 total_link_line_rate_mbyte_per_s;

    // For PCIe, the number of times that this has been retained by a VA space.
@@ -1423,9 +1425,19 @@ static bool uvm_gpus_are_nvswitch_connected(const uvm_gpu_t *gpu0, const uvm_gpu
    return false;
 }

-static bool uvm_gpus_are_smc_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
+static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
 {
-    return gpu0->parent == gpu1->parent;
+    uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);
+
+    if (peer_caps->link_type != UVM_GPU_LINK_INVALID && peer_caps->is_indirect_peer) {
+        UVM_ASSERT(gpu0->mem_info.numa.enabled);
+        UVM_ASSERT(gpu1->mem_info.numa.enabled);
+        UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_PCIE);
+        UVM_ASSERT(!uvm_gpus_are_nvswitch_connected(gpu0, gpu1));
+        return true;
+    }
+
+    return false;
 }

 // Retrieve the virtual address corresponding to the given vidmem physical
@@ -1610,25 +1622,16 @@ void uvm_parent_gpu_remove_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_c
 //  NV_ERR_PAGE_TABLE_NOT_AVAIL  Entry's instance pointer is valid but the entry
 //                               targets an invalid subcontext
 //
-// out_va_space is valid if NV_OK is returned, otherwise it's NULL.
-// out_gpu is valid if NV_OK is returned, otherwise it's NULL.
-// The caller is responsible for ensuring that the returned va_space and gpu
-// can't be destroyed, so this function should only be called from the bottom
-// half.
+// out_va_space is valid if NV_OK is returned, otherwise it's NULL. The caller
+// is responsible for ensuring that the returned va_space can't be destroyed,
+// so these functions should only be called from the bottom half.
 NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
-                                                 const uvm_fault_buffer_entry_t *fault,
-                                                 uvm_va_space_t **out_va_space,
-                                                 uvm_gpu_t **out_gpu);
+                                                 uvm_fault_buffer_entry_t *fault,
+                                                 uvm_va_space_t **out_va_space);

-// Return the GPU VA space for the given instance pointer and ve_id in the
-// access counter entry. This function can only be used for virtual address
-// entries.
-// The return values are the same as uvm_parent_gpu_fault_entry_to_va_space()
-// but for virtual access counter entries.
 NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
-                                                          const uvm_access_counter_buffer_entry_t *entry,
-                                                          uvm_va_space_t **out_va_space,
-                                                          uvm_gpu_t **out_gpu);
+                                                          uvm_access_counter_buffer_entry_t *entry,
+                                                          uvm_va_space_t **out_va_space);

 typedef enum
 {
--- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
@@ -684,7 +684,10 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,

    while (get != put) {
        // Wait until valid bit is set
-        UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
+        UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
+            if (uvm_global_get_status() != NV_OK)
+                goto done;
+        }

        parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
        ++get;
@@ -692,6 +695,7 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
            get = 0;
    }

+done:
    write_get(parent_gpu, get);
 }

@@ -734,18 +738,9 @@ static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const voi
    return cmp_access_counter_instance_ptr(a, b);
 }

-// Compare two GPUs
-static inline int cmp_gpu(const uvm_gpu_t *a, const uvm_gpu_t *b)
-{
-    NvU32 id_a = a ? uvm_id_value(a->id) : 0;
-    NvU32 id_b = b ? uvm_id_value(b->id) : 0;
-
-    return UVM_CMP_DEFAULT(id_a, id_b);
-}
-
 // Sort comparator for pointers to GVA access counter notification buffer
-// entries that sorts by va_space, GPU ID, and fault address.
-static int cmp_sort_virt_notifications_by_va_space_gpu_address(const void *_a, const void *_b)
+// entries that sorts by va_space and fault address.
+static int cmp_sort_virt_notifications_by_va_space_address(const void *_a, const void *_b)
 {
    const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
    const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;
@@ -756,10 +751,6 @@ static int cmp_sort_virt_notifications_by_va_space_gpu_address(const void *_a, c
    if (result != 0)
        return result;

-    result = cmp_gpu((*a)->gpu, (*b)->gpu);
-    if (result != 0)
-        return result;
-
    return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
 }

@@ -787,7 +778,7 @@ typedef enum
    NOTIFICATION_FETCH_MODE_ALL,
 } notification_fetch_mode_t;

-static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
+static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
                                                 uvm_access_counter_service_batch_context_t *batch_context,
                                                 notification_fetch_mode_t fetch_mode)
 {
@@ -796,12 +787,12 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
    NvU32 notification_index;
    uvm_access_counter_buffer_entry_t *notification_cache;
    uvm_spin_loop_t spin;
-    uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
+    uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
    NvU32 last_instance_ptr_idx = 0;
    uvm_aperture_t last_aperture = UVM_APERTURE_PEER_MAX;

-    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
-    UVM_ASSERT(parent_gpu->access_counters_supported);
+    UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
+    UVM_ASSERT(gpu->parent->access_counters_supported);

    notification_cache = batch_context->notification_cache;

@@ -830,19 +821,25 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
           (fetch_mode == NOTIFICATION_FETCH_MODE_ALL || notification_index < access_counters->max_batch_size)) {
        uvm_access_counter_buffer_entry_t *current_entry = &notification_cache[notification_index];

-        // We cannot just wait for the last entry (the one pointed by put) to become valid, we have to do it
-        // individually since entries can be written out of order
-        UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
+        // We cannot just wait for the last entry (the one pointed to by put)
+        // to become valid; we have to do it individually since entries can be
+        // written out of order
+        UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
            // We have some entry to work on. Let's do the rest later.
            if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
                goto done;
+
+            // There's no entry to work on and something has gone wrong. Ignore
+            // the rest.
+            if (uvm_global_get_status() != NV_OK)
+               goto done;
        }

        // Prevent later accesses being moved above the read of the valid bit
        smp_mb__after_atomic();

        // Got valid bit set. Let's cache.
-        parent_gpu->access_counter_buffer_hal->parse_entry(parent_gpu, get, current_entry);
+        gpu->parent->access_counter_buffer_hal->parse_entry(gpu->parent, get, current_entry);

        if (current_entry->address.is_virtual) {
            batch_context->virt.notifications[batch_context->virt.num_notifications++] = current_entry;
@@ -858,38 +855,26 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
            }
        }
        else {
-            NvU64 translation_size;
-            uvm_gpu_t *gpu;
-
-            translation_size = get_config_for_type(access_counters,
-                                                   current_entry->counter_type)->translation_size;
+            const NvU64 translation_size = get_config_for_type(access_counters, current_entry->counter_type)->translation_size;
            current_entry->address.address = UVM_ALIGN_DOWN(current_entry->address.address, translation_size);

            batch_context->phys.notifications[batch_context->phys.num_notifications++] = current_entry;

-            gpu = uvm_parent_gpu_find_first_valid_gpu(parent_gpu);
-            if (!gpu) {
-                current_entry->physical_info.resident_id = UVM_ID_INVALID;
-                current_entry->gpu = NULL;
-            }
-            else {
-                current_entry->gpu = gpu;
-                current_entry->physical_info.resident_id =
-                    uvm_gpu_get_processor_id_by_address(gpu, uvm_gpu_phys_address(current_entry->address.aperture,
-                                                                                  current_entry->address.address));
+            current_entry->physical_info.resident_id =
+                uvm_gpu_get_processor_id_by_address(gpu, uvm_gpu_phys_address(current_entry->address.aperture,
+                                                                              current_entry->address.address));

-                if (batch_context->phys.is_single_aperture) {
-                    if (batch_context->phys.num_notifications == 1)
-                        last_aperture = current_entry->address.aperture;
-                    else if (current_entry->address.aperture != last_aperture)
-                        batch_context->phys.is_single_aperture = false;
-                }
-
-                if (current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC)
-                    UVM_ASSERT(uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
-                else
-                    UVM_ASSERT(!uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
+            if (batch_context->phys.is_single_aperture) {
+                if (batch_context->phys.num_notifications == 1)
+                    last_aperture = current_entry->address.aperture;
+                else if (current_entry->address.aperture != last_aperture)
+                    batch_context->phys.is_single_aperture = false;
            }
+
+            if (current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC)
+                UVM_ASSERT(uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
+            else
+                UVM_ASSERT(!uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
        }

        ++notification_index;
@@ -899,7 +884,7 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
    }

 done:
-    write_get(parent_gpu, get);
+    write_get(gpu->parent, get);

    return notification_index;
 }
@@ -920,16 +905,12 @@ static void translate_virt_notifications_instance_ptrs(uvm_parent_gpu_t *parent_
            // simply be ignored in subsequent processing.
            status = uvm_parent_gpu_access_counter_entry_to_va_space(parent_gpu,
                                                                     current_entry,
-                                                                     &current_entry->virtual_info.va_space,
-                                                                     &current_entry->gpu);
-            if (status != NV_OK) {
+                                                                     &current_entry->virtual_info.va_space);
+            if (status != NV_OK)
                UVM_ASSERT(current_entry->virtual_info.va_space == NULL);
-                UVM_ASSERT(current_entry->gpu == NULL);
-            }
        }
        else {
            current_entry->virtual_info.va_space = batch_context->virt.notifications[i - 1]->virtual_info.va_space;
-            current_entry->gpu = batch_context->virt.notifications[i - 1]->gpu;
        }
    }
 }
@@ -953,7 +934,7 @@ static void preprocess_virt_notifications(uvm_parent_gpu_t *parent_gpu,
    sort(batch_context->virt.notifications,
         batch_context->virt.num_notifications,
         sizeof(*batch_context->virt.notifications),
-         cmp_sort_virt_notifications_by_va_space_gpu_address,
+         cmp_sort_virt_notifications_by_va_space_address,
         NULL);
 }

@@ -971,17 +952,13 @@ static void preprocess_phys_notifications(uvm_access_counter_service_batch_conte
    }
 }

-static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *parent_gpu,
-                                                          uvm_access_counter_buffer_entry_t **notification_start,
-                                                          NvU32 num_entries,
-                                                          NvU32 flags)
+static NV_STATUS notify_tools_and_process_flags(uvm_gpu_t *gpu,
+                                                uvm_access_counter_buffer_entry_t **notification_start,
+                                                NvU32 num_entries,
+                                                NvU32 flags)
 {
-    uvm_gpu_t *gpu = uvm_parent_gpu_find_first_valid_gpu(parent_gpu);
    NV_STATUS status = NV_OK;

-    if (!gpu)
-        return NV_OK;
-
    if (uvm_enable_builtin_tests) {
        // TODO: Bug 4310744: [UVM][TOOLS] Attribute access counter tools events
        //                    to va_space instead of broadcasting.
@@ -997,31 +974,6 @@ static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *pare
    return status;
 }

-static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
-                                                uvm_gpu_t *gpu,
-                                                uvm_access_counter_buffer_entry_t **notification_start,
-                                                NvU32 num_entries,
-                                                NvU32 flags)
-{
-    NV_STATUS status = NV_OK;
-
-    if (uvm_enable_builtin_tests) {
-        NvU32 i;
-
-        for (i = 0; i < num_entries; i++) {
-            uvm_tools_record_access_counter(va_space,
-                                            gpu->id,
-                                            notification_start[i],
-                                            flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
-        }
-    }
-
-    if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
-        status = access_counter_clear_notifications(gpu, notification_start, num_entries);
-
-    return status;
-}
-
 static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
                                         uvm_va_block_t *va_block,
                                         uvm_va_block_retry_t *va_block_retry,
@@ -1227,13 +1179,13 @@ static void reverse_mappings_to_va_block_page_mask(uvm_va_block_t *va_block,
    }
 }

-static NV_STATUS service_phys_single_va_block(uvm_access_counter_service_batch_context_t *batch_context,
+static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
+                                              uvm_access_counter_service_batch_context_t *batch_context,
                                              const uvm_access_counter_buffer_entry_t *current_entry,
                                              const uvm_reverse_map_t *reverse_mappings,
                                              size_t num_reverse_mappings,
                                              NvU32 *out_flags)
 {
-    uvm_gpu_t *gpu = current_entry->gpu;
    size_t index;
    uvm_va_block_t *va_block = reverse_mappings[0].va_block;
    uvm_va_space_t *va_space = NULL;
@@ -1320,7 +1272,8 @@ done:
    return status;
 }

-static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context_t *batch_context,
+static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
+                                        uvm_access_counter_service_batch_context_t *batch_context,
                                        const uvm_access_counter_buffer_entry_t *current_entry,
                                        const uvm_reverse_map_t *reverse_mappings,
                                        size_t num_reverse_mappings,
@@ -1333,7 +1286,8 @@ static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context

    for (index = 0; index < num_reverse_mappings; ++index) {
        NvU32 out_flags_local = 0;
-        status = service_phys_single_va_block(batch_context,
+        status = service_phys_single_va_block(gpu,
+                                              batch_context,
                                              current_entry,
                                              reverse_mappings + index,
                                              1,
@@ -1382,7 +1336,8 @@ static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *revers
 // Service the given translation range. It will return the count of the reverse
 // mappings found during servicing in num_reverse_mappings, even if the function
 // doesn't return NV_OK.
-static NV_STATUS service_phys_notification_translation(uvm_gpu_t *resident_gpu,
+static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
+                                                       uvm_gpu_t *resident_gpu,
                                                       uvm_access_counter_service_batch_context_t *batch_context,
                                                       const uvm_gpu_access_counter_type_config_t *config,
                                                       const uvm_access_counter_buffer_entry_t *current_entry,
@@ -1391,7 +1346,6 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *resident_gpu,
                                                       size_t *num_reverse_mappings,
                                                       NvU32 *out_flags)
 {
-    uvm_gpu_t *gpu = current_entry->gpu;
    NV_STATUS status;
    NvU32 region_start, region_end;

@@ -1429,14 +1383,16 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *resident_gpu,

    // Service all the translations
    if (are_reverse_mappings_on_single_block(batch_context->phys.translations, *num_reverse_mappings)) {
-        status = service_phys_single_va_block(batch_context,
+        status = service_phys_single_va_block(gpu,
+                                              batch_context,
                                              current_entry,
                                              batch_context->phys.translations,
                                              *num_reverse_mappings,
                                              out_flags);
    }
    else {
-        status = service_phys_va_blocks(batch_context,
+        status = service_phys_va_blocks(gpu,
+                                        batch_context,
                                        current_entry,
                                        batch_context->phys.translations,
                                        *num_reverse_mappings,
@@ -1446,14 +1402,14 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *resident_gpu,
    return status;
 }

-static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_context_t *batch_context,
-                                           uvm_access_counter_buffer_entry_t *current_entry)
+static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
+                                           uvm_access_counter_service_batch_context_t *batch_context,
+                                           const uvm_access_counter_buffer_entry_t *current_entry,
+                                           NvU32 *out_flags)
 {
    NvU64 address;
    NvU64 translation_index;
-    uvm_gpu_t *gpu = current_entry->gpu;
-    uvm_parent_gpu_t *parent_gpu = gpu->parent;
-    uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
+    uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
    uvm_access_counter_type_t counter_type = current_entry->counter_type;
    const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
    unsigned long sub_granularity;
@@ -1483,13 +1439,14 @@ static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_cont
        // fall outside of the allocatable address range. We just drop
        // them.
        if (address >= resident_gpu->mem_info.max_allocatable_address)
-            goto out;
+            return NV_OK;
    }

    for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
        size_t num_reverse_mappings;
        NvU32 out_flags_local = 0;
-        status = service_phys_notification_translation(resident_gpu,
+        status = service_phys_notification_translation(gpu,
+                                                       resident_gpu,
                                                       batch_context,
                                                       config,
                                                       current_entry,
@@ -1510,32 +1467,37 @@ static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_cont
    }

    if (uvm_enable_builtin_tests)
-        flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
+        *out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
+
+    if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
+        *out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;

-out:
-    notify_tools_broadcast_and_process_flags(parent_gpu, &current_entry, 1, flags);
    return status;
 }

 // TODO: Bug 2018899: Add statistics for dropped access counter notifications
-static NV_STATUS service_phys_notifications(uvm_parent_gpu_t *parent_gpu,
+static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
                                            uvm_access_counter_service_batch_context_t *batch_context)
 {
    NvU32 i;
    uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;

-    UVM_ASSERT(parent_gpu->access_counters_can_use_physical_addresses);
+    UVM_ASSERT(gpu->parent->access_counters_can_use_physical_addresses);

    preprocess_phys_notifications(batch_context);

    for (i = 0; i < batch_context->phys.num_notifications; ++i) {
        NV_STATUS status;
        uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
+        NvU32 flags = 0;

        if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
            continue;

-        status = service_phys_notification(batch_context, current_entry);
+        status = service_phys_notification(gpu, batch_context, current_entry, &flags);
+
+        notify_tools_and_process_flags(gpu, &notifications[i], 1, flags);
+
        if (status != NV_OK)
            return status;
    }
@@ -1672,14 +1634,16 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
        uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
        NvU64 address = current_entry->address.address;

-        if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address > va_block->end)
+        if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end)) {
+            expand_notification_block(gpu_va_space,
+                                      va_block,
+                                      batch_context->block_service_context.block_context,
+                                      accessed_pages,
+                                      current_entry);
+        }
+        else {
            break;
-
-        expand_notification_block(gpu_va_space,
-                                  va_block,
-                                  batch_context->block_service_context.block_context,
-                                  accessed_pages,
-                                  current_entry);
+        }
    }

    *out_index = i;
@@ -1694,7 +1658,7 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
    if (status == NV_OK)
        flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;

-    flags_status = notify_tools_and_process_flags(va_space, gpu, &notifications[index], *out_index - index, flags);
+    flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);

    if ((status == NV_OK) && (flags_status != NV_OK))
        status = flags_status;
@@ -1733,7 +1697,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
    if (!vma) {
        // Clear the notification entry to continue receiving access counter
        // notifications when a new VMA is allocated in this range.
-        status = notify_tools_and_process_flags(va_space, gpu, &notifications[index], 1, flags);
+        status = notify_tools_and_process_flags(gpu, &notifications[index], 1, flags);
        *out_index = index + 1;
        return status;
    }
@@ -1747,10 +1711,10 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
        uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
        address = current_entry->address.address;

-        if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address >= end)
+        if ((current_entry->virtual_info.va_space == va_space) && (address < end))
+            uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
+        else
            break;
-
-        uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
    }

    *out_index = i;
@@ -1765,7 +1729,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
    if (status != NV_OK)
        flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;

-    flags_status = notify_tools_and_process_flags(va_space, gpu, &notifications[index], *out_index - index, flags);
+    flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
    if ((status == NV_OK) && (flags_status != NV_OK))
        status = flags_status;

@@ -1817,7 +1781,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
            status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
        }
        else {
-            status = notify_tools_and_process_flags(va_space, gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
+            status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
            *out_index = index + 1;
        }
    }
@@ -1847,11 +1811,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa

        // Clobber status to continue processing the rest of the notifications
        // in the batch.
-        status = notify_tools_and_process_flags(va_space,
-                                                gpu_va_space->gpu,
-                                                batch_context->virt.notifications,
-                                                1,
-                                                flags);
+        status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);

        *out_index = index + 1;
    }
@@ -1859,7 +1819,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
    return status;
 }

-static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
+static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
                                            uvm_access_counter_service_batch_context_t *batch_context)
 {
    NvU32 i = 0;
@@ -1867,19 +1827,18 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
    struct mm_struct *mm = NULL;
    uvm_va_space_t *va_space = NULL;
    uvm_va_space_t *prev_va_space = NULL;
-    uvm_gpu_t *prev_gpu = NULL;
    uvm_gpu_va_space_t *gpu_va_space = NULL;

    // TODO: Bug 4299018 : Add support for virtual access counter migrations on
    //                     4K page sizes.
    if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
-        return notify_tools_broadcast_and_process_flags(parent_gpu,
-                                                        batch_context->virt.notifications,
-                                                        batch_context->virt.num_notifications,
-                                                        0);
+        return notify_tools_and_process_flags(gpu,
+                                              batch_context->virt.notifications,
+                                              batch_context->virt.num_notifications,
+                                              0);
    }

-    preprocess_virt_notifications(parent_gpu, batch_context);
+    preprocess_virt_notifications(gpu->parent, batch_context);

    while (i < batch_context->virt.num_notifications) {
        uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
@@ -1893,38 +1852,25 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
                uvm_va_space_mm_release_unlock(prev_va_space, mm);

                mm = NULL;
-                prev_gpu = NULL;
+                gpu_va_space = NULL;
            }

            // Acquire locks for the new va_space.
            if (va_space) {
                mm = uvm_va_space_mm_retain_lock(va_space);
                uvm_va_space_down_read(va_space);
+
+                gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
            }

            prev_va_space = va_space;
        }

-        if (va_space) {
-            if (prev_gpu != current_entry->gpu) {
-                prev_gpu = current_entry->gpu;
-                gpu_va_space = uvm_gpu_va_space_get(va_space, current_entry->gpu);
-            }
-
-            if (gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
-                status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
-            }
-            else {
-                status = notify_tools_and_process_flags(va_space,
-                                                        current_entry->gpu,
-                                                        &batch_context->virt.notifications[i],
-                                                        1,
-                                                        0);
-                i++;
-            }
+        if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
+            status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
        }
        else {
-            status = notify_tools_broadcast_and_process_flags(parent_gpu, &batch_context->virt.notifications[i], 1, 0);
+            status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
            i++;
        }

@@ -1940,18 +1886,19 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
    return status;
 }

-void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu)
+
+void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
 {
    NV_STATUS status = NV_OK;
-    uvm_access_counter_service_batch_context_t *batch_context = &parent_gpu->access_counter_buffer_info.batch_service_context;
+    uvm_access_counter_service_batch_context_t *batch_context = &gpu->parent->access_counter_buffer_info.batch_service_context;

-    UVM_ASSERT(parent_gpu->access_counters_supported);
+    UVM_ASSERT(gpu->parent->access_counters_supported);

-    if (parent_gpu->access_counter_buffer_info.notifications_ignored_count > 0)
+    if (gpu->parent->access_counter_buffer_info.notifications_ignored_count > 0)
        return;

    while (1) {
-        batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(parent_gpu,
+        batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(gpu,
                                                                                      batch_context,
                                                                                      NOTIFICATION_FETCH_MODE_BATCH_READY);
        if (batch_context->num_cached_notifications == 0)
@@ -1960,13 +1907,13 @@ void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu)
        ++batch_context->batch_id;

        if (batch_context->virt.num_notifications) {
-            status = service_virt_notifications(parent_gpu, batch_context);
+            status = service_virt_notifications(gpu, batch_context);
            if (status != NV_OK)
                break;
        }

        if (batch_context->phys.num_notifications) {
-            status = service_phys_notifications(parent_gpu, batch_context);
+            status = service_phys_notifications(gpu, batch_context);
            if (status != NV_OK)
                break;
        }
@@ -1975,7 +1922,7 @@ void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu)
    if (status != NV_OK) {
        UVM_DBG_PRINT("Error %s servicing access counter notifications on GPU: %s\n",
                      nvstatusToString(status),
-                      uvm_parent_gpu_name(parent_gpu));
+                      uvm_gpu_name(gpu));
    }
 }

--- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.h
@@ -31,7 +31,7 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
 void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
 bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);

-void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
+void uvm_gpu_service_access_counters(uvm_gpu_t *gpu);

 void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);

--- a/kernel-open/nvidia-uvm/uvm_gpu_isr.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_isr.c
@@ -479,14 +479,17 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
    uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
 }

-uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
+static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
 {
    uvm_gpu_t *gpu;

    // When SMC is enabled, there's no longer a 1:1 relationship between the
-    // parent and the partitions. It's sufficient to return any valid uvm_gpu_t
-    // since the purpose is to have a channel and push buffer for operations
-    // that affect the whole parent GPU.
+    // parent and the partitions.  But because all relevant interrupt paths
+    // are shared, as is the fault reporting logic, it's sufficient here
+    // to proceed with any valid uvm_gpu_t, even if the corresponding partition
+    // didn't cause all, or even any of the interrupts.
+    // The bottom half handlers will later find the appropriate partitions by
+    // attributing the notifications to VA spaces as necessary.
    if (parent_gpu->smc.enabled) {
        NvU32 sub_processor_index;

@@ -515,8 +518,13 @@ uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
 static void replayable_faults_isr_bottom_half(void *args)
 {
    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
+    uvm_gpu_t *gpu;
    unsigned int cpu;

+    gpu = find_first_valid_gpu(parent_gpu);
+    if (gpu == NULL)
+        goto put_kref;
+
    UVM_ASSERT(parent_gpu->replayable_faults_supported);

    // Record the lock ownership
@@ -537,10 +545,11 @@ static void replayable_faults_isr_bottom_half(void *args)
    ++parent_gpu->isr.replayable_faults.stats.cpu_exec_count[cpu];
    put_cpu();

-    uvm_parent_gpu_service_replayable_faults(parent_gpu);
+    uvm_gpu_service_replayable_faults(gpu);

    uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);

+put_kref:
    // It is OK to drop a reference on the parent GPU if a bottom half has
    // been retriggered within uvm_parent_gpu_replayable_faults_isr_unlock,
    // because the rescheduling added an additional reference.
@@ -555,8 +564,13 @@ static void replayable_faults_isr_bottom_half_entry(void *args)
 static void non_replayable_faults_isr_bottom_half(void *args)
 {
    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
+    uvm_gpu_t *gpu;
    unsigned int cpu;

+    gpu = find_first_valid_gpu(parent_gpu);
+    if (gpu == NULL)
+        goto put_kref;
+
    UVM_ASSERT(parent_gpu->non_replayable_faults_supported);

    uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
@@ -570,10 +584,11 @@ static void non_replayable_faults_isr_bottom_half(void *args)
    ++parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count[cpu];
    put_cpu();

-    uvm_parent_gpu_service_non_replayable_fault_buffer(parent_gpu);
+    uvm_gpu_service_non_replayable_fault_buffer(gpu);

    uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);

+put_kref:
    uvm_parent_gpu_kref_put(parent_gpu);
 }

@@ -585,8 +600,13 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)
 static void access_counters_isr_bottom_half(void *args)
 {
    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
+    uvm_gpu_t *gpu;
    unsigned int cpu;

+    gpu = find_first_valid_gpu(parent_gpu);
+    if (gpu == NULL)
+        goto put_kref;
+
    UVM_ASSERT(parent_gpu->access_counters_supported);

    uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
@@ -600,10 +620,11 @@ static void access_counters_isr_bottom_half(void *args)
    ++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
    put_cpu();

-    uvm_parent_gpu_service_access_counters(parent_gpu);
+    uvm_gpu_service_access_counters(gpu);

    uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);

+put_kref:
    uvm_parent_gpu_kref_put(parent_gpu);
 }

--- a/kernel-open/nvidia-uvm/uvm_gpu_isr.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_isr.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -193,10 +193,4 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
 // parent_gpu->isr.interrupts_lock must be held to call this function.
 void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);

-// Return the first valid GPU given the parent GPU or NULL if no MIG instances
-// are registered. This should only be called from bottom halves or if the
-// g_uvm_global.global_lock is held so that the returned pointer remains valid.
-//
-uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);
-
 #endif // __UVM_GPU_ISR_H__
--- a/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c
@@ -212,7 +212,6 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par

        // Make sure that all fields in the entry are properly initialized
        fault_entry->va_space = NULL;
-        fault_entry->gpu = NULL;
        fault_entry->is_fatal = (fault_entry->fault_type >= UVM_FAULT_TYPE_FATAL);
        fault_entry->filtered = false;

@@ -236,7 +235,7 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
    return NV_OK;
 }

-static bool use_clear_faulted_channel_sw_method(uvm_parent_gpu_t *parent_gpu)
+static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
 {
    // If true, UVM uses a SW method to request RM to do the clearing on its
    // behalf.
@@ -244,7 +243,7 @@ static bool use_clear_faulted_channel_sw_method(uvm_parent_gpu_t *parent_gpu)

    // In SRIOV, the UVM (guest) driver does not have access to the privileged
    // registers used to clear the faulted bit.
-    if (uvm_parent_gpu_is_virt_mode_sriov(parent_gpu))
+    if (uvm_parent_gpu_is_virt_mode_sriov(gpu->parent))
        use_sw_method = true;

    // In Confidential Computing access to the privileged registers is blocked,
@@ -254,17 +253,17 @@ static bool use_clear_faulted_channel_sw_method(uvm_parent_gpu_t *parent_gpu)
        use_sw_method = true;

    if (use_sw_method)
-        UVM_ASSERT(parent_gpu->has_clear_faulted_channel_sw_method);
+        UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);

    return use_sw_method;
 }

-static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
+static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
+                                             uvm_user_channel_t *user_channel,
                                             const uvm_fault_buffer_entry_t *fault_entry,
                                             NvU32 batch_id,
                                             uvm_tracker_t *tracker)
 {
-    uvm_gpu_t *gpu = user_channel->gpu;
    NV_STATUS status;
    uvm_push_t push;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
@@ -284,7 +283,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
        return status;
    }

-    if (use_clear_faulted_channel_sw_method(gpu->parent))
+    if (use_clear_faulted_channel_sw_method(gpu))
        gpu->parent->host_hal->clear_faulted_channel_sw_method(&push, user_channel, fault_entry);
    else
        gpu->parent->host_hal->clear_faulted_channel_method(&push, user_channel, fault_entry);
@@ -306,12 +305,12 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
    return status;
 }

-static NV_STATUS clear_faulted_register_on_gpu(uvm_user_channel_t *user_channel,
+static NV_STATUS clear_faulted_register_on_gpu(uvm_gpu_t *gpu,
+                                               uvm_user_channel_t *user_channel,
                                               const uvm_fault_buffer_entry_t *fault_entry,
                                               NvU32 batch_id,
                                               uvm_tracker_t *tracker)
 {
-    uvm_gpu_t *gpu = user_channel->gpu;
    NV_STATUS status;

    UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
@@ -329,26 +328,25 @@ static NV_STATUS clear_faulted_register_on_gpu(uvm_user_channel_t *user_channel,
    return NV_OK;
 }

-static NV_STATUS clear_faulted_on_gpu(uvm_user_channel_t *user_channel,
+static NV_STATUS clear_faulted_on_gpu(uvm_gpu_t *gpu,
+                                      uvm_user_channel_t *user_channel,
                                      const uvm_fault_buffer_entry_t *fault_entry,
                                      NvU32 batch_id,
                                      uvm_tracker_t *tracker)
 {
-    uvm_gpu_t *gpu = user_channel->gpu;
+    if (gpu->parent->has_clear_faulted_channel_method || use_clear_faulted_channel_sw_method(gpu))
+        return clear_faulted_method_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);

-    if (gpu->parent->has_clear_faulted_channel_method || use_clear_faulted_channel_sw_method(gpu->parent))
-        return clear_faulted_method_on_gpu(user_channel, fault_entry, batch_id, tracker);
-
-    return clear_faulted_register_on_gpu(user_channel, fault_entry, batch_id, tracker);
+    return clear_faulted_register_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);
 }

-static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
+static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
+                                                       uvm_va_block_t *va_block,
                                                       uvm_va_block_retry_t *va_block_retry,
                                                       uvm_fault_buffer_entry_t *fault_entry,
                                                       uvm_service_block_context_t *service_context,
                                                       const bool hmm_migratable)
 {
-    uvm_gpu_t *gpu = fault_entry->gpu;
    NV_STATUS status = NV_OK;
    uvm_page_index_t page_index;
    uvm_perf_thrashing_hint_t thrashing_hint;
@@ -443,13 +441,13 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
    return status;
 }

-static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
+static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
+                                                uvm_va_block_t *va_block,
                                                uvm_fault_buffer_entry_t *fault_entry,
                                                const bool hmm_migratable)
 {
    NV_STATUS status, tracker_status;
    uvm_va_block_retry_t va_block_retry;
-    uvm_gpu_t *gpu = fault_entry->gpu;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;

    service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
@@ -461,7 +459,8 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
    uvm_mutex_lock(&va_block->lock);

    status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
-                                       service_managed_fault_in_block_locked(va_block,
+                                       service_managed_fault_in_block_locked(gpu,
+                                                                             va_block,
                                                                             &va_block_retry,
                                                                             fault_entry,
                                                                             service_context,
@@ -503,14 +502,16 @@ static void kill_channel_delayed_entry(void *user_channel)
    UVM_ENTRY_VOID(kill_channel_delayed(user_channel));
 }

-static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_user_channel_t *user_channel)
+static void schedule_kill_channel(uvm_gpu_t *gpu,
+                                  uvm_fault_buffer_entry_t *fault_entry,
+                                  uvm_user_channel_t *user_channel)
 {
    uvm_va_space_t *va_space = fault_entry->va_space;
-    uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
+    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
-                   (fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
+                   (fault_entry->non_replayable.buffer_index * gpu->parent->fault_buffer_hal->entry_size(gpu->parent));

+    UVM_ASSERT(gpu);
    UVM_ASSERT(va_space);
    UVM_ASSERT(user_channel);

@@ -521,7 +522,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
    user_channel->kill_channel.va_space = va_space;

    // Save the packet to be handled by RM in the channel structure
-    memcpy(user_channel->kill_channel.fault_packet, packet, parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
+    memcpy(user_channel->kill_channel.fault_packet, packet, gpu->parent->fault_buffer_hal->entry_size(gpu->parent));

    // Retain the channel here so it is not prematurely destroyed. It will be
    // released after forwarding the fault to RM in kill_channel_delayed.
@@ -532,7 +533,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
                           kill_channel_delayed_entry,
                           user_channel);

-    nv_kthread_q_schedule_q_item(&parent_gpu->isr.kill_channel_q,
+    nv_kthread_q_schedule_q_item(&gpu->parent->isr.kill_channel_q,
                                 &user_channel->kill_channel.kill_channel_q_item);
 }

@@ -549,7 +550,6 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
                                           uvm_fault_buffer_entry_t *fault_entry,
                                           NV_STATUS lookup_status)
 {
-    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
@@ -557,11 +557,9 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
    NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;

    UVM_ASSERT(!fault_entry->is_fatal);
-    UVM_ASSERT(fault_entry->va_space == va_space);
-    UVM_ASSERT(fault_entry->gpu == gpu);

    // Avoid dropping fault events when the VA block is not found or cannot be created
-    uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
+    uvm_perf_event_notify_gpu_fault(&fault_entry->va_space->perf_events,
                                    NULL,
                                    gpu->id,
                                    UVM_ID_INVALID,
@@ -586,11 +584,11 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,

        ats_invalidate->tlb_batch_pending = false;

-        va_range_next = uvm_va_space_iter_first(va_space, fault_entry->fault_address, ~0ULL);
+        va_range_next = uvm_va_space_iter_first(gpu_va_space->va_space, fault_entry->fault_address, ~0ULL);

        // The VA isn't managed. See if ATS knows about it.
        vma = find_vma_intersection(mm, fault_address, fault_address + 1);
-        if (!vma || uvm_ats_check_in_gmmu_region(va_space, fault_address, va_range_next)) {
+        if (!vma || uvm_ats_check_in_gmmu_region(gpu_va_space->va_space, fault_address, va_range_next)) {

            // Do not return error due to logical errors in the application
            status = NV_OK;
@@ -633,24 +631,19 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
    return status;
 }

-static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
-                                    uvm_fault_buffer_entry_t *fault_entry,
-                                    const bool hmm_migratable)
+static NV_STATUS service_fault_once(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry, const bool hmm_migratable)
 {
    NV_STATUS status;
    uvm_user_channel_t *user_channel;
    uvm_va_block_t *va_block;
-    uvm_va_space_t *va_space;
+    uvm_va_space_t *va_space = NULL;
    struct mm_struct *mm;
    uvm_gpu_va_space_t *gpu_va_space;
-    uvm_gpu_t *gpu;
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
-    uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;
+    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
+    uvm_va_block_context_t *va_block_context =
+        gpu->parent->fault_buffer_info.non_replayable.block_service_context.block_context;

-    status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
-                                                    fault_entry,
-                                                    &va_space,
-                                                    &gpu);
+    status = uvm_parent_gpu_fault_entry_to_va_space(gpu->parent, fault_entry, &va_space);
    if (status != NV_OK) {
        // The VA space lookup will fail if we're running concurrently with
        // removal of the channel from the VA space (channel unregister, GPU VA
@@ -664,12 +657,10 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
        // replayable faults only use the address space of their channel.
        UVM_ASSERT(status == NV_ERR_INVALID_CHANNEL);
        UVM_ASSERT(!va_space);
-        UVM_ASSERT(!gpu);
        return NV_OK;
    }

    UVM_ASSERT(va_space);
-    UVM_ASSERT(gpu);

    // If an mm is registered with the VA space, we have to retain it
    // in order to lock it before locking the VA space. It is guaranteed
@@ -680,7 +671,8 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,

    uvm_va_space_down_read(va_space);

-    gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
+    gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
+
    if (!gpu_va_space) {
        // The va_space might have gone away. See the comment above.
        status = NV_OK;
@@ -688,7 +680,6 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
    }

    fault_entry->va_space = va_space;
-    fault_entry->gpu = gpu;

    user_channel = uvm_gpu_va_space_get_user_channel(gpu_va_space, fault_entry->instance_ptr);
    if (!user_channel) {
@@ -701,25 +692,26 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,

    if (!fault_entry->is_fatal) {
        if (mm) {
-            status = uvm_va_block_find_create(va_space,
+            status = uvm_va_block_find_create(fault_entry->va_space,
                                              fault_entry->fault_address,
                                              &va_block_context->hmm.vma,
                                              &va_block);
        }
        else {
-            status = uvm_va_block_find_create_managed(va_space,
+            status = uvm_va_block_find_create_managed(fault_entry->va_space,
                                                      fault_entry->fault_address,
                                                      &va_block);
        }
        if (status == NV_OK)
-            status = service_managed_fault_in_block(va_block, fault_entry, hmm_migratable);
+            status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry, hmm_migratable);
        else
            status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);

        // We are done, we clear the faulted bit on the channel, so it can be
        // re-scheduled again
        if (status == NV_OK && !fault_entry->is_fatal) {
-            status = clear_faulted_on_gpu(user_channel,
+            status = clear_faulted_on_gpu(gpu,
+                                          user_channel,
                                          fault_entry,
                                          non_replayable_faults->batch_id,
                                          &non_replayable_faults->fault_service_tracker);
@@ -728,13 +720,13 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
    }

    if (fault_entry->is_fatal)
-        uvm_tools_record_gpu_fatal_fault(gpu->id, va_space, fault_entry, fault_entry->fatal_reason);
+        uvm_tools_record_gpu_fatal_fault(gpu->id, fault_entry->va_space, fault_entry, fault_entry->fatal_reason);

    if (fault_entry->is_fatal ||
        (status != NV_OK &&
         status != NV_WARN_MORE_PROCESSING_REQUIRED &&
         status != NV_WARN_MISMATCHED_TARGET))
-        schedule_kill_channel(fault_entry, user_channel);
+        schedule_kill_channel(gpu, fault_entry, user_channel);

 exit_no_channel:
    uvm_va_space_up_read(va_space);
@@ -743,23 +735,22 @@ exit_no_channel:
    if (status != NV_OK &&
        status != NV_WARN_MORE_PROCESSING_REQUIRED &&
        status != NV_WARN_MISMATCHED_TARGET)
-        UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n",
-                      uvm_parent_gpu_name(parent_gpu));
+        UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));

    return status;
 }

-static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
+static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry)
 {
    uvm_service_block_context_t *service_context =
-        &parent_gpu->fault_buffer_info.non_replayable.block_service_context;
+        &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
    NV_STATUS status;
    bool hmm_migratable = true;

    service_context->num_retries = 0;

    do {
-        status = service_fault_once(parent_gpu, fault_entry, hmm_migratable);
+        status = service_fault_once(gpu, fault_entry, hmm_migratable);
        if (status == NV_WARN_MISMATCHED_TARGET) {
            hmm_migratable = false;
            status = NV_WARN_MORE_PROCESSING_REQUIRED;
@@ -769,7 +760,7 @@ static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_en
    return status;
 }

-void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent_gpu)
+void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
 {
    NvU32 cached_faults;

@@ -781,7 +772,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent
        NV_STATUS status;
        NvU32 i;

-        status = fetch_non_replayable_fault_buffer_entries(parent_gpu, &cached_faults);
+        status = fetch_non_replayable_fault_buffer_entries(gpu->parent, &cached_faults);
        if (status != NV_OK)
            return;

@@ -789,7 +780,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent
        // non-replayable faults since getting multiple faults on the same
        // memory region is not very likely
        for (i = 0; i < cached_faults; ++i) {
-            status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
+            status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
            if (status != NV_OK)
                return;
        }
--- a/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.h
@@ -28,7 +28,7 @@

 bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);

-void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent_gpu);
+void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu);

 NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);

--- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
--- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.h
@@ -73,5 +73,5 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);

 // Service pending replayable faults on the given GPU. This function must be
 // only called from the ISR bottom half
-void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu);
+void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu);
 #endif // __UVM_GPU_PAGE_FAULT_H__
--- a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c
@@ -507,11 +507,12 @@ uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semapho
    return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
 }

-NvU32 *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
+uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
 {
-    char *notifier_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);
+    uvm_gpu_semaphore_notifier_t *notifier_base_va =
+        uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);

-    return (NvU32*)(notifier_base_va + semaphore->index * sizeof(NvU32));
+    return notifier_base_va + semaphore->index;
 }

 uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
@@ -519,7 +520,8 @@ uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *sem
    NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
                                                       semaphore->page->pool->gpu);

-    return uvm_gpu_address_virtual_unprotected(notifier_base_va + semaphore->index * sizeof(NvU32));
+    return uvm_gpu_address_virtual_unprotected(notifier_base_va +
+                                               semaphore->index * sizeof(uvm_gpu_semaphore_notifier_t));
 }

 void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
@@ -583,7 +585,7 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
    return true;
 }

-static bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
+bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
 {
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));

@@ -622,22 +624,11 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
    uvm_gpu_semaphore_free(&tracking_sem->semaphore);
 }

-static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
+static void gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
 {
-    // No new value, or the GPU is currently writing the new encrypted material
-    // and no change in value would still result in corrupted data.
-    return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
-}
-
-static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
-{
-    UvmCslIv local_iv;
    NvU32 local_payload;
-    NvU32 new_sem_value;
-    NvU32 gpu_notifier;
-    NvU32 last_observed_notifier;
-    NvU32 new_gpu_notifier = 0;
-    NvU32 iv_index = 0;
+    uvm_gpu_semaphore_notifier_t gpu_notifier;
+    uvm_gpu_semaphore_notifier_t new_gpu_notifier = 0;

    // A channel can have multiple entries pending and the tracking semaphore
    // update of each entry can race with this function. Since the semaphore
@@ -646,62 +637,72 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
    unsigned tries_left = channel->num_gpfifo_entries;
    NV_STATUS status = NV_OK;
    NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
-    UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
-    NvU32 *gpu_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);
+    uvm_gpu_semaphore_notifier_t *semaphore_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(uvm_channel_is_ce(channel));

-    last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
-    gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
-    UVM_ASSERT(last_observed_notifier <= gpu_notifier);
-
-    if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
-        return;
-
    do {
-        gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
+        gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);
+
+        UVM_ASSERT(gpu_notifier >= semaphore->conf_computing.last_observed_notifier);

        // Odd notifier value means there's an update in progress.
        if (gpu_notifier % 2)
            continue;

+        // There's no change since last time
+        if (gpu_notifier == semaphore->conf_computing.last_observed_notifier)
+            return;
+
        // Make sure no memory accesses happen before we read the notifier
        smp_mb__after_atomic();

-        iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
        memcpy(local_auth_tag, uvm_gpu_semaphore_get_auth_tag_cpu_va(semaphore), sizeof(local_auth_tag));
        local_payload = UVM_READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
-        memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));

        // Make sure the second read of notifier happens after
        // all memory accesses.
        smp_mb__before_atomic();
-        new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
+        new_gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);
        tries_left--;
    } while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));

    if (!tries_left) {
        status = NV_ERR_INVALID_STATE;
-        goto error;
    }
+    else {
+        NvU32 key_version;
+        const NvU32 iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
+        NvU32 new_semaphore_value;
+
+        UVM_ASSERT(gpu_notifier == new_gpu_notifier);
+        UVM_ASSERT(gpu_notifier % 2 == 0);
+
+        // CPU decryption is guaranteed to use the same key version as the
+        // associated GPU encryption, because if there was any key rotation in
+        // between, then key rotation waited for all channels to complete before
+        // proceeding. The wait implies that the semaphore value matches the
+        // last one encrypted on the GPU, so this CPU decryption should happen
+        // before the key is rotated.
+        key_version = uvm_channel_pool_key_version(channel->pool);

-    if (gpu_notifier == new_gpu_notifier) {
        status = uvm_conf_computing_cpu_decrypt(channel,
-                                                &new_sem_value,
+                                                &new_semaphore_value,
                                                &local_payload,
-                                                &local_iv,
-                                                sizeof(new_sem_value),
+                                                &semaphore->conf_computing.ivs[iv_index],
+                                                key_version,
+                                                sizeof(new_semaphore_value),
                                                &local_auth_tag);

        if (status != NV_OK)
            goto error;

-        uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
+        uvm_gpu_semaphore_set_payload(semaphore, new_semaphore_value);
        UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
-    }

-    return;
+        return;
+    }

 error:
    // Decryption failure is a fatal error as well as running out of try left.
@@ -728,7 +729,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
        // TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
        //                     mechanism to all semaphore
        uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
-        uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
+        gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
    }

    new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
--- a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.h
@@ -29,6 +29,8 @@
 #include "uvm_rm_mem.h"
 #include "uvm_linux.h"

+typedef NvU32 uvm_gpu_semaphore_notifier_t;
+
 // A GPU semaphore is a memory location accessible by the GPUs and the CPU
 // that's used for synchronization among them.
 // The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
@@ -52,8 +54,8 @@ struct uvm_gpu_semaphore_struct
        UvmCslIv *ivs;
        NvU32 cached_payload;

-        NvU32 last_pushed_notifier;
-        NvU32 last_observed_notifier;
+        uvm_gpu_semaphore_notifier_t last_pushed_notifier;
+        uvm_gpu_semaphore_notifier_t last_observed_notifier;
    } conf_computing;
 };

@@ -154,7 +156,7 @@ NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore);
 NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore);
 uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore);

-NvU32 *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
+uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
 uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore);

 void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore);
--- a/kernel-open/nvidia-uvm/uvm_hal.c
+++ b/kernel-open/nvidia-uvm/uvm_hal.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -44,8 +44,6 @@
 #include "clc7b5.h"
 #include "clc86f.h"
 #include "clc8b5.h"
-#include "clc96f.h"
-#include "clc9b5.h"

 static int uvm_downgrade_force_membar_sys = 1;
 module_param(uvm_downgrade_force_membar_sys, uint, 0644);
@@ -166,11 +164,6 @@ static uvm_hal_class_ops_t ce_table[] =
            .decrypt = uvm_hal_hopper_ce_decrypt,
        },
    },
-    {
-        .id = BLACKWELL_DMA_COPY_A,
-        .parent_id = HOPPER_DMA_COPY_A,
-        .u.ce_ops = {},
-    },
 };

 // Table for GPFIFO functions.  Same idea as the copy engine table.
@@ -258,9 +251,6 @@ static uvm_hal_class_ops_t host_table[] =
            .semaphore_release = uvm_hal_turing_host_semaphore_release,
            .clear_faulted_channel_method = uvm_hal_turing_host_clear_faulted_channel_method,
            .set_gpfifo_entry = uvm_hal_turing_host_set_gpfifo_entry,
-            .tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
-            .tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
-            .tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
        }
    },
    {
@@ -293,15 +283,6 @@ static uvm_hal_class_ops_t host_table[] =
            .set_gpfifo_pushbuffer_segment_base = uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base,
        }
    },
-    {
-        .id = BLACKWELL_CHANNEL_GPFIFO_A,
-        .parent_id = HOPPER_CHANNEL_GPFIFO_A,
-        .u.host_ops = {
-            .tlb_invalidate_all = uvm_hal_blackwell_host_tlb_invalidate_all,
-            .tlb_invalidate_va = uvm_hal_blackwell_host_tlb_invalidate_va,
-            .tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
-        }
-    },
 };

 static uvm_hal_class_ops_t arch_table[] =
@@ -313,6 +294,7 @@ static uvm_hal_class_ops_t arch_table[] =
            .mmu_mode_hal = uvm_hal_mmu_mode_maxwell,
            .enable_prefetch_faults = uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported,
            .disable_prefetch_faults = uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported,
+            .mmu_engine_id_to_type = uvm_hal_maxwell_mmu_engine_id_to_type_unsupported,
            .mmu_client_id_to_utlb_id = uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported,
        }
    },
@@ -338,6 +320,7 @@ static uvm_hal_class_ops_t arch_table[] =
        .u.arch_ops = {
            .init_properties = uvm_hal_volta_arch_init_properties,
            .mmu_mode_hal = uvm_hal_mmu_mode_volta,
+            .mmu_engine_id_to_type = uvm_hal_volta_mmu_engine_id_to_type,
            .mmu_client_id_to_utlb_id = uvm_hal_volta_mmu_client_id_to_utlb_id,
        },
    },
@@ -347,6 +330,7 @@ static uvm_hal_class_ops_t arch_table[] =
        .u.arch_ops = {
            .init_properties = uvm_hal_turing_arch_init_properties,
            .mmu_mode_hal = uvm_hal_mmu_mode_turing,
+            .mmu_engine_id_to_type = uvm_hal_turing_mmu_engine_id_to_type,
        },
    },
    {
@@ -355,6 +339,7 @@ static uvm_hal_class_ops_t arch_table[] =
        .u.arch_ops = {
            .init_properties = uvm_hal_ampere_arch_init_properties,
            .mmu_mode_hal = uvm_hal_mmu_mode_ampere,
+            .mmu_engine_id_to_type = uvm_hal_ampere_mmu_engine_id_to_type,
            .mmu_client_id_to_utlb_id = uvm_hal_ampere_mmu_client_id_to_utlb_id,
        },
    },
@@ -371,18 +356,10 @@ static uvm_hal_class_ops_t arch_table[] =
        .u.arch_ops = {
            .init_properties = uvm_hal_hopper_arch_init_properties,
            .mmu_mode_hal = uvm_hal_mmu_mode_hopper,
+            .mmu_engine_id_to_type = uvm_hal_hopper_mmu_engine_id_to_type,
            .mmu_client_id_to_utlb_id = uvm_hal_hopper_mmu_client_id_to_utlb_id,
        },
    },
-    {
-        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
-        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
-        .u.arch_ops = {
-            .init_properties = uvm_hal_blackwell_arch_init_properties,
-            .mmu_mode_hal = uvm_hal_mmu_mode_blackwell,
-            .mmu_client_id_to_utlb_id = uvm_hal_blackwell_mmu_client_id_to_utlb_id,
-        }
-    },
 };

 static uvm_hal_class_ops_t fault_buffer_table[] =
@@ -397,7 +374,6 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
            .read_get = uvm_hal_maxwell_fault_buffer_read_get_unsupported,
            .write_get = uvm_hal_maxwell_fault_buffer_write_get_unsupported,
            .get_ve_id = uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported,
-            .get_mmu_engine_type = uvm_hal_maxwell_fault_buffer_get_mmu_engine_type_unsupported,
            .parse_replayable_entry = uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported,
            .entry_is_valid = uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported,
            .entry_clear_valid = uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported,
@@ -436,7 +412,6 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
            .read_get = uvm_hal_volta_fault_buffer_read_get,
            .write_get = uvm_hal_volta_fault_buffer_write_get,
            .get_ve_id = uvm_hal_volta_fault_buffer_get_ve_id,
-            .get_mmu_engine_type = uvm_hal_volta_fault_buffer_get_mmu_engine_type,
            .parse_replayable_entry = uvm_hal_volta_fault_buffer_parse_replayable_entry,
            .parse_non_replayable_entry = uvm_hal_volta_fault_buffer_parse_non_replayable_entry,
            .get_fault_type = uvm_hal_volta_fault_buffer_get_fault_type,
@@ -448,15 +423,12 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
        .u.fault_buffer_ops = {
            .disable_replayable_faults = uvm_hal_turing_disable_replayable_faults,
            .clear_replayable_faults = uvm_hal_turing_clear_replayable_faults,
-            .get_mmu_engine_type = uvm_hal_turing_fault_buffer_get_mmu_engine_type,
        }
    },
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
-        .u.fault_buffer_ops = {
-            .get_mmu_engine_type = uvm_hal_ampere_fault_buffer_get_mmu_engine_type,
-        }
+        .u.fault_buffer_ops = {}
    },
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
@@ -468,15 +440,6 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
        .u.fault_buffer_ops = {
            .get_ve_id = uvm_hal_hopper_fault_buffer_get_ve_id,
-            .get_mmu_engine_type = uvm_hal_hopper_fault_buffer_get_mmu_engine_type,
-        }
-    },
-    {
-        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
-        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
-        .u.fault_buffer_ops = {
-            .get_fault_type = uvm_hal_blackwell_fault_buffer_get_fault_type,
-            .get_mmu_engine_type = uvm_hal_blackwell_fault_buffer_get_mmu_engine_type,
        }
    },
 };
@@ -541,11 +504,6 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
        .u.access_counter_buffer_ops = {}
    },
-    {
-        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
-        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
-        .u.access_counter_buffer_ops = {}
-    },
 };

 static uvm_hal_class_ops_t sec2_table[] =
@@ -599,11 +557,6 @@ static uvm_hal_class_ops_t sec2_table[] =
            .decrypt = uvm_hal_hopper_sec2_decrypt,
        }
    },
-    {
-        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
-        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
-        .u.sec2_ops = {}
-    },
 };

 static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
@@ -679,19 +632,13 @@ NV_STATUS uvm_hal_init_table(void)
        return status;
    }

-    status = ops_init_from_parent(host_table,
-                                  ARRAY_SIZE(host_table),
-                                  HOST_OP_COUNT,
-                                  offsetof(uvm_hal_class_ops_t, u.host_ops));
+    status = ops_init_from_parent(host_table, ARRAY_SIZE(host_table), HOST_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.host_ops));
    if (status != NV_OK) {
        UVM_ERR_PRINT("ops_init_from_parent(host_table) failed: %s\n", nvstatusToString(status));
        return status;
    }

-    status = ops_init_from_parent(arch_table,
-                                  ARRAY_SIZE(arch_table),
-                                  ARCH_OP_COUNT,
-                                  offsetof(uvm_hal_class_ops_t, u.arch_ops));
+    status = ops_init_from_parent(arch_table, ARRAY_SIZE(arch_table), ARCH_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.arch_ops));
    if (status != NV_OK) {
        UVM_ERR_PRINT("ops_init_from_parent(arch_table) failed: %s\n", nvstatusToString(status));
        return status;
@@ -831,9 +778,6 @@ void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)

    gpu = uvm_push_get_gpu(push);

-    // TLB invalidate on Blackwell+ GPUs should not use a standalone membar.
-    UVM_ASSERT(gpu->parent->rm_info.gpuArch < NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100);
-
    for (i = 0; i < gpu->parent->num_hshub_tlb_invalidate_membars; i++)
        gpu->parent->host_hal->membar_gpu(push);

@@ -939,7 +883,7 @@ const char *uvm_fault_access_type_string(uvm_fault_access_type_t fault_access_ty

 const char *uvm_fault_type_string(uvm_fault_type_t fault_type)
 {
-    BUILD_BUG_ON(UVM_FAULT_TYPE_COUNT != 17);
+    BUILD_BUG_ON(UVM_FAULT_TYPE_COUNT != 16);

    switch (fault_type) {
        UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_INVALID_PDE);
@@ -958,7 +902,6 @@ const char *uvm_fault_type_string(uvm_fault_type_t fault_type)
        UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_UNSUPPORTED_KIND);
        UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_REGION_VIOLATION);
        UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_POISONED);
-        UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_CC_VIOLATION);
        UVM_ENUM_STRING_DEFAULT();
    }
 }
@@ -989,16 +932,14 @@ const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type)
 void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
 {
    UVM_DBG_PRINT("fault_address:                    0x%llx\n", entry->fault_address);
-    UVM_DBG_PRINT("    fault_instance_ptr:           {0x%llx:%s}\n",
-                  entry->instance_ptr.address,
-                  uvm_aperture_string(entry->instance_ptr.aperture));
+    UVM_DBG_PRINT("    fault_instance_ptr:           {0x%llx:%s}\n", entry->instance_ptr.address,
+                                                                     uvm_aperture_string(entry->instance_ptr.aperture));
    UVM_DBG_PRINT("    fault_type:                   %s\n", uvm_fault_type_string(entry->fault_type));
    UVM_DBG_PRINT("    fault_access_type:            %s\n", uvm_fault_access_type_string(entry->fault_access_type));
    UVM_DBG_PRINT("    is_replayable:                %s\n", entry->is_replayable? "true": "false");
    UVM_DBG_PRINT("    is_virtual:                   %s\n", entry->is_virtual? "true": "false");
    UVM_DBG_PRINT("    in_protected_mode:            %s\n", entry->in_protected_mode? "true": "false");
-    UVM_DBG_PRINT("    fault_source.client_type:     %s\n",
-                  uvm_fault_client_type_string(entry->fault_source.client_type));
+    UVM_DBG_PRINT("    fault_source.client_type:     %s\n", uvm_fault_client_type_string(entry->fault_source.client_type));
    UVM_DBG_PRINT("    fault_source.client_id:       %d\n", entry->fault_source.client_id);
    UVM_DBG_PRINT("    fault_source.gpc_id:          %d\n", entry->fault_source.gpc_id);
    UVM_DBG_PRINT("    fault_source.mmu_engine_id:   %d\n", entry->fault_source.mmu_engine_id);
@@ -1021,15 +962,13 @@ const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_coun
 void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
 {
    if (!entry->address.is_virtual) {
-        UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
-                      entry->address.address,
-                      uvm_aperture_string(entry->address.aperture));
+        UVM_DBG_PRINT("physical address: {0x%llx:%s}\n", entry->address.address,
+                                                         uvm_aperture_string(entry->address.aperture));
    }
    else {
        UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
-        UVM_DBG_PRINT("    instance_ptr    {0x%llx:%s}\n",
-                      entry->virtual_info.instance_ptr.address,
-                      uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
+        UVM_DBG_PRINT("    instance_ptr    {0x%llx:%s}\n", entry->virtual_info.instance_ptr.address,
+                                                    uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
        UVM_DBG_PRINT("    mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
        UVM_DBG_PRINT("    mmu_engine_id   %u\n", entry->virtual_info.mmu_engine_id);
        UVM_DBG_PRINT("    ve_id           %u\n", entry->virtual_info.ve_id);
--- a/kernel-open/nvidia-uvm/uvm_hal.h
+++ b/kernel-open/nvidia-uvm/uvm_hal.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -112,10 +112,6 @@ void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
-void uvm_hal_turing_host_tlb_invalidate_all(uvm_push_t *push,
-                                            uvm_gpu_phys_address_t pdb,
-                                            NvU32 depth,
-                                            uvm_membar_t membar);
 void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
@@ -124,10 +120,6 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
-void uvm_hal_blackwell_host_tlb_invalidate_all(uvm_push_t *push,
-                                               uvm_gpu_phys_address_t pdb,
-                                               NvU32 depth,
-                                               uvm_membar_t membar);

 // Issue a TLB invalidate applying to the specified VA range in a PDB.
 //
@@ -157,57 +149,43 @@ typedef void (*uvm_hal_host_tlb_invalidate_va_t)(uvm_push_t *push,
                                                 NvU32 depth,
                                                 NvU64 base,
                                                 NvU64 size,
-                                                 NvU64 page_size,
+                                                 NvU32 page_size,
                                                 uvm_membar_t membar);
 void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            NvU64 base,
                                            NvU64 size,
-                                            NvU64 page_size,
+                                            NvU32 page_size,
                                            uvm_membar_t membar);
 void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
-                                           NvU64 page_size,
+                                           NvU32 page_size,
                                           uvm_membar_t membar);
 void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
                                          uvm_gpu_phys_address_t pdb,
                                          NvU32 depth,
                                          NvU64 base,
                                          NvU64 size,
-                                          NvU64 page_size,
+                                          NvU32 page_size,
                                          uvm_membar_t membar);
-void uvm_hal_turing_host_tlb_invalidate_va(uvm_push_t *push,
-                                           uvm_gpu_phys_address_t pdb,
-                                           NvU32 depth,
-                                           NvU64 base,
-                                           NvU64 size,
-                                           NvU64 page_size,
-                                           uvm_membar_t membar);
 void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
-                                           NvU64 page_size,
+                                           NvU32 page_size,
                                           uvm_membar_t membar);
 void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
-                                           NvU64 page_size,
+                                           NvU32 page_size,
                                           uvm_membar_t membar);
-void uvm_hal_blackwell_host_tlb_invalidate_va(uvm_push_t *push,
-                                              uvm_gpu_phys_address_t pdb,
-                                              NvU32 depth,
-                                              NvU64 base,
-                                              NvU64 size,
-                                              NvU64 page_size,
-                                              uvm_membar_t membar);

 typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
                                                   uvm_gpu_phys_address_t pdb,
@@ -218,18 +196,12 @@ void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
 void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
-void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
-                                             uvm_gpu_phys_address_t pdb,
-                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
 void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
 void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
-void uvm_hal_blackwell_host_tlb_invalidate_test(uvm_push_t *push,
-                                                uvm_gpu_phys_address_t pdb,
-                                                UVM_TEST_INVALIDATE_TLB_PARAMS *params);

 // By default all semaphore release operations include a membar sys before the
 // operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
@@ -471,31 +443,38 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu);

 // Retrieve the page-tree HAL for a given big page size
-typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU64 big_page_size);
+typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
 typedef void (*uvm_hal_mmu_enable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
 typedef void (*uvm_hal_mmu_disable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size);
-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size);
-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU64 big_page_size);
-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
 void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_pascal_mmu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);

+// Convert a faulted MMU engine ID to a UVM engine type. Only engines which have
+// faults serviced by UVM are handled. On Pascal the only such engine is
+// GRAPHICS, so no translation is provided.
+typedef uvm_mmu_engine_type_t (*uvm_hal_mmu_engine_id_to_type_t)(NvU16 mmu_engine_id);
+uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mmu_engine_id);
+uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
+uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
+uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
+uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);
+
 typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
 NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
 NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
 NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
 NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
 NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);
-NvU16 uvm_hal_blackwell_mmu_client_id_to_utlb_id(NvU16 client_id);

 // Replayable faults
 typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
@@ -505,9 +484,6 @@ typedef NvU32 (*uvm_hal_fault_buffer_read_put_t)(uvm_parent_gpu_t *parent_gpu);
 typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
 typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
 typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
-typedef uvm_mmu_engine_type_t (*uvm_hal_fault_buffer_get_mmu_engine_type_t)(NvU16 mmu_engine_id,
-                                                                            uvm_fault_client_type_t client_type,
-                                                                            NvU16 client_id);

 // Parse the replayable entry at the given buffer index. This also clears the
 // valid bit of the entry in the buffer.
@@ -545,9 +521,6 @@ NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent
 NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
-uvm_mmu_engine_type_t uvm_hal_maxwell_fault_buffer_get_mmu_engine_type_unsupported(NvU16 mmu_engine_id,
-                                                                                   uvm_fault_client_type_t client_type,
-                                                                                   NvU16 client_id);
 uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry);

 void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
@@ -563,31 +536,12 @@ NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
 NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
-uvm_mmu_engine_type_t uvm_hal_volta_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
-                                                                     uvm_fault_client_type_t client_type,
-                                                                     NvU16 client_id);

 uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry);

 void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
-uvm_mmu_engine_type_t uvm_hal_turing_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
-                                                                      uvm_fault_client_type_t client_type,
-                                                                      NvU16 client_id);
-
-uvm_mmu_engine_type_t uvm_hal_ampere_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
-                                                                      uvm_fault_client_type_t client_type,
-                                                                      NvU16 client_id);
-
 NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
-uvm_mmu_engine_type_t uvm_hal_hopper_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
-                                                                      uvm_fault_client_type_t client_type,
-                                                                      NvU16 client_id);
-
-uvm_mmu_engine_type_t uvm_hal_blackwell_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
-                                                                         uvm_fault_client_type_t client_type,
-                                                                         NvU16 client_id);
-uvm_fault_type_t uvm_hal_blackwell_fault_buffer_get_fault_type(const NvU32 *fault_entry);

 bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
@@ -811,6 +765,7 @@ struct uvm_arch_hal_struct
    uvm_hal_lookup_mode_hal_t mmu_mode_hal;
    uvm_hal_mmu_enable_prefetch_faults_t enable_prefetch_faults;
    uvm_hal_mmu_disable_prefetch_faults_t disable_prefetch_faults;
+    uvm_hal_mmu_engine_id_to_type_t mmu_engine_id_to_type;
    uvm_hal_mmu_client_id_to_utlb_id_t mmu_client_id_to_utlb_id;
 };

@@ -823,7 +778,6 @@ struct uvm_fault_buffer_hal_struct
    uvm_hal_fault_buffer_read_get_t read_get;
    uvm_hal_fault_buffer_write_get_t write_get;
    uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
-    uvm_hal_fault_buffer_get_mmu_engine_type_t get_mmu_engine_type;
    uvm_hal_fault_buffer_parse_replayable_entry_t parse_replayable_entry;
    uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
    uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;
--- a/kernel-open/nvidia-uvm/uvm_hal_types.h
+++ b/kernel-open/nvidia-uvm/uvm_hal_types.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -300,7 +300,6 @@ typedef enum
    UVM_FAULT_TYPE_UNSUPPORTED_KIND,
    UVM_FAULT_TYPE_REGION_VIOLATION,
    UVM_FAULT_TYPE_POISONED,
-    UVM_FAULT_TYPE_CC_VIOLATION,

    UVM_FAULT_TYPE_COUNT
 } uvm_fault_type_t;
@@ -400,7 +399,6 @@ struct uvm_fault_buffer_entry_struct
    //

    uvm_va_space_t                           *va_space;
-    uvm_gpu_t                                     *gpu;

    // This is set to true when some fault could not be serviced and a
    // cancel command needs to be issued
@@ -492,9 +490,9 @@ struct uvm_access_counter_buffer_entry_struct
    // Address of the region for which a notification was sent
    uvm_gpu_address_t address;

+    // These fields are only valid if address.is_virtual is true
    union
    {
-        // These fields are only valid if address.is_virtual is true
        struct
        {
            // Instance pointer of one of the channels in the TSG that triggered
@@ -524,14 +522,9 @@ struct uvm_access_counter_buffer_entry_struct
            // a regular processor id because P2P is not allowed between
            // partitioned GPUs.
            uvm_processor_id_t resident_id;
-
        } physical_info;
    };

-    // This is the GPU that triggered the notification. Note that physical
-    // address based notifications are only supported on non-MIG-capable GPUs.
-    uvm_gpu_t *gpu;
-
    // Number of times the tracked region was accessed since the last time it
    // was cleared. Counter values saturate at the maximum value supported by
    // the GPU (2^16 - 1 in Volta)
--- a/kernel-open/nvidia-uvm/uvm_hmm.c
+++ b/kernel-open/nvidia-uvm/uvm_hmm.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -1601,7 +1601,7 @@ static void hmm_va_block_cpu_unpopulate_chunk(uvm_va_block_t *va_block,
    UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == PAGE_SIZE);

    uvm_cpu_chunk_remove_from_block(va_block, chunk_nid, page_index);
-    uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk);
+    uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk, page_index);
    uvm_cpu_chunk_free(chunk);
 }

@@ -1706,6 +1706,8 @@ static void gpu_chunk_remove(uvm_va_block_t *va_block,
        return;
    }

+    // TODO: Bug 3898467: unmap indirect peers when freeing GPU chunks
+
    uvm_mmu_chunk_unmap(gpu_chunk, &va_block->tracker);
    gpu_state->chunks[page_index] = NULL;
 }
@@ -1754,6 +1756,8 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
    if (status != NV_OK)
        return status;

+    // TODO: Bug 3898467: map indirect peers.
+
    uvm_processor_mask_set(&va_block->resident, id);
    uvm_page_mask_set(&gpu_state->resident, page_index);

@@ -2274,7 +2278,7 @@ static NV_STATUS populate_region(uvm_va_block_t *va_block,
            // uvm_hmm_invalidate() should handle that if the underlying page
            // is invalidated.
            // Also note there can be an allocated page due to GPU-to-GPU
-            // migration between non-peer GPUs.
+            // migration between non-peer or indirect peer GPUs.
            continue;
        }

--- a/kernel-open/nvidia-uvm/uvm_hopper_fault_buffer.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper_fault_buffer.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2020-2024 NVIDIA Corporation
+    Copyright (c) 2020 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -21,7 +21,6 @@

 *******************************************************************************/

-#include "uvm_hal.h"
 #include "uvm_hal_types.h"
 #include "hwref/hopper/gh100/dev_fault.h"

@@ -41,49 +40,3 @@ NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_t
        return 0;
    }
 }
-
-static bool client_id_ce(NvU16 client_id)
-{
-    if (client_id >= NV_PFAULT_CLIENT_HUB_HSCE0 && client_id <= NV_PFAULT_CLIENT_HUB_HSCE9)
-        return true;
-
-    if (client_id >= NV_PFAULT_CLIENT_HUB_HSCE10 && client_id <= NV_PFAULT_CLIENT_HUB_HSCE15)
-        return true;
-
-    switch (client_id) {
-        case NV_PFAULT_CLIENT_HUB_CE0:
-        case NV_PFAULT_CLIENT_HUB_CE1:
-        case NV_PFAULT_CLIENT_HUB_CE2:
-        case NV_PFAULT_CLIENT_HUB_CE3:
-            return true;
-    }
-
-    return false;
-}
-
-uvm_mmu_engine_type_t uvm_hal_hopper_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
-                                                                      uvm_fault_client_type_t client_type,
-                                                                      NvU16 client_id)
-{
-    // Servicing CE and Host (HUB clients) faults.
-    if (client_type == UVM_FAULT_CLIENT_TYPE_HUB) {
-        if (client_id_ce(client_id)) {
-            UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9);
-
-            return UVM_MMU_ENGINE_TYPE_CE;
-        }
-
-        if (client_id == NV_PFAULT_CLIENT_HUB_HOST || client_id == NV_PFAULT_CLIENT_HUB_ESC) {
-            UVM_ASSERT(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44);
-
-            return UVM_MMU_ENGINE_TYPE_HOST;
-        }
-    }
-
-    // We shouldn't be servicing faults from any other engines other than GR.
-    UVM_ASSERT_MSG(client_id <= NV_PFAULT_CLIENT_GPC_ROP_3, "Unexpected client ID: 0x%x\n", client_id);
-    UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS, "Unexpected engine ID: 0x%x\n", mmu_engine_id);
-    UVM_ASSERT(client_type == UVM_FAULT_CLIENT_TYPE_GPC);
-
-    return UVM_MMU_ENGINE_TYPE_GRAPHICS;
-}
--- a/kernel-open/nvidia-uvm/uvm_hopper_host.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper_host.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2020-2024 NVIDIA Corporation
+    Copyright (c) 2020-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -157,7 +157,6 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
    NvU32 pdb_lo;
    NvU32 pdb_hi;
    NvU32 ack_value = 0;
-    NvU32 sysmembar_value = 0;

    UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);

@@ -184,12 +183,7 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
        ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
    }

-    if (membar == UVM_MEMBAR_SYS)
-        sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
-    else
-        sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
-
-    NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
+    NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
                               HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
                     MEM_OP_B, 0,
                     MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
@@ -202,9 +196,7 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
                     MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
                               HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));

-    // GPU membar still requires an explicit membar method.
-    if (membar == UVM_MEMBAR_GPU)
-        uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
+    uvm_hal_tlb_invalidate_membar(push, membar);
 }

 void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
@@ -212,7 +204,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
-                                           NvU64 page_size,
+                                           NvU32 page_size,
                                           uvm_membar_t membar)
 {
    NvU32 aperture_value;
@@ -220,7 +212,6 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
    NvU32 pdb_lo;
    NvU32 pdb_hi;
    NvU32 ack_value = 0;
-    NvU32 sysmembar_value = 0;
    NvU32 va_lo;
    NvU32 va_hi;
    NvU64 end;
@@ -230,9 +221,9 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
    NvU32 log2_invalidation_size;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

-    UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
    UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);

    // The invalidation size must be a power-of-two number of pages containing
@@ -286,13 +277,8 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
        ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
    }

-    if (membar == UVM_MEMBAR_SYS)
-        sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
-    else
-        sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
-
    NV_PUSH_4U(C86F, MEM_OP_A, HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
-                               sysmembar_value |
+                               HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
                               HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
                               HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
                     MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
@@ -306,9 +292,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
                     MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
                               HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));

-    // GPU membar still requires an explicit membar method.
-    if (membar == UVM_MEMBAR_GPU)
-        gpu->parent->host_hal->membar_gpu(push);
+    uvm_hal_tlb_invalidate_membar(push, membar);
 }

 void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
@@ -316,12 +300,12 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params)
 {
    NvU32 ack_value = 0;
-    NvU32 sysmembar_value = 0;
    NvU32 invalidate_gpc_value = 0;
    NvU32 aperture_value = 0;
    NvU32 pdb_lo = 0;
    NvU32 pdb_hi = 0;
    NvU32 page_table_level = 0;
+    uvm_membar_t membar;

    UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
    if (pdb.aperture == UVM_APERTURE_VID)
@@ -348,11 +332,6 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
        ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
    }

-    if (params->membar == UvmInvalidateTlbMemBarSys)
-        sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
-    else
-        sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
-
    if (params->disable_gpc_invalidate)
        invalidate_gpc_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
    else
@@ -364,7 +343,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
        NvU32 va_lo = va & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
        NvU32 va_hi = va >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);

-        NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
+        NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
                                   HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
                                   HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
                         MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
@@ -379,7 +358,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
                                   HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
    }
    else {
-        NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
+        NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
                                   HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
                         MEM_OP_B, 0,
                         MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
@@ -393,9 +372,14 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
                                   HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
    }

-    // GPU membar still requires an explicit membar method.
-    if (params->membar == UvmInvalidateTlbMemBarLocal)
-        uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
+    if (params->membar == UvmInvalidateTlbMemBarSys)
+        membar = UVM_MEMBAR_SYS;
+    else if (params->membar == UvmInvalidateTlbMemBarLocal)
+        membar = UVM_MEMBAR_GPU;
+    else
+        membar = UVM_MEMBAR_NONE;
+
+    uvm_hal_tlb_invalidate_membar(push, membar);
 }

 void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va)
--- a/kernel-open/nvidia-uvm/uvm_hopper_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper_mmu.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2020-2024 NVIDIA Corporation
+    Copyright (c) 2020-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -47,7 +47,21 @@
 #define ATS_ALLOWED 0
 #define ATS_NOT_ALLOWED 1

-static NvU32 page_table_depth_hopper(NvU64 page_size)
+uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
+{
+    if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44)
+        return UVM_MMU_ENGINE_TYPE_HOST;
+
+    if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9)
+        return UVM_MMU_ENGINE_TYPE_CE;
+
+    // We shouldn't be servicing faults from any other engines: anything left must be at or above the graphics ID
+    UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS, "Unexpected engine ID: 0x%x\n", mmu_engine_id);
+
+    return UVM_MMU_ENGINE_TYPE_GRAPHICS; // Fallback for every ID outside the HOST and CE ranges
+}
+
+static NvU32 page_table_depth_hopper(NvU32 page_size)
 {
    // The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
    if (page_size == UVM_PAGE_SIZE_2M)
@@ -65,7 +79,7 @@ static NvU32 entries_per_index_hopper(NvU32 depth)
    return 1;
 }

-static NvLength entry_offset_hopper(NvU32 depth, NvU64 page_size)
+static NvLength entry_offset_hopper(NvU32 depth, NvU32 page_size)
 {
    UVM_ASSERT(depth < 6);
    if ((page_size == UVM_PAGE_SIZE_4K) && (depth == 4))
@@ -78,7 +92,7 @@ static NvLength entry_size_hopper(NvU32 depth)
    return entries_per_index_hopper(depth) * 8;
 }

-static NvU32 index_bits_hopper(NvU32 depth, NvU64 page_size)
+static NvU32 index_bits_hopper(NvU32 depth, NvU32 page_size)
 {
    static const NvU32 bit_widths[] = {1, 9, 9, 9, 8};

@@ -106,7 +120,7 @@ static NvU32 num_va_bits_hopper(void)
    return 57;
 }

-static NvLength allocation_size_hopper(NvU32 depth, NvU64 page_size)
+static NvLength allocation_size_hopper(NvU32 depth, NvU32 page_size)
 {
    UVM_ASSERT(depth < 6);
    if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
@@ -219,7 +233,7 @@ static NvU64 make_sparse_pte_hopper(void)
           HWCONST64(_MMU_VER3, PTE, PCF, SPARSE);
 }

-static NvU64 unmapped_pte_hopper(NvU64 page_size)
+static NvU64 unmapped_pte_hopper(NvU32 page_size)
 {
    // Setting PCF to NO_VALID_4KB_PAGE on an otherwise-zeroed big PTE causes
    // the corresponding 4k PTEs to be ignored. This allows the invalidation of
@@ -476,7 +490,7 @@ static void make_pde_hopper(void *entry,

 static uvm_mmu_mode_hal_t hopper_mmu_mode_hal;

-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size)
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size)
 {
    static bool initialized = false;

--- a/kernel-open/nvidia-uvm/uvm_ioctl.h
+++ b/kernel-open/nvidia-uvm/uvm_ioctl.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2013-2024 NVidia Corporation
+    Copyright (c) 2013-2023 NVidia Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -494,7 +494,7 @@ typedef struct
    NvU64                   base                            NV_ALIGN_BYTES(8); // IN
    NvU64                   length                          NV_ALIGN_BYTES(8); // IN
    NvU64                   offset                          NV_ALIGN_BYTES(8); // IN
-    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS];                    // IN
+    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2];                 // IN
    NvU64                   gpuAttributesCount              NV_ALIGN_BYTES(8); // IN
    NvS32                   rmCtrlFd;                                          // IN
    NvU32                   hClient;                                           // IN
@@ -837,6 +837,12 @@ typedef struct
 // Initialize any tracker object such as a queue or counter
 // UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters,
 // UvmToolsCreateProcessorCounters.
+// Note that the version fields are deliberately placed last in the structure.
+// This is used to tell whether the kernel supports V2 events or not because
+// the V1 UVM_TOOLS_INIT_EVENT_TRACKER ioctl would not read or update those
+// fields but V2 will. This is needed because it is possible to create an event
+// queue before CUDA is initialized which means UvmSetDriverVersion() hasn't
+// been called yet and the kernel version is unknown.
 //
 #define UVM_TOOLS_INIT_EVENT_TRACKER                                  UVM_IOCTL_BASE(56)
 typedef struct
@@ -847,8 +853,9 @@ typedef struct
    NvProcessorUuid processor;                            // IN
    NvU32           allProcessors;                        // IN
    NvU32           uvmFd;                                // IN
-    NvU32           version;                              // IN (UvmToolsEventQueueVersion)
    NV_STATUS       rmStatus;                             // OUT
+    NvU32           requestedVersion;                     // IN
+    NvU32           grantedVersion;                       // OUT
 } UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS;

 //
@@ -929,15 +936,23 @@ typedef struct

 //
 // UvmToolsGetProcessorUuidTable
+// Note that tablePtr != 0 and count == 0 means that tablePtr is assumed to be
+// an array of size UVM_MAX_PROCESSORS_V1 and that only UvmEventEntry_V1
+// processor IDs (physical GPU UUIDs) will be reported.
+// tablePtr == 0 and count == 0 can be used to query how many processors are
+// present in order to dynamically allocate the correct size array since the
+// total number of processors is returned in 'count'.
 //
 #define UVM_TOOLS_GET_PROCESSOR_UUID_TABLE                            UVM_IOCTL_BASE(64)
 typedef struct
 {
    NvU64     tablePtr                 NV_ALIGN_BYTES(8); // IN
-    NvU32     version;                                    // IN (UvmToolsEventQueueVersion)
+    NvU32     count;                                      // IN/OUT
    NV_STATUS rmStatus;                                   // OUT
+    NvU32     version;                                    // OUT
 } UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;

+
 //
 // UvmMapDynamicParallelismRegion
 //
@@ -980,7 +995,7 @@ typedef struct
 {
    NvU64                   base                            NV_ALIGN_BYTES(8); // IN
    NvU64                   length                          NV_ALIGN_BYTES(8); // IN
-    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS];                    // IN
+    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2];                 // IN
    NvU64                   gpuAttributesCount              NV_ALIGN_BYTES(8); // IN
    NV_STATUS               rmStatus;                                          // OUT
 } UVM_ALLOC_SEMAPHORE_POOL_PARAMS;
--- a/kernel-open/nvidia-uvm/uvm_lock.c
+++ b/kernel-open/nvidia-uvm/uvm_lock.c
@@ -27,7 +27,7 @@

 const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
 {
-    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 34);
+    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);

    switch (lock_order) {
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
@@ -48,7 +48,9 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
+        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_PUSH);
+        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION_WLC);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_WLC_PUSH);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_SEC2_PUSH);
        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);
--- a/kernel-open/nvidia-uvm/uvm_lock.h
+++ b/kernel-open/nvidia-uvm/uvm_lock.h
@@ -322,6 +322,15 @@
 //      Operations not allowed while holding this lock
 //      - GPU memory allocation which can evict
 //
+// - Channel pool key rotation lock
+//      Order: UVM_LOCK_ORDER_KEY_ROTATION
+//      Condition: Confidential Computing is enabled
+//      Mutex per channel pool
+//
+//      The lock ensures mutual exclusion during key rotation affecting all the
+//      channels in the associated pool. Key rotation in WLC pools is handled
+//      using a separate lock order, see UVM_LOCK_ORDER_KEY_ROTATION_WLC below.
+//
 // - CE channel CSL channel pool semaphore
 //      Order: UVM_LOCK_ORDER_CSL_PUSH
 //      Condition: The Confidential Computing feature is enabled
@@ -338,6 +347,15 @@
 //      Operations allowed while holding this lock
 //      - Pushing work to CE channels (except for WLC channels)
 //
+// - WLC channel pool key rotation lock
+//      Order: UVM_LOCK_ORDER_KEY_ROTATION_WLC
+//      Condition: Confidential Computing is enabled
+//      Mutex of WLC channel pool
+//
+//      The lock has the same purpose as the regular channel pool key rotation
+//      lock. Using a different order lock for WLC channels allows key rotation
+//      on those channels during indirect work submission.
+//
 // - WLC CSL channel pool semaphore
 //      Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
 //      Condition: The Confidential Computing feature is enabled
@@ -484,7 +502,9 @@ typedef enum
    UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL,
    UVM_LOCK_ORDER_CHUNK_MAPPING,
    UVM_LOCK_ORDER_PAGE_TREE,
+    UVM_LOCK_ORDER_KEY_ROTATION,
    UVM_LOCK_ORDER_CSL_PUSH,
+    UVM_LOCK_ORDER_KEY_ROTATION_WLC,
    UVM_LOCK_ORDER_CSL_WLC_PUSH,
    UVM_LOCK_ORDER_CSL_SEC2_PUSH,
    UVM_LOCK_ORDER_PUSH,
--- a/kernel-open/nvidia-uvm/uvm_map_external.c
+++ b/kernel-open/nvidia-uvm/uvm_map_external.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -61,7 +61,7 @@ typedef struct
    size_t buffer_size;

    // Page size in bytes
-    NvU64 page_size;
+    NvU32 page_size;

    // Size of a single PTE in bytes
    NvU32 pte_size;
@@ -91,7 +91,7 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
                                     uvm_gpu_t *gpu,
                                     const uvm_map_rm_params_t *map_rm_params,
                                     NvU64 length,
-                                     NvU64 page_size,
+                                     NvU32 page_size,
                                     uvm_pte_buffer_t *pte_buffer)
 {
    uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_range->va_space, gpu);
@@ -650,7 +650,9 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
        return NV_OK;
    }
    // This is a local or peer allocation, so the owning GPU must have been
-    // registered. This also checks for if EGM owning GPU is registered.
+    // registered.
+    // This also checks whether the EGM owning GPU is registered.
+
    owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
    if (!owning_gpu)
        return NV_ERR_INVALID_DEVICE;
@@ -663,6 +665,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
    // semantics of sysmem allocations.

    // Check if peer access for peer memory is enabled.
+    // This path also handles EGM allocations.
    if (owning_gpu != mapping_gpu && (!mem_info->sysmem || mem_info->egm)) {
        // TODO: Bug 1757136: In SLI, the returned UUID may be different but a
        //       local mapping must be used. We need to query SLI groups to know
@@ -853,10 +856,9 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
    uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
    UvmGpuMemoryInfo mem_info;
    uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, mapping_gpu);
-    NvU64 mapping_page_size;
-    NvU64 biggest_mapping_page_size;
+    NvU32 mapping_page_size;
    NvU64 alignments;
-    NvU64 smallest_alignment;
+    NvU32 smallest_alignment;
    NV_STATUS status;

    uvm_assert_rwsem_locked_read(&va_space->lock);
@@ -945,11 +947,9 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,

    // Check for the maximum page size for the mapping of vidmem allocations,
    // the vMMU segment size may limit the range of page sizes.
-    biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
-                                                                mapping_gpu->mem_info.max_vidmem_page_size);
    if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
-        (mapping_page_size > biggest_mapping_page_size))
-        mapping_page_size = biggest_mapping_page_size;
+        (mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
+        mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;

    mem_info.pageSize = mapping_page_size;

@@ -986,7 +986,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
    if (uvm_api_range_invalid_4k(params->base, params->length))
        return NV_ERR_INVALID_ADDRESS;

-    if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
+    if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
        return NV_ERR_INVALID_ARGUMENT;

    mapped_gpus = uvm_processor_mask_cache_alloc();
--- a/kernel-open/nvidia-uvm/uvm_maxwell_fault_buffer.c
+++ b/kernel-open/nvidia-uvm/uvm_maxwell_fault_buffer.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021-2024 NVIDIA Corporation
+    Copyright (c) 2021-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -74,14 +74,6 @@ NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm
    return 0;
 }

-uvm_mmu_engine_type_t uvm_hal_maxwell_fault_buffer_get_mmu_engine_type_unsupported(NvU16 mmu_engine_id,
-                                                                                   uvm_fault_client_type_t client_type,
-                                                                                   NvU16 client_id)
-{
-    UVM_ASSERT_MSG(false, "fault_buffer_get_mmu_engine_type is not supported on Maxwell GPUs.\n");
-    return UVM_MMU_ENGINE_TYPE_GRAPHICS;
-}
-
 uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry)
 {
    UVM_ASSERT_MSG(false, "fault_buffer_get_fault_type is not supported.\n");
--- a/kernel-open/nvidia-uvm/uvm_maxwell_host.c
+++ b/kernel-open/nvidia-uvm/uvm_maxwell_host.c
@@ -108,7 +108,7 @@ void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
                                            NvU32 depth,
                                            NvU64 base,
                                            NvU64 size,
-                                            NvU64 page_size,
+                                            NvU32 page_size,
                                            uvm_membar_t membar)
 {
    // No per VA invalidate on Maxwell, redirect to invalidate all.
--- a/kernel-open/nvidia-uvm/uvm_maxwell_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_maxwell_mmu.c
@@ -38,7 +38,6 @@
 #include "uvm_forward_decl.h"
 #include "uvm_gpu.h"
 #include "uvm_mmu.h"
-#include "uvm_hal.h"
 #include "uvm_push_macros.h"
 #include "hwref/maxwell/gm107/dev_mmu.h"

@@ -53,7 +52,7 @@ static NvU32 entries_per_index_maxwell(NvU32 depth)
    return 1;
 }

-static NvLength entry_offset_maxwell(NvU32 depth, NvU64 page_size)
+static NvLength entry_offset_maxwell(NvU32 depth, NvU32 page_size)
 {
    UVM_ASSERT(depth < 2);
    if (page_size == UVM_PAGE_SIZE_4K && depth == 0)
@@ -129,7 +128,7 @@ static NvLength entry_size_maxwell(NvU32 depth)
    return 8;
 }

-static NvU32 index_bits_maxwell_64(NvU32 depth, NvU64 page_size)
+static NvU32 index_bits_maxwell_64(NvU32 depth, NvU32 page_size)
 {
    UVM_ASSERT(depth < 2);
    UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
@@ -147,7 +146,7 @@ static NvU32 index_bits_maxwell_64(NvU32 depth, NvU64 page_size)
    }
 }

-static NvU32 index_bits_maxwell_128(NvU32 depth, NvU64 page_size)
+static NvU32 index_bits_maxwell_128(NvU32 depth, NvU32 page_size)
 {
    UVM_ASSERT(depth < 2);
    UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
@@ -170,32 +169,32 @@ static NvU32 num_va_bits_maxwell(void)
    return 40;
 }

-static NvLength allocation_size_maxwell_64(NvU32 depth, NvU64 page_size)
+static NvLength allocation_size_maxwell_64(NvU32 depth, NvU32 page_size)
 {
    return entry_size_maxwell(depth) << index_bits_maxwell_64(depth, page_size);
 }

-static NvLength allocation_size_maxwell_128(NvU32 depth, NvU64 page_size)
+static NvLength allocation_size_maxwell_128(NvU32 depth, NvU32 page_size)
 {
    return entry_size_maxwell(depth) << index_bits_maxwell_128(depth, page_size);
 }

-static NvU32 page_table_depth_maxwell(NvU64 page_size)
+static NvU32 page_table_depth_maxwell(NvU32 page_size)
 {
    return 1;
 }

-static NvU64 page_sizes_maxwell_128(void)
+static NvU32 page_sizes_maxwell_128(void)
 {
    return UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_4K;
 }

-static NvU64 page_sizes_maxwell_64(void)
+static NvU32 page_sizes_maxwell_64(void)
 {
    return UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
 }

-static NvU64 unmapped_pte_maxwell(NvU64 page_size)
+static NvU64 unmapped_pte_maxwell(NvU32 page_size)
 {
    // Setting the privilege bit on an otherwise-zeroed big PTE causes the
    // corresponding 4k PTEs to be ignored. This allows the invalidation of a
@@ -357,7 +356,7 @@ static uvm_mmu_mode_hal_t maxwell_128_mmu_mode_hal =
    .page_sizes = page_sizes_maxwell_128
 };

-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size)
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size)
 {
    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
    if (big_page_size == UVM_PAGE_SIZE_64K)
@@ -376,6 +375,12 @@ void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *p
    UVM_ASSERT_MSG(false, "mmu disable_prefetch_faults called on Maxwell GPU\n");
 }

+uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mmu_engine_id)
+{
+    UVM_ASSERT(0); // Unconditional assert: reaching this function at all is flagged as a bug
+    return UVM_MMU_ENGINE_TYPE_COUNT; // NOTE(review): presumably not a valid engine type, only a placeholder - confirm
+}
+
 NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id)
 {
    UVM_ASSERT(0);
--- a/kernel-open/nvidia-uvm/uvm_mem.c
+++ b/kernel-open/nvidia-uvm/uvm_mem.c
@@ -290,15 +290,15 @@ uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu)
    // Get the mmu mode hal directly as the internal address space tree has not
    // been created yet.
    uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(gpu->big_page.internal_size);
-    NvU64 page_sizes = hal->page_sizes();
+    NvU32 page_sizes = hal->page_sizes();

    return (uvm_chunk_sizes_mask_t)(page_sizes & UVM_CHUNK_SIZES_MASK);
 }

-static NvU64 mem_pick_chunk_size(uvm_mem_t *mem)
+static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
 {
-    NvU64 biggest_page_size;
-    NvU64 chunk_size;
+    NvU32 biggest_page_size;
+    NvU32 chunk_size;

    if (uvm_mem_is_sysmem(mem))
        return PAGE_SIZE;
@@ -315,12 +315,12 @@ static NvU64 mem_pick_chunk_size(uvm_mem_t *mem)
    // When UVM_PAGE_SIZE_DEFAULT is used on NUMA-enabled GPUs, we force
    // chunk_size to be PAGE_SIZE at least, to allow CPU mappings.
    if (mem->backing_gpu->mem_info.numa.enabled)
-        chunk_size = max(chunk_size, (NvU64)PAGE_SIZE);
+        chunk_size = max(chunk_size, (NvU32)PAGE_SIZE);

    return chunk_size;
 }

-static NvU64 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
+static NvU32 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
 {
    if (uvm_mem_is_vidmem(mem)) {
        // For vidmem allocations the chunk size is picked out of the supported
@@ -467,7 +467,7 @@ static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
    NvU64 *dma_addrs;

    UVM_ASSERT_MSG(mem->chunk_size == PAGE_SIZE,
-                   "mem->chunk_size is 0x%llx. PAGE_SIZE is only supported.",
+                   "mem->chunk_size is 0x%x. PAGE_SIZE is only supported.",
                   mem->chunk_size);
    UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));

@@ -528,9 +528,10 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)

 // In case of failure, the caller is required to handle cleanup by calling
 // uvm_mem_free
-static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
+static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unprotected)
 {
    NV_STATUS status;
+    uvm_pmm_gpu_memory_type_t mem_type;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

@@ -547,15 +548,23 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
    if (!mem->vidmem.chunks)
        return NV_ERR_NO_MEMORY;

-    status = uvm_pmm_gpu_alloc_kernel(&mem->backing_gpu->pmm,
-                                      mem->chunks_count,
-                                      mem->chunk_size,
-                                      UVM_PMM_ALLOC_FLAGS_NONE,
-                                      mem->vidmem.chunks,
-                                      NULL);
+    // When CC is disabled the behavior is identical to that of PMM, and the
+    // protection flag is ignored (squashed by PMM internally).
+    if (is_unprotected)
+        mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED;
+    else
+        mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED;
+
+    status = uvm_pmm_gpu_alloc(&mem->backing_gpu->pmm,
+                               mem->chunks_count,
+                               mem->chunk_size,
+                               mem_type,
+                               UVM_PMM_ALLOC_FLAGS_NONE,
+                               mem->vidmem.chunks,
+                               NULL);

    if (status != NV_OK) {
-        UVM_ERR_PRINT("uvm_pmm_gpu_alloc_kernel (count=%zd, size=0x%llx) failed: %s\n",
+        UVM_ERR_PRINT("uvm_pmm_gpu_alloc (count=%zd, size=0x%x) failed: %s\n",
                      mem->chunks_count,
                      mem->chunk_size,
                      nvstatusToString(status));
@@ -565,7 +574,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
    return NV_OK;
 }

-static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero)
+static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_unprotected)
 {
    if (uvm_mem_is_sysmem(mem)) {
        gfp_t gfp_flags;
@@ -587,7 +596,7 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
        return status;
    }

-    return mem_alloc_vidmem_chunks(mem, zero);
+    return mem_alloc_vidmem_chunks(mem, zero, is_unprotected);
 }

 NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_processor_mask_t *mask)
@@ -617,6 +626,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
    NV_STATUS status;
    NvU64 physical_size;
    uvm_mem_t *mem = NULL;
+    bool is_unprotected = false;

    UVM_ASSERT(params->size > 0);

@@ -638,7 +648,12 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
    physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
    mem->chunks_count = physical_size / mem->chunk_size;

-    status = mem_alloc_chunks(mem, params->mm, params->zero);
+    if (params->is_unprotected)
+        UVM_ASSERT(uvm_mem_is_vidmem(mem));
+
+    is_unprotected = params->is_unprotected;
+
+    status = mem_alloc_chunks(mem, params->mm, params->zero, is_unprotected);
    if (status != NV_OK)
        goto error;

@@ -1035,7 +1050,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
                             uvm_page_table_range_vec_t **range_vec)
 {
    NV_STATUS status;
-    NvU64 page_size;
+    NvU32 page_size;
    uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;

    uvm_mem_pte_maker_data_t pte_maker_data = {
@@ -1044,7 +1059,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
        };

    page_size = mem_pick_gpu_page_size(mem, gpu, tree);
-    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx\n", page_size);
+    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);

    // When the Confidential Computing feature is enabled, DMA allocations are
    // majoritarily allocated and managed by a per-GPU DMA buffer pool
--- a/kernel-open/nvidia-uvm/uvm_mem.h
+++ b/kernel-open/nvidia-uvm/uvm_mem.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -126,7 +126,12 @@ typedef struct
    //
    // CPU mappings will always use PAGE_SIZE, so the physical allocation chunk
    // has to be aligned to PAGE_SIZE.
-    NvU64 page_size;
+    NvU32 page_size;
+
+    // The protection flag is only observed for vidmem allocations when CC is
+    // enabled. If set to true, the allocation returns unprotected vidmem;
+    // otherwise, the allocation returns protected vidmem.
+    bool is_unprotected;

    // If true, the allocation is zeroed (scrubbed).
    bool zero;
@@ -194,7 +199,7 @@ struct uvm_mem_struct
    size_t chunks_count;

    // Size of each physical chunk (vidmem) or CPU page (sysmem)
-    NvU64 chunk_size;
+    NvU32 chunk_size;

    // Size of the allocation
    NvU64 size;
@@ -324,7 +329,8 @@ uvm_gpu_phys_address_t uvm_mem_gpu_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU6
 uvm_gpu_address_t uvm_mem_gpu_address_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU64 size);

 // Helper to get an address suitable for accessing_gpu (which may be the backing
-// GPU) to access with CE.
+// GPU) to access with CE. Note that mappings for indirect peers are not
+// created automatically.
 uvm_gpu_address_t uvm_mem_gpu_address_copy(uvm_mem_t *mem, uvm_gpu_t *accessing_gpu, NvU64 offset, NvU64 size);

 static bool uvm_mem_is_sysmem(uvm_mem_t *mem)
--- a/kernel-open/nvidia-uvm/uvm_mem_test.c
+++ b/kernel-open/nvidia-uvm/uvm_mem_test.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -33,7 +33,7 @@

 static const size_t sysmem_alloc_sizes[] = { 1, PAGE_SIZE - 1, PAGE_SIZE, 7 * PAGE_SIZE };

-static NvU64 first_page_size(NvU64 page_sizes)
+static NvU32 first_page_size(NvU32 page_sizes)
 {
    return page_sizes & ~(page_sizes - 1);
 }
@@ -43,7 +43,7 @@ static NvU64 first_page_size(NvU64 page_sizes)
         page_size;                                                                 \
         page_size = first_page_size((page_sizes) & ~(page_size | (page_size - 1))))

-static inline NV_STATUS mem_alloc_sysmem_and_map_cpu_kernel(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem)
+static inline NV_STATUS __alloc_map_sysmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem)
 {
    if (g_uvm_global.conf_computing_enabled)
        return uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, sys_mem);
@@ -67,7 +67,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
    UVM_ASSERT(uvm_mem_physical_size(mem) >= verif_size);
    UVM_ASSERT(verif_size >= sizeof(*sys_verif));

-    TEST_NV_CHECK_GOTO(mem_alloc_sysmem_and_map_cpu_kernel(verif_size, gpu, &sys_mem), done);
+    TEST_NV_CHECK_GOTO(__alloc_map_sysmem(verif_size, gpu, &sys_mem), done);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(sys_mem, gpu), done);

    sys_verif = (NvU64*)uvm_mem_get_cpu_addr_kernel(sys_mem);
@@ -100,9 +100,9 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
                                 "Memcopy %zd bytes from virtual sys_mem 0x%llx to %s mem 0x%llx [mem loc: %s, page size: %u]",
                                 size_this_time,
                                 sys_mem_gpu_address.address,
-                                 mem_gpu_address.is_virtual ? "virtual" : "physical",
+                                 mem_gpu_address.is_virtual? "virtual" : "physical",
                                 mem_gpu_address.address,
-                                 uvm_mem_is_sysmem(mem) ? "sys" : "vid",
+                                 uvm_mem_is_sysmem(mem)? "sys" : "vid",
                                 mem->chunk_size);

        gpu->parent->ce_hal->memcopy(&push, mem_gpu_address, sys_mem_gpu_address, size_this_time);
@@ -140,7 +140,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
                                 "Memcopy %zd bytes from virtual mem 0x%llx to %s sys_mem 0x%llx",
                                 size_this_time,
                                 mem_gpu_address.address,
-                                 sys_mem_gpu_address.is_virtual ? "virtual" : "physical",
+                                 sys_mem_gpu_address.is_virtual? "virtual" : "physical",
                                 sys_mem_gpu_address.address);

        gpu->parent->ce_hal->memcopy(&push, sys_mem_gpu_address, mem_gpu_address, size_this_time);
@@ -153,7 +153,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)

    for (i = 0; i < verif_size / sizeof(*sys_verif); ++i) {
        if (sys_verif[i] != mem->size + i) {
-            UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%llu, processor=%u)\n",
+            UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%u, processor=%u)\n",
                           i,
                           sys_verif[i],
                           (NvU64)(verif_size + i),
@@ -241,7 +241,7 @@ static NV_STATUS test_map_cpu(uvm_mem_t *mem)
    return NV_OK;
 }

-static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
+static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
 {
    NV_STATUS status;
    uvm_mem_t *mem;
@@ -252,9 +252,10 @@ static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU64 page_size, si
    params.page_size = page_size;
    params.mm = current->mm;

-    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&params, &mem), error);
+    status = uvm_mem_alloc(&params, &mem);
+    TEST_CHECK_GOTO(status == NV_OK, error);

-    TEST_NV_CHECK_GOTO(test_map_cpu(mem), error);
+    TEST_CHECK_GOTO(test_map_cpu(mem) == NV_OK, error);

    for_each_va_space_gpu(gpu, va_space)
        TEST_NV_CHECK_GOTO(test_map_gpu(mem, gpu), error);
@@ -265,7 +266,6 @@ static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU64 page_size, si

 error:
    uvm_mem_free(mem);
-
    return status;
 }

@@ -299,7 +299,7 @@ error:
    return status;
 }

-static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
+static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
 {
    NV_STATUS status;
    uvm_mem_t *mem;
@@ -334,7 +334,7 @@ error:
    return status;
 }

-static bool should_test_page_size(size_t alloc_size, NvU64 page_size)
+static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
 {
    if (g_uvm_global.num_simulated_devices == 0)
        return true;
@@ -352,22 +352,21 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
    NvU32 current_alloc = 0;

    // Create allocations of these sizes
-    static const size_t sizes[] = { 1, 4, 16, 1024, 4096, 1024 * 1024, 7 * 1024 * 1024 + 17 };
+    static const size_t sizes[] = {1, 4, 16, 1024, 4096, 1024 * 1024, 7 * 1024 * 1024 + 17 };

    // Pascal+ can map sysmem with 4K, 64K and 2M PTEs, other GPUs can only use
    // 4K. Test all of the sizes supported by Pascal+ and 128K to match big page
    // size on pre-Pascal GPUs with 128K big page size.
    // Ampere+ also supports 512M PTEs, but since UVM's maximum chunk size is
    // 2M, we don't test for this page size.
-    // Blackwell+ also supports 256G PTEs and the above holds for this case too.
-
-    static const NvU64 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
+    static const NvU32 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;

    // All supported page sizes will be tested, CPU has the most with 4 and +1
    // for the default.
    static const int max_supported_page_sizes = 4 + 1;
    int i;

+
    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that GPU can access system memory without using encryption.
    if (g_uvm_global.conf_computing_enabled)
@@ -387,13 +386,13 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
        return NV_ERR_NO_MEMORY;

    for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
-        NvU64 page_size = 0;
+        NvU32 page_size = 0;
        uvm_mem_t *mem;

        if (should_test_page_size(sizes[i], UVM_PAGE_SIZE_DEFAULT)) {
            status = test_alloc_sysmem(va_space, UVM_PAGE_SIZE_DEFAULT, sizes[i], &mem);
            if (status != NV_OK) {
                UVM_TEST_PRINT("Failed to alloc sysmem size %zd, page_size default\n", sizes[i]);
                goto cleanup;
            }
            all_mem[current_alloc++] = mem;
@@ -405,14 +404,14 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)

            status = test_alloc_sysmem(va_space, page_size, sizes[i], &mem);
            if (status != NV_OK) {
-                UVM_TEST_PRINT("Failed to alloc sysmem size %zd, page_size %llu\n", sizes[i], page_size);
+                UVM_TEST_PRINT("Failed to alloc sysmem size %zd, page_size %u\n", sizes[i], page_size);
                goto cleanup;
            }
            all_mem[current_alloc++] = mem;
        }

        for_each_va_space_gpu(gpu, va_space) {
-            NvU64 page_sizes = gpu->address_space_tree.hal->page_sizes();
+            NvU32 page_sizes = gpu->address_space_tree.hal->page_sizes();

            UVM_ASSERT(max_supported_page_sizes >= hweight_long(page_sizes));

@@ -429,7 +428,7 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
            for_each_page_size(page_size, page_sizes) {
                status = test_alloc_vidmem(gpu, page_size, sizes[i], &mem);
                if (status != NV_OK) {
-                    UVM_TEST_PRINT("Test alloc vidmem failed, page_size %llu size %zd GPU %s\n",
+                    UVM_TEST_PRINT("Test alloc vidmem failed, page_size %u size %zd GPU %s\n",
                                   page_size,
                                   sizes[i],
                                   uvm_gpu_name(gpu));
@@ -462,17 +461,17 @@ cleanup:
 static NV_STATUS test_basic_vidmem(uvm_gpu_t *gpu)
 {
    NV_STATUS status = NV_OK;
-    NvU64 page_size;
-    NvU64 page_sizes = gpu->address_space_tree.hal->page_sizes();
-    NvU64 biggest_page_size = uvm_mmu_biggest_page_size_up_to(&gpu->address_space_tree, UVM_CHUNK_SIZE_MAX);
-    NvU64 smallest_page_size = page_sizes & ~(page_sizes - 1);
+    NvU32 page_size;
+    NvU32 page_sizes = gpu->address_space_tree.hal->page_sizes();
+    NvU32 biggest_page_size = uvm_mmu_biggest_page_size_up_to(&gpu->address_space_tree, UVM_CHUNK_SIZE_MAX);
+    NvU32 smallest_page_size = page_sizes & ~(page_sizes - 1);
    uvm_mem_t *mem = NULL;

    page_sizes &= UVM_CHUNK_SIZES_MASK;
    for_each_page_size(page_size, page_sizes) {
        TEST_CHECK_GOTO(uvm_mem_alloc_vidmem(page_size - 1, gpu, &mem) == NV_OK, done);
        if (gpu->mem_info.numa.enabled)
-            TEST_CHECK_GOTO(mem->chunk_size >= PAGE_SIZE && mem->chunk_size <= max(page_size, (NvU64)PAGE_SIZE), done);
+            TEST_CHECK_GOTO(mem->chunk_size >= PAGE_SIZE && mem->chunk_size <= max(page_size, (NvU32)PAGE_SIZE), done);
        else
            TEST_CHECK_GOTO(mem->chunk_size < page_size || page_size == smallest_page_size, done);
        uvm_mem_free(mem);
@@ -480,14 +479,14 @@ static NV_STATUS test_basic_vidmem(uvm_gpu_t *gpu)

        TEST_CHECK_GOTO(uvm_mem_alloc_vidmem(page_size, gpu, &mem) == NV_OK, done);
        if (gpu->mem_info.numa.enabled)
-            TEST_CHECK_GOTO(mem->chunk_size == max(page_size, (NvU64)PAGE_SIZE), done);
+            TEST_CHECK_GOTO(mem->chunk_size == max(page_size, (NvU32)PAGE_SIZE), done);
        else
            TEST_CHECK_GOTO(mem->chunk_size == page_size, done);
        uvm_mem_free(mem);
        mem = NULL;
    }

-    TEST_CHECK_GOTO(uvm_mem_alloc_vidmem(5 * biggest_page_size - 1, gpu, &mem) == NV_OK, done);
+    TEST_CHECK_GOTO(uvm_mem_alloc_vidmem(5 * ((NvU64)biggest_page_size) - 1, gpu, &mem) == NV_OK, done);
    TEST_CHECK_GOTO(mem->chunk_size == biggest_page_size, done);

 done:
@@ -495,6 +494,41 @@ done:
    return status;
 }

+static NV_STATUS test_basic_vidmem_unprotected(uvm_gpu_t *gpu)
+{
+    NV_STATUS status = NV_OK;
+    uvm_mem_t *mem = NULL;
+    uvm_mem_alloc_params_t params = { 0 };
+    params.size = UVM_PAGE_SIZE_4K;
+    params.backing_gpu = gpu;
+    params.page_size = UVM_PAGE_SIZE_4K;
+
+    // If CC is enabled, the protection flag is observed. Because currently all
+    // vidmem is in the protected region, the allocation should succeed.
+    //
+    // If CC is disabled, the protection flag is ignored.
+    params.is_unprotected = false;
+    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&params, &mem), done);
+
+    uvm_mem_free(mem);
+    mem = NULL;
+
+    // If CC is enabled, the allocation should fail because currently the
+    // unprotected region is empty.
+    //
+    // If CC is disabled, the behavior should be identical to that of a
+    // protected allocation.
+    params.is_unprotected = true;
+    if (g_uvm_global.conf_computing_enabled)
+        TEST_CHECK_GOTO(uvm_mem_alloc(&params, &mem) == NV_ERR_NO_MEMORY, done);
+    else
+        TEST_NV_CHECK_GOTO(uvm_mem_alloc(&params, &mem), done);
+
+done:
+    uvm_mem_free(mem);
+    return status;
+}
+
 static NV_STATUS test_basic_sysmem(void)
 {
    NV_STATUS status = NV_OK;
@@ -570,135 +604,6 @@ done:
    return status;
 }

-static NV_STATUS check_huge_page_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem, NvU64 offset)
-{
-    NV_STATUS status = NV_OK;
-    uvm_mem_t *sys_mem = NULL;
-    uvm_push_t push;
-    NvU64 *sys_verif;
-    NvU64 *expected_value;
-    NvU64 verif_size = mem->size;
-    uvm_gpu_address_t mem_gpu_address, sys_mem_gpu_address;
-
-    UVM_ASSERT(uvm_mem_physical_size(mem) >= verif_size);
-
-    TEST_NV_CHECK_GOTO(mem_alloc_sysmem_and_map_cpu_kernel(verif_size, gpu, &sys_mem), done);
-    sys_verif = uvm_mem_get_cpu_addr_kernel(sys_mem);
-    memset(sys_verif, 0x0, mem->size);
-
-    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(sys_mem, gpu), done);
-
-    mem_gpu_address = uvm_gpu_address_virtual(offset);
-    sys_mem_gpu_address = uvm_mem_gpu_address_virtual_kernel(sys_mem, gpu);
-
-    TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager,
-                                      UVM_CHANNEL_TYPE_GPU_TO_CPU,
-                                      &push,
-                                      "Memcopy %llu bytes from virtual mem 0x%llx to virtual sys_mem 0x%llx",
-                                      verif_size,
-                                      mem_gpu_address.address,
-                                      sys_mem_gpu_address.address),
-                       done);
-
-    gpu->parent->ce_hal->memcopy(&push, sys_mem_gpu_address, mem_gpu_address, verif_size);
-    TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), done);
-
-    expected_value = uvm_mem_get_cpu_addr_kernel(mem);
-    TEST_CHECK_GOTO(memcmp(sys_verif, expected_value, verif_size) == 0, done);
-
-done:
-    uvm_mem_free(sys_mem);
-
-    return status;
-}
-
-static NvU64 test_pte_maker(uvm_page_table_range_vec_t *range_vec, NvU64 offset, void *phys_addr)
-{
-    uvm_page_tree_t *tree = range_vec->tree;
-    uvm_gpu_phys_address_t phys = uvm_gpu_phys_address(UVM_APERTURE_SYS, (NvU64)phys_addr);
-
-    return tree->hal->make_pte(phys.aperture, phys.address, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
-}
-
-static NV_STATUS test_huge_page_size(uvm_va_space_t *va_space, uvm_gpu_t *gpu, NvU64 page_size)
-{
-    NV_STATUS status = NV_OK;
-    uvm_mem_t *mem = NULL;
-    size_t size = PAGE_SIZE;
-    NvU64 *cpu_addr;
-    NvU64 huge_gpu_va;
-    NvU64 gpu_phys_addr;
-    uvm_page_table_range_vec_t *range_vec;
-    NvU8 value = 0xA5;
-
-    // TODO: Bug 3839176: the test is waived on Confidential Computing because
-    // it assumes that GPU can access system memory without using encryption.
-    if (g_uvm_global.conf_computing_enabled)
-        return NV_OK;
-
-    TEST_NV_CHECK_GOTO(mem_alloc_sysmem_and_map_cpu_kernel(size, gpu, &mem), cleanup);
-    cpu_addr = uvm_mem_get_cpu_addr_kernel(mem);
-    memset(cpu_addr, value, mem->size);
-
-    // Map it on the GPU (uvm_mem base area), it creates GPU physical address
-    // for the sysmem mapping.
-    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(mem, gpu), cleanup);
-
-    huge_gpu_va = UVM_ALIGN_UP(gpu->parent->uvm_mem_va_base + gpu->parent->uvm_mem_va_size, page_size);
-    TEST_CHECK_GOTO(IS_ALIGNED(huge_gpu_va, page_size), cleanup);
-    TEST_CHECK_GOTO((huge_gpu_va + page_size) < (1ull << gpu->address_space_tree.hal->num_va_bits()), cleanup);
-
-    // Manually mapping huge_gpu_va because page_size is larger than the largest
-    // uvm_mem_t chunk/page size, so we don't use uvm_mem_gpu_kernel() helper.
-    TEST_NV_CHECK_GOTO(uvm_page_table_range_vec_create(&gpu->address_space_tree,
-                                                       huge_gpu_va,
-                                                       page_size,
-                                                       page_size,
-                                                       UVM_PMM_ALLOC_FLAGS_NONE,
-                                                       &range_vec), cleanup);
-
-    gpu_phys_addr = uvm_mem_gpu_physical(mem, gpu, 0, size).address;
-
-    TEST_NV_CHECK_GOTO(uvm_page_table_range_vec_write_ptes(range_vec,
-                                                           UVM_MEMBAR_NONE,
-                                                           test_pte_maker,
-                                                           (void *)gpu_phys_addr), cleanup_range);
-
-    // Despite the huge page_size mapping, only PAGE_SIZE is backed by an
-    // allocation "own" by the test. We compute the offset within the huge page
-    // to verify only this segment.
-    TEST_NV_CHECK_GOTO(check_huge_page_from_gpu(gpu, mem, huge_gpu_va + (gpu_phys_addr % page_size)),
-                       cleanup_range);
-
-cleanup_range:
-    uvm_page_table_range_vec_destroy(range_vec);
-    range_vec = NULL;
-
-cleanup:
-    uvm_mem_free(mem);
-
-    return status;
-}
-
-// Check the GPU access to memory from a 512MB+ page size mapping.
-// The test allocates a PAGE_SIZE sysmem page, but uses the GMMU to map a huge
-// page size area. It maps the allocated page to this area, and uses the CE to
-// access it, thus, exercising a memory access using a huge page.
-static NV_STATUS test_huge_pages(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
-{
-    NvU64 page_sizes = gpu->address_space_tree.hal->page_sizes();
-    NvU64 page_size = 0;
-
-    for_each_page_size(page_size, page_sizes) {
-        if (page_size < UVM_PAGE_SIZE_512M)
-            continue;
-
-        TEST_NV_CHECK_RET(test_huge_page_size(va_space, gpu, page_size));
-    }
-
-    return NV_OK;
-}
-
 static NV_STATUS test_basic(uvm_va_space_t *va_space)
 {
    uvm_gpu_t *gpu;
@@ -708,8 +613,8 @@ static NV_STATUS test_basic(uvm_va_space_t *va_space)
    for_each_va_space_gpu(gpu, va_space) {
        TEST_NV_CHECK_RET(test_basic_vidmem(gpu));
        TEST_NV_CHECK_RET(test_basic_sysmem_dma(gpu));
+        TEST_NV_CHECK_RET(test_basic_vidmem_unprotected(gpu));
        TEST_NV_CHECK_RET(test_basic_dma_pool(gpu));
-        TEST_NV_CHECK_RET(test_huge_pages(va_space, gpu));
    }

    return NV_OK;
--- a/kernel-open/nvidia-uvm/uvm_migrate_pageable.c
+++ b/kernel-open/nvidia-uvm/uvm_migrate_pageable.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2018-2024 NVIDIA Corporation
+    Copyright (c) 2018-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -52,6 +52,10 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
    uvm_gpu_t *owning_gpu = UVM_ID_IS_CPU(resident_id)? NULL: uvm_va_space_get_gpu(va_space, resident_id);
    const bool can_copy_from = uvm_processor_mask_test(&va_space->can_copy_from[uvm_id_value(copying_gpu->id)],
                                                       resident_id);
+    const bool direct_peer = owning_gpu &&
+                             (owning_gpu != copying_gpu) &&
+                             can_copy_from &&
+                             !uvm_gpu_peer_caps(owning_gpu, copying_gpu)->is_indirect_peer;

    UVM_ASSERT(page_index < state->num_pages);

@@ -61,13 +65,15 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
        // Local vidmem address
        *gpu_addr = uvm_gpu_address_copy(owning_gpu, uvm_gpu_page_to_phys_address(owning_gpu, page));
    }
-    else if (owning_gpu && can_copy_from) {
+    else if (direct_peer) {
+        // Direct GPU peer
        uvm_gpu_identity_mapping_t *gpu_peer_mappings = uvm_gpu_get_peer_mapping(copying_gpu, owning_gpu->id);
        uvm_gpu_phys_address_t phys_addr = uvm_gpu_page_to_phys_address(owning_gpu, page);

        *gpu_addr = uvm_gpu_address_virtual(gpu_peer_mappings->base + phys_addr.address);
    }
    else {
+        // Sysmem/Indirect Peer
        NV_STATUS status = uvm_parent_gpu_map_cpu_page(copying_gpu->parent, page, &state->dma.addrs[page_index]);

        if (status != NV_OK)
@@ -501,7 +507,7 @@ static NV_STATUS migrate_vma_copy_pages(struct vm_area_struct *vma,
    return NV_OK;
 }

-static void migrate_vma_cleanup_pages(unsigned long *dst, unsigned long npages)
+void migrate_vma_cleanup_pages(unsigned long *dst, unsigned long npages)
 {
    unsigned long i;

@@ -517,7 +523,7 @@ static void migrate_vma_cleanup_pages(unsigned long *dst, unsigned long npages)
    }
 }

-static void migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_t *state)
+void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_t *state)
 {
    struct vm_area_struct *vma = args->vma;
    unsigned long start = args->start;
@@ -547,13 +553,12 @@ static void migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_sta
        migrate_vma_cleanup_pages(args->dst, state->num_pages);
 }

-#if defined(CONFIG_MIGRATE_VMA_HELPER)
-static void migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
-                                              const unsigned long *src,
-                                              unsigned long *dst,
-                                              unsigned long start,
-                                              unsigned long end,
-                                              void *private)
+void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
+                                const unsigned long *src,
+                                unsigned long *dst,
+                                unsigned long start,
+                                unsigned long end,
+                                void *private)
 {
    struct migrate_vma args =
    {
@@ -564,11 +569,10 @@ static void migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
        .end = end,
    };

-    migrate_vma_alloc_and_copy(&args, (migrate_vma_state_t *) private);
+    uvm_migrate_vma_alloc_and_copy(&args, (migrate_vma_state_t *) private);
 }
-#endif

-static void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_state_t *state)
+void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_state_t *state)
 {
    unsigned long i;

@@ -638,13 +642,12 @@ static void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_v
    UVM_ASSERT(!bitmap_intersects(state->populate_pages_mask, state->allocation_failed_mask, state->num_pages));
 }

-#if defined(CONFIG_MIGRATE_VMA_HELPER)
-static void migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,
-                                                const unsigned long *src,
-                                                const unsigned long *dst,
-                                                unsigned long start,
-                                                unsigned long end,
-                                                void *private)
+void uvm_migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,
+                                             const unsigned long *src,
+                                             const unsigned long *dst,
+                                             unsigned long start,
+                                             unsigned long end,
+                                             void *private)
 {
    struct migrate_vma args =
    {
@@ -657,7 +660,6 @@ static void migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,

    uvm_migrate_vma_finalize_and_map(&args, (migrate_vma_state_t *) private);
 }
-#endif

 static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *state)
 {
@@ -666,8 +668,8 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
 #if defined(CONFIG_MIGRATE_VMA_HELPER)
    static const struct migrate_vma_ops uvm_migrate_vma_ops =
    {
-        .alloc_and_copy = migrate_vma_alloc_and_copy_helper,
-        .finalize_and_map = migrate_vma_finalize_and_map_helper,
+        .alloc_and_copy = uvm_migrate_vma_alloc_and_copy_helper,
+        .finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
    };

    ret = migrate_vma(&uvm_migrate_vma_ops, args->vma, args->start, args->end, args->src, args->dst, state);
@@ -683,7 +685,7 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
    if (ret < 0)
        return errno_to_nv_status(ret);

-    migrate_vma_alloc_and_copy(args, state);
+    uvm_migrate_vma_alloc_and_copy(args, state);
    if (state->status == NV_OK) {
        migrate_vma_pages(args);
        uvm_migrate_vma_finalize_and_map(args, state);
--- a/kernel-open/nvidia-uvm/uvm_migrate_pageable.h
+++ b/kernel-open/nvidia-uvm/uvm_migrate_pageable.h
@@ -150,6 +150,23 @@ struct migrate_vma {
    unsigned long           start;
    unsigned long           end;
 };
+
+void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
+                                           const unsigned long *src,
+                                           unsigned long *dst,
+                                           unsigned long start,
+                                           unsigned long end,
+                                           void *private);
+
+void uvm_migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,
+                                             const unsigned long *src,
+                                             const unsigned long *dst,
+                                             unsigned long start,
+                                             unsigned long end,
+                                             void *private);
+#else
+void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_t *state);
+void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_state_t *state);
 #endif // CONFIG_MIGRATE_VMA_HELPER

 // Populates the given VA range and tries to migrate all the pages to dst_id. If
--- a/kernel-open/nvidia-uvm/uvm_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_mmu.c
@@ -50,18 +50,18 @@
 // because that type is normally associated with the LCE mapped to the most
 // PCEs. The higher bandwidth is beneficial when doing bulk operations such as
 // clearing PTEs, or initializing a page directory/table.
-#define page_tree_begin_acquire(tree, tracker, push, format, ...) ({                                                        \
-    NV_STATUS status;                                                                                                       \
-    uvm_channel_manager_t *manager = (tree)->gpu->channel_manager;                                                          \
-                                                                                                                            \
-    if (manager == NULL)                                                                                                    \
-        status = uvm_push_begin_fake((tree)->gpu, (push));                                                                  \
-    else if (uvm_parent_gpu_is_virt_mode_sriov_heavy((tree)->gpu->parent))                                                  \
-        status = uvm_push_begin_acquire(manager, UVM_CHANNEL_TYPE_MEMOPS, (tracker), (push), (format), ##__VA_ARGS__);      \
-    else                                                                                                                    \
-        status = uvm_push_begin_acquire(manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, (tracker), (push), (format), ##__VA_ARGS__);\
-                                                                                                                            \
-    status;                                                                                                                 \
+#define page_tree_begin_acquire(tree, tracker, push, format, ...) ({                                                            \
+    NV_STATUS __status;                                                                                                         \
+    uvm_channel_manager_t *__manager = (tree)->gpu->channel_manager;                                                            \
+                                                                                                                                \
+    if (__manager == NULL)                                                                                                      \
+        __status = uvm_push_begin_fake((tree)->gpu, (push));                                                                    \
+    else if (uvm_parent_gpu_is_virt_mode_sriov_heavy((tree)->gpu->parent))                                                      \
+        __status = uvm_push_begin_acquire(__manager, UVM_CHANNEL_TYPE_MEMOPS, (tracker), (push), (format), ##__VA_ARGS__);      \
+    else                                                                                                                        \
+        __status = uvm_push_begin_acquire(__manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, (tracker), (push), (format), ##__VA_ARGS__);\
+                                                                                                                                \
+    __status;                                                                                                                   \
 })

 // Default location of page table allocations
@@ -153,17 +153,20 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
 // - UVM_APERTURE_VID       biggest page size on vidmem mappings
 // - UVM_APERTURE_SYS       biggest page size on sysmem mappings
 // - UVM_APERTURE_PEER_0-7  biggest page size on peer mappings
-static NvU64 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
+static NvU32 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
 {
    UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);

    // There may be scenarios where the GMMU must use a subset of the supported
    // page sizes, e.g., to comply with the vMMU supported page sizes due to
    // segmentation sizes.
-    if (aperture == UVM_APERTURE_VID)
-        return uvm_mmu_biggest_page_size_up_to(tree, tree->gpu->mem_info.max_vidmem_page_size);
-
-    return 1ULL << __fls(tree->hal->page_sizes());
+    if (aperture == UVM_APERTURE_VID) {
+        UVM_ASSERT(tree->gpu->mem_info.max_vidmem_page_size <= NV_U32_MAX);
+        return (NvU32) tree->gpu->mem_info.max_vidmem_page_size;
+    }
+    else {
+        return 1U << __fls(tree->hal->page_sizes());
+    }
 }

 static NV_STATUS phys_mem_allocate_vidmem(uvm_page_tree_t *tree,
@@ -251,7 +254,7 @@ static void phys_mem_deallocate(uvm_page_tree_t *tree, uvm_mmu_page_table_alloc_
 }

 static void page_table_range_init(uvm_page_table_range_t *range,
-                                 NvU64 page_size,
+                                 NvU32 page_size,
                                 uvm_page_directory_t *dir,
                                 NvU32 start_index,
                                 NvU32 end_index)
@@ -441,9 +444,9 @@ static void pde_fill(uvm_page_tree_t *tree,
        pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
 }

-static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
+static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
 {
-    NvU64 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
+    NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
    NvU8 max_pde_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC) - 1;

    // Passing in NULL for the phys_allocs will mark the child entries as
@@ -494,7 +497,7 @@ static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_direc
 }

 static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
-                                                NvU64 page_size,
+                                                NvU32 page_size,
                                                NvU32 depth,
                                                uvm_pmm_alloc_flags_t pmm_flags)
 {
@@ -543,7 +546,7 @@ static inline NvU32 entry_index_from_vaddr(NvU64 vaddr, NvU32 addr_bit_shift, Nv
    return (NvU32)((vaddr >> addr_bit_shift) & mask);
 }

-static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, NvU32 depth, NvU64 page_size)
+static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, NvU32 depth, NvU32 page_size)
 {
    return hal->entries_per_index(depth) * entry_index + hal->entry_offset(depth, page_size);
 }
@@ -580,7 +583,7 @@ static void pde_write(uvm_page_tree_t *tree,
    pde_fill(tree, dir, entry_index, 1, phys_allocs, push);
 }

-static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU64 page_size)
+static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU32 page_size)
 {
    UVM_ASSERT(dir->ref_count > 0);

@@ -591,38 +594,35 @@ static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU
 static void pde_clear(uvm_page_tree_t *tree,
                      uvm_page_directory_t *dir,
                      NvU32 entry_index,
-                      NvU64 page_size,
+                      NvU32 page_size,
                      uvm_push_t *push)
 {
    host_pde_clear(tree, dir, entry_index, page_size);
    pde_write(tree, dir, entry_index, false, push);
 }

-static uvm_chunk_sizes_mask_t allocation_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU64 big_page_size)
+static uvm_chunk_sizes_mask_t allocation_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU32 big_page_size)
 {
+    uvm_chunk_sizes_mask_t alloc_sizes = 0;
    uvm_mmu_mode_hal_t *hal = parent_gpu->arch_hal->mmu_mode_hal(big_page_size);
-    unsigned long page_sizes, page_size_log2;
-    uvm_chunk_sizes_mask_t alloc_sizes;

-    if (hal == NULL)
-        return 0;
+    if (hal != NULL) {
+        unsigned long page_size_log2;
+        unsigned long page_sizes = hal->page_sizes();
+        BUILD_BUG_ON(sizeof(hal->page_sizes()) > sizeof(page_sizes));

-    page_sizes = hal->page_sizes();
-    alloc_sizes = 0;
-
-    BUILD_BUG_ON(sizeof(hal->page_sizes()) > sizeof(page_sizes));
-
-    for_each_set_bit(page_size_log2, &page_sizes, BITS_PER_LONG) {
-        NvU32 i;
-        NvU64 page_size = 1ULL << page_size_log2;
-        for (i = 0; i <= hal->page_table_depth(page_size); i++)
-            alloc_sizes |= hal->allocation_size(i, page_size);
+        for_each_set_bit(page_size_log2, &page_sizes, BITS_PER_LONG) {
+            NvU32 i;
+            NvU32 page_size = (NvU32)(1ULL << page_size_log2);
+            for (i = 0; i <= hal->page_table_depth(page_size); i++)
+                alloc_sizes |= hal->allocation_size(i, page_size);
+        }
    }

    return alloc_sizes;
 }

-static NvU64 page_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU64 big_page_size)
+static NvU32 page_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU32 big_page_size)
 {
    uvm_mmu_mode_hal_t *hal = parent_gpu->arch_hal->mmu_mode_hal(big_page_size);

@@ -662,7 +662,7 @@ static NV_STATUS page_tree_end_and_wait(uvm_page_tree_t *tree, uvm_push_t *push)
 }

 static NV_STATUS write_gpu_state_cpu(uvm_page_tree_t *tree,
-                                     NvU64 page_size,
+                                     NvU32 page_size,
                                     NvS32 invalidate_depth,
                                     NvU32 used_count,
                                     uvm_page_directory_t **dirs_used)
@@ -713,7 +713,7 @@ static NV_STATUS write_gpu_state_cpu(uvm_page_tree_t *tree,
 }

 static NV_STATUS write_gpu_state_gpu(uvm_page_tree_t *tree,
-                                     NvU64 page_size,
+                                     NvU32 page_size,
                                     NvS32 invalidate_depth,
                                     NvU32 used_count,
                                     uvm_page_directory_t **dirs_used)
@@ -805,7 +805,7 @@ static NV_STATUS write_gpu_state_gpu(uvm_page_tree_t *tree,

 // initialize new page tables and insert them into the tree
 static NV_STATUS write_gpu_state(uvm_page_tree_t *tree,
-                                 NvU64 page_size,
+                                 NvU32 page_size,
                                 NvS32 invalidate_depth,
                                 NvU32 used_count,
                                 uvm_page_directory_t **dirs_used)
@@ -842,7 +842,7 @@ static void free_unused_directories(uvm_page_tree_t *tree,
    }
 }

-static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU64 page_size, uvm_mmu_page_table_alloc_t *out)
+static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm_mmu_page_table_alloc_t *out)
 {
    NvU32 depth = tree->hal->page_table_depth(page_size);
    NvLength alloc_size = tree->hal->allocation_size(depth, page_size);
@@ -871,7 +871,7 @@ static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
 {
    NV_STATUS status;
    NvU64 min_va_upper, max_va_lower;
-    NvU64 page_size;
+    NvU32 page_size;

    if (!page_tree_ats_init_required(tree))
        return NV_OK;
@@ -1090,7 +1090,7 @@ static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t locatio
 NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
                             uvm_gpu_va_space_t *gpu_va_space,
                             uvm_page_tree_type_t type,
-                             NvU64 big_page_size,
+                             NvU32 big_page_size,
                             uvm_aperture_t location,
                             uvm_page_tree_t *tree)
 {
@@ -1110,7 +1110,7 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
    tree->gpu_va_space = gpu_va_space;
    tree->big_page_size = big_page_size;

-    UVM_ASSERT(uvm_mmu_page_size_supported(tree, big_page_size));
+    UVM_ASSERT(gpu->mem_info.max_vidmem_page_size & tree->hal->page_sizes());

    page_tree_set_location(tree, location);

@@ -1347,9 +1347,9 @@ NV_STATUS uvm_page_tree_wait(uvm_page_tree_t *tree)
 }

 static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
-                              NvU64 page_size,
+                              NvU32 page_size,
                              NvU64 start,
-                              NvU64 size,
+                              NvLength size,
                              uvm_page_table_range_t *range,
                              NvU32 *cur_depth,
                              uvm_page_directory_t **dir_cache)
@@ -1379,9 +1379,9 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
    // This algorithm will work with unaligned ranges, but the caller's intent
    // is unclear
    UVM_ASSERT_MSG(start % page_size == 0 && size % page_size == 0,
-                   "start 0x%llx size 0x%llx page_size 0x%llx\n",
+                   "start 0x%llx size 0x%zx page_size 0x%x\n",
                   start,
-                   size,
+                   (size_t)size,
                   page_size);

    // The GPU should be capable of addressing the passed range
@@ -1444,11 +1444,11 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
    return write_gpu_state(tree, page_size, invalidate_depth, used_count, dirs_used);
 }

-static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvU64 size, uvm_page_table_range_t *range)
+static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uvm_page_table_range_t *range)
 {
    NV_STATUS status;
    uvm_push_t push;
-    NvU64 page_sizes;
+    NvU32 page_sizes;
    uvm_mmu_page_table_alloc_t *phys_alloc[1];

    // TODO: Bug 2734399
@@ -1460,7 +1460,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvU64 size, uvm_p
    status = page_tree_begin_acquire(tree,
                                     &tree->tracker,
                                     &push,
-                                     "map remap: [0x%llx, 0x%llx), page_size: %lld",
+                                     "map remap: [0x%llx, 0x%llx), page_size: %d",
                                     start,
                                     start + size,
                                     range->page_size);
@@ -1500,9 +1500,9 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvU64 size, uvm_p
 }

 NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
-                                       NvU64 page_size,
+                                       NvU32 page_size,
                                       NvU64 start,
-                                       NvU64 size,
+                                       NvLength size,
                                       uvm_pmm_alloc_flags_t pmm_flags,
                                       uvm_page_table_range_t *range)
 {
@@ -1545,9 +1545,9 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
 }

 NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
-                                 NvU64 page_size,
+                                 NvU32 page_size,
                                 NvU64 start,
-                                 NvU64 size,
+                                 NvLength size,
                                 uvm_pmm_alloc_flags_t pmm_flags,
                                 uvm_page_table_range_t *range)
 {
@@ -1596,7 +1596,7 @@ void uvm_page_table_range_shrink(uvm_page_tree_t *tree, uvm_page_table_range_t *
 }

 NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
-                                  NvU64 page_size,
+                                  NvU32 page_size,
                                  NvU64 start,
                                  uvm_pmm_alloc_flags_t pmm_flags,
                                  uvm_page_table_range_t *single)
@@ -1621,7 +1621,7 @@ void uvm_page_tree_clear_pde(uvm_page_tree_t *tree, uvm_page_table_range_t *sing
 static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
                             uvm_page_directory_t *pte_dir,
                             uvm_page_directory_t *parent,
-                             NvU64 page_size)
+                             NvU32 page_size)
 {
    NV_STATUS status;
    uvm_push_t push;
@@ -1633,7 +1633,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
    // The flat mappings should always be set up when executing this path
    UVM_ASSERT(!uvm_mmu_use_cpu(tree));

-    status = page_tree_begin_acquire(tree, &tree->tracker, &push, "Poisoning child table of page size %llu", page_size);
+    status = page_tree_begin_acquire(tree, &tree->tracker, &push, "Poisoning child table of page size %u", page_size);
    if (status != NV_OK)
        return status;

@@ -1660,7 +1660,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
 }

 NV_STATUS uvm_page_tree_alloc_table(uvm_page_tree_t *tree,
-                                    NvU64 page_size,
+                                    NvU32 page_size,
                                    uvm_pmm_alloc_flags_t pmm_flags,
                                    uvm_page_table_range_t *single,
                                    uvm_page_table_range_t *children)
@@ -1768,7 +1768,7 @@ static size_t range_vec_calc_range_index(uvm_page_table_range_vec_t *range_vec,
 NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
                                        NvU64 start,
                                        NvU64 size,
-                                        NvU64 page_size,
+                                        NvU32 page_size,
                                        uvm_pmm_alloc_flags_t pmm_flags,
                                        uvm_page_table_range_vec_t *range_vec)
 {
@@ -1776,8 +1776,8 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
    size_t i;

    UVM_ASSERT(size != 0);
-    UVM_ASSERT_MSG(IS_ALIGNED(start, page_size), "start 0x%llx page_size 0x%llx\n", start, page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(start, page_size), "start 0x%llx page_size 0x%x\n", start, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);

    range_vec->tree = tree;
    range_vec->page_size = page_size;
@@ -1826,7 +1826,7 @@ out:
 NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
                                          NvU64 start,
                                          NvU64 size,
-                                          NvU64 page_size,
+                                          NvU32 page_size,
                                          uvm_pmm_alloc_flags_t pmm_flags,
                                          uvm_page_table_range_vec_t **range_vec_out)
 {
@@ -1952,7 +1952,7 @@ static NV_STATUS uvm_page_table_range_vec_clear_ptes_gpu(uvm_page_table_range_ve
    size_t i;
    uvm_page_tree_t *tree = range_vec->tree;
    uvm_gpu_t *gpu = tree->gpu;
-    NvU64 page_size = range_vec->page_size;
+    NvU32 page_size = range_vec->page_size;
    NvU32 entry_size = uvm_mmu_pte_size(tree, page_size);
    NvU64 invalid_pte = 0;
    uvm_push_t push;
@@ -2076,13 +2076,13 @@ static NV_STATUS uvm_page_table_range_vec_write_ptes_cpu(uvm_page_table_range_ve
        uvm_mmu_page_table_alloc_t *dir = &range->table->phys_alloc;
        NvU32 entry;

-        for (entry = 0; entry < range->entry_count; ++entry) {
+        for (entry = range->start_index; entry < range->entry_count; ++entry) {
            NvU64 pte_bits[2] = {pte_maker(range_vec, offset, caller_data), 0};

            if (entry_size == 8)
-                uvm_mmu_page_table_cpu_memset_8(tree->gpu, dir, range->start_index + entry, pte_bits[0], 1);
+                uvm_mmu_page_table_cpu_memset_8(tree->gpu, dir, entry, pte_bits[0], 1);
            else
-                uvm_mmu_page_table_cpu_memset_16(tree->gpu, dir, range->start_index + entry, pte_bits, 1);
+                uvm_mmu_page_table_cpu_memset_16(tree->gpu, dir, entry, pte_bits, 1);

            offset += range_vec->page_size;
        }
@@ -2237,7 +2237,7 @@ static NV_STATUS create_identity_mapping(uvm_gpu_t *gpu,
                                         NvU64 size,
                                         uvm_aperture_t aperture,
                                         NvU64 phys_offset,
-                                         NvU64 page_size,
+                                         NvU32 page_size,
                                         uvm_pmm_alloc_flags_t pmm_flags)
 {
    NV_STATUS status;
@@ -2310,9 +2310,9 @@ bool uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(uvm_parent_gpu_t *parent_gp
    return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
 }

-static NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)
+NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)
 {
-    NvU64 page_size;
+    NvU32 page_size;
    NvU64 size;
    uvm_aperture_t aperture = UVM_APERTURE_VID;
    NvU64 phys_offset = 0;
@@ -2351,7 +2351,7 @@ static void destroy_static_vidmem_mapping(uvm_gpu_t *gpu)

 NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
 {
-    NvU64 page_size;
+    NvU32 page_size;
    NvU64 size;
    uvm_aperture_t aperture;
    NvU64 phys_offset;
@@ -2406,9 +2406,9 @@ void uvm_mmu_init_gpu_chunk_sizes(uvm_parent_gpu_t *parent_gpu)
    // to handle allocating multiple chunks per page.
    parent_gpu->mmu_user_chunk_sizes = sizes & PAGE_MASK;

-    // The maximum chunk size is 2MB (i.e., UVM_CHUNK_SIZE_MAX), therefore we
-    // mask out any supported page size greater than UVM_CHUNK_SIZE_MAX from
-    // the chunk size list.
+    // Ampere+ GPUs support 512MB page size, however, the maximum chunk size is
+    // 2MB(i.e., UVM_CHUNK_SIZE_MAX), therefore we mask out any supported page
+    // size greater than UVM_CHUNK_SIZE_MAX from the chunk size list.
    parent_gpu->mmu_user_chunk_sizes &= UVM_CHUNK_SIZES_MASK;

    parent_gpu->mmu_kernel_chunk_sizes = allocation_sizes_for_big_page_size(parent_gpu, UVM_PAGE_SIZE_64K) |
@@ -2535,7 +2535,7 @@ static void root_chunk_mapping_destroy(uvm_gpu_t *gpu, uvm_gpu_root_chunk_mappin
    uvm_push_t push;
    NvU32 entry_size;
    uvm_pte_batch_t pte_batch;
-    NvU64 page_size;
+    NvU32 page_size;
    NvU64 size;
    NvU64 invalid_pte;
    uvm_page_table_range_t *range = root_chunk_mapping->range;
@@ -2585,7 +2585,7 @@ static NV_STATUS root_chunk_mapping_create(uvm_gpu_t *gpu, uvm_gpu_root_chunk_ma
    uvm_push_t push;
    NvU64 pte_bits;
    NvU32 entry_size;
-    NvU64 page_size = UVM_CHUNK_SIZE_MAX;
+    NvU32 page_size = UVM_CHUNK_SIZE_MAX;
    NvU64 size = UVM_CHUNK_SIZE_MAX;

    range = uvm_kvmalloc_zero(sizeof(*range));
@@ -2852,7 +2852,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
        if (sysmem_mapping->range_vec == NULL) {
            uvm_gpu_address_t virtual_address = uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, curr_pa);
            NvU64 phys_offset = curr_pa;
-            NvU64 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
+            NvU32 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
            uvm_pmm_alloc_flags_t pmm_flags;

            // No eviction is requested when allocating the page tree storage,
--- a/kernel-open/nvidia-uvm/uvm_mmu.h
+++ b/kernel-open/nvidia-uvm/uvm_mmu.h
@@ -39,10 +39,10 @@
 // The following memory regions are not to scale. The memory layout is linear,
 // i.e., no canonical form address conversion.
 //
-// Hopper-Blackwell:
+// Hopper:
 // +----------------+ 128PB
 // |                |
-// |   (not used)*  | * See note(1)
+// |   (not used)   |
 // |                |
 // ------------------
 // |uvm_mem_t(128GB)| (uvm_mem_va_size)
@@ -66,7 +66,7 @@
 // Pascal-Ada:
 // +----------------+ 512TB
 // |                |
-// |   (not used)*  | * See note(1)
+// |   (not used)   |
 // |                |
 // ------------------
 // |uvm_mem_t(128GB)| (uvm_mem_va_size)
@@ -107,9 +107,6 @@
 // | rm_mem(128GB)  | (rm_va_size)
 // |                |
 // +----------------+ 0 (rm_va_base)
-//
-// Note (1): This region is used in unit tests, see
-// tests/uvm_mem_test.c:test_huge_pages().

 // Maximum memory of any GPU.
 #define UVM_GPU_MAX_PHYS_MEM (UVM_SIZE_1TB)
@@ -211,7 +208,7 @@ struct uvm_mmu_mode_hal_struct
    // This is an optimization which reduces TLB pressure, reduces the number of
    // TLB invalidates we must issue, and means we don't have to initialize the
    // 4k PTEs which are covered by big PTEs since the MMU will never read them.
-    NvU64 (*unmapped_pte)(NvU64 page_size);
+    NvU64 (*unmapped_pte)(NvU32 page_size);

    // Bit pattern used for debug purposes to clobber PTEs which ought to be
    // unused. In practice this will generate a PRIV violation or a physical
@@ -237,23 +234,23 @@ struct uvm_mmu_mode_hal_struct
    // For dual PDEs, this is ether 1 or 0, depending on the page size.
    // This is used to index the host copy only. GPU PDEs are always entirely
    // re-written using make_pde.
-    NvLength (*entry_offset)(NvU32 depth, NvU64 page_size);
+    NvLength (*entry_offset)(NvU32 depth, NvU32 page_size);

    // number of virtual address bits used to index the directory/table at a
    // given depth
-    NvU32 (*index_bits)(NvU32 depth, NvU64 page_size);
+    NvU32 (*index_bits)(NvU32 depth, NvU32 page_size);

    // total number of bits that represent the virtual address space
    NvU32 (*num_va_bits)(void);

    // the size, in bytes, of a directory/table at a given depth.
-    NvLength (*allocation_size)(NvU32 depth, NvU64 page_size);
+    NvLength (*allocation_size)(NvU32 depth, NvU32 page_size);

    // the depth which corresponds to the page tables
-    NvU32 (*page_table_depth)(NvU64 page_size);
+    NvU32 (*page_table_depth)(NvU32 page_size);

    // bitwise-or of supported page sizes
-    NvU64 (*page_sizes)(void);
+    NvU32 (*page_sizes)(void);
 };

 struct uvm_page_table_range_struct
@@ -261,7 +258,7 @@ struct uvm_page_table_range_struct
    uvm_page_directory_t *table;
    NvU32 start_index;
    NvU32 entry_count;
-    NvU64 page_size;
+    NvU32 page_size;
 };

 typedef enum
@@ -278,7 +275,7 @@ struct uvm_page_tree_struct
    uvm_page_directory_t *root;
    uvm_mmu_mode_hal_t *hal;
    uvm_page_tree_type_t type;
-    NvU64 big_page_size;
+    NvU32 big_page_size;

    // Pointer to the GPU VA space containing the page tree.
    // This pointer is set only for page trees of type
@@ -328,7 +325,7 @@ struct uvm_page_table_range_vec_struct
    NvU64 size;

    // Page size used for all the page table ranges
-    NvU64 page_size;
+    NvU32 page_size;

    // Page table ranges covering the VA
    uvm_page_table_range_t *ranges;
@@ -355,7 +352,7 @@ void uvm_mmu_init_gpu_peer_addresses(uvm_gpu_t *gpu);
 NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
                             uvm_gpu_va_space_t *gpu_va_space,
                             uvm_page_tree_type_t type,
-                             NvU64 big_page_size,
+                             NvU32 big_page_size,
                             uvm_aperture_t location,
                             uvm_page_tree_t *tree_out);

@@ -377,9 +374,9 @@ void uvm_page_tree_deinit(uvm_page_tree_t *tree);
 // an existing range or change the size of an existing range, use
 // uvm_page_table_range_get_upper() and/or uvm_page_table_range_shrink().
 NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
-                                 NvU64 page_size,
+                                 NvU32 page_size,
                                 NvU64 start,
-                                 NvU64 size,
+                                 NvLength size,
                                 uvm_pmm_alloc_flags_t pmm_flags,
                                 uvm_page_table_range_t *range);

@@ -387,9 +384,9 @@ NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
 //
 // All pending operations can be waited on with uvm_page_tree_wait().
 NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
-                                       NvU64 page_size,
+                                       NvU32 page_size,
                                       NvU64 start,
-                                       NvU64 size,
+                                       NvLength size,
                                       uvm_pmm_alloc_flags_t pmm_flags,
                                       uvm_page_table_range_t *range);

@@ -398,7 +395,7 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
 // This is equivalent to calling uvm_page_tree_get_ptes() with size equal to
 // page_size.
 NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
-                                  NvU64 page_size,
+                                  NvU32 page_size,
                                  NvU64 start,
                                  uvm_pmm_alloc_flags_t pmm_flags,
                                  uvm_page_table_range_t *single);
@@ -429,7 +426,7 @@ void uvm_page_tree_clear_pde(uvm_page_tree_t *tree, uvm_page_table_range_t *sing
 // It is the caller's responsibility to initialize the returned table before
 // calling uvm_page_tree_write_pde.
 NV_STATUS uvm_page_tree_alloc_table(uvm_page_tree_t *tree,
-                                    NvU64 page_size,
+                                    NvU32 page_size,
                                    uvm_pmm_alloc_flags_t pmm_flags,
                                    uvm_page_table_range_t *single,
                                    uvm_page_table_range_t *children);
@@ -483,7 +480,7 @@ static uvm_mmu_page_table_alloc_t *uvm_page_tree_pdb(uvm_page_tree_t *tree)
 NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
                                        NvU64 start,
                                        NvU64 size,
-                                        NvU64 page_size,
+                                        NvU32 page_size,
                                        uvm_pmm_alloc_flags_t pmm_flags,
                                        uvm_page_table_range_vec_t *range_vec);

@@ -492,7 +489,7 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
 NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
                                          NvU64 start,
                                          NvU64 size,
-                                          NvU64 page_size,
+                                          NvU32 page_size,
                                          uvm_pmm_alloc_flags_t pmm_flags,
                                          uvm_page_table_range_vec_t **range_vec_out);

@@ -604,12 +601,12 @@ void uvm_mmu_chunk_unmap(uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker);
 // uvm_parent_gpu_map_cpu_pages for the given GPU.
 NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size);

-static NvU64 uvm_mmu_page_tree_entries(uvm_page_tree_t *tree, NvU32 depth, NvU64 page_size)
+static NvU64 uvm_mmu_page_tree_entries(uvm_page_tree_t *tree, NvU32 depth, NvU32 page_size)
 {
    return 1ull << tree->hal->index_bits(depth, page_size);
 }

-static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU64 page_size)
+static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
 {
    NvU32 depth = tree->hal->page_table_depth(page_size);
    return uvm_mmu_page_tree_entries(tree, depth, page_size) * page_size;
@@ -618,21 +615,21 @@ static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU64 page_size)
 // Page sizes supported by the GPU. Use uvm_mmu_biggest_page_size() to retrieve
 // the largest page size supported in a given system, which considers the GMMU
 // and vMMU page sizes and segment sizes.
-static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU64 page_size)
+static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)
 {
-    UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%llx\n", page_size);
+    UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%x\n", page_size);

    return (tree->hal->page_sizes() & page_size) != 0;
 }

-static NvU64 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU64 max_page_size)
+static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_page_size)
 {
-    NvU64 gpu_page_sizes = tree->hal->page_sizes();
-    NvU64 smallest_gpu_page_size = gpu_page_sizes & ~(gpu_page_sizes - 1);
-    NvU64 page_sizes;
-    NvU64 page_size;
+    NvU32 gpu_page_sizes = tree->hal->page_sizes();
+    NvU32 smallest_gpu_page_size = gpu_page_sizes & ~(gpu_page_sizes - 1);
+    NvU32 page_sizes;
+    NvU32 page_size;

-    UVM_ASSERT_MSG(is_power_of_2(max_page_size), "0x%llx\n", max_page_size);
+    UVM_ASSERT_MSG(is_power_of_2(max_page_size), "0x%x\n", max_page_size);

    if (max_page_size < smallest_gpu_page_size)
        return 0;
@@ -641,14 +638,14 @@ static NvU64 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU64 max_pa
    page_sizes = gpu_page_sizes & (max_page_size | (max_page_size - 1));

    // And pick the biggest one of them
-    page_size = 1ULL << __fls(page_sizes);
+    page_size = 1 << __fls(page_sizes);

-    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx", page_size);
+    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x", page_size);

    return page_size;
 }

-static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU64 page_size)
+static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU32 page_size)
 {
    return tree->hal->entry_size(tree->hal->page_table_depth(page_size));
 }
--- a/kernel-open/nvidia-uvm/uvm_page_tree_test.c
+++ b/kernel-open/nvidia-uvm/uvm_page_tree_test.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -46,9 +46,6 @@
 // HOPPER_*
 #include "clc8b5.h"
 #include "clc86f.h"
-// BLACKWELL_*
-#include "clc96f.h"
-#include "clc9b5.h"
 // ARCHITECTURE_*
 #include "ctrl2080mc.h"

@@ -99,7 +96,7 @@ typedef struct
 {
    NvU64 base;
    NvU64 size;
-    NvU64 page_size;
+    NvU32 page_size;
    NvU32 depth;
    uvm_membar_t membar;
 } fake_tlb_invalidate_t;
@@ -156,7 +153,7 @@ static void fake_tlb_invalidate_va(uvm_push_t *push,
                                   NvU32 depth,
                                   NvU64 base,
                                   NvU64 size,
-                                   NvU64 page_size,
+                                   NvU32 page_size,
                                   uvm_membar_t membar)
 {
    if (!g_fake_tlb_invals_tracking_enabled)
@@ -252,11 +249,7 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
 }

 static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
-                                             NvU64 base,
-                                             NvU64 size,
-                                             NvU64 page_size,
-                                             NvU32 expected_depth,
-                                             bool expected_membar)
+        NvU64 base, NvU64 size, NvU32 page_size, NvU32 expected_depth, bool expected_membar)
 {
    UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);

@@ -278,7 +271,7 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
        return false;
    }
    if (inval->page_size != page_size && inval->base != 0 && inval->size != -1) {
-        UVM_TEST_PRINT("Expected page size %llu, got %llu instead\n", page_size, inval->page_size);
+        UVM_TEST_PRINT("Expected page size %u, got %u instead\n", page_size, inval->page_size);
        return false;
    }

@@ -287,7 +280,7 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,

 static bool assert_invalidate_range(NvU64 base,
                                    NvU64 size,
-                                    NvU64 page_size,
+                                    NvU32 page_size,
                                    bool allow_inval_all,
                                    NvU32 range_depth,
                                    NvU32 all_depth,
@@ -332,7 +325,7 @@ static NV_STATUS test_page_tree_init_kernel(uvm_gpu_t *gpu, NvU32 big_page_size,
 }

 static NV_STATUS test_page_tree_get_ptes(uvm_page_tree_t *tree,
-                                         NvU64 page_size,
+                                         NvU32 page_size,
                                         NvU64 start,
                                         NvLength size,
                                         uvm_page_table_range_t *range)
@@ -348,7 +341,7 @@ static NV_STATUS test_page_tree_get_ptes(uvm_page_tree_t *tree,
 }

 static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
-                                          NvU64 page_size,
+                                          NvU32 page_size,
                                          NvU64 start,
                                          uvm_page_table_range_t *single)
 {
@@ -362,14 +355,14 @@ static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
 }

 static NV_STATUS test_page_tree_alloc_table(uvm_page_tree_t *tree,
-                                            NvU64 page_size,
+                                            NvU32 page_size,
                                            uvm_page_table_range_t *single,
                                            uvm_page_table_range_t *children)
 {
    return uvm_page_tree_alloc_table(tree, page_size, UVM_PMM_ALLOC_FLAGS_NONE, single, children);
 }

-static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU64 page_size, NvU64 start)
+static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start)
 {
    uvm_page_table_range_t entry;
    bool result = true;
@@ -385,7 +378,7 @@ static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU64 page_size, N
    return assert_no_invalidate() && result;
 }

-static bool assert_entry_invalidate(uvm_page_tree_t *tree, NvU64 page_size, NvU64 start, NvU32 depth, bool membar)
+static bool assert_entry_invalidate(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvU32 depth, bool membar)
 {
    uvm_page_table_range_t entry;
    bool result = true;
@@ -675,77 +668,6 @@ static NV_STATUS get_single_page_512m(uvm_gpu_t *gpu)
    return NV_OK;
 }

-static NV_STATUS alloc_256g_memory(uvm_gpu_t *gpu)
-{
-    uvm_page_tree_t tree;
-    uvm_page_table_range_t range;
-
-    NvLength size = 256 * UVM_SIZE_1GB;
-    MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
-    MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_256G, 0, size, &range), NV_OK);
-    TEST_CHECK_RET(range.entry_count == 1);
-    TEST_CHECK_RET(range.table->depth == 2);
-    TEST_CHECK_RET(range.start_index == 0);
-    TEST_CHECK_RET(range.page_size == UVM_PAGE_SIZE_256G);
-    TEST_CHECK_RET(tree.root->ref_count == 1);
-    TEST_CHECK_RET(tree.root->entries[0]->ref_count == 1);
-    TEST_CHECK_RET(tree.root->entries[0]->entries[0]->ref_count == 1);
-    TEST_CHECK_RET(range.table == tree.root->entries[0]->entries[0]);
-    uvm_page_tree_put_ptes(&tree, &range);
-    UVM_ASSERT(tree.root->ref_count == 0);
-    uvm_page_tree_deinit(&tree);
-
-    return NV_OK;
-}
-
-static NV_STATUS alloc_adjacent_256g_memory(uvm_gpu_t *gpu)
-{
-    uvm_page_tree_t tree;
-    uvm_page_table_range_t range1;
-    uvm_page_table_range_t range2;
-
-    NvLength size = 256 * UVM_SIZE_1GB;
-    MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
-    MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_256G, size, size, &range1), NV_OK);
-    TEST_CHECK_RET(range1.entry_count == 1);
-
-    MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_256G, 0, size, &range2), NV_OK);
-    TEST_CHECK_RET(range2.entry_count == 1);
-    TEST_CHECK_RET(range1.table == range2.table);
-    TEST_CHECK_RET(range1.table == tree.root->entries[0]->entries[0]);
-    TEST_CHECK_RET(range1.start_index == 1);
-    TEST_CHECK_RET(range2.start_index == 0);
-
-    uvm_page_tree_put_ptes(&tree, &range1);
-    uvm_page_tree_put_ptes(&tree, &range2);
-    uvm_page_tree_deinit(&tree);
-
-    return NV_OK;
-}
-
-static NV_STATUS get_single_page_256g(uvm_gpu_t *gpu)
-{
-    uvm_page_tree_t tree;
-    uvm_page_table_range_t range;
-
-    // use a start address not at the beginning of a PDE2 entry's range
-    NvU64 start = 3 * 256 * UVM_SIZE_1GB;
-    NvLength size = 256 * UVM_SIZE_1GB;
-
-    MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
-    MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_256G, start, size, &range), NV_OK);
-
-    TEST_CHECK_RET(range.entry_count == 1);
-    TEST_CHECK_RET(range.table->depth == 2);
-    TEST_CHECK_RET(range.page_size == UVM_PAGE_SIZE_256G);
-
-    uvm_page_tree_put_ptes(&tree, &range);
-    TEST_CHECK_RET(tree.root->ref_count == 0);
-    uvm_page_tree_deinit(&tree);
-
-    return NV_OK;
-}
-
 static NV_STATUS get_entire_table_4k(uvm_gpu_t *gpu)
 {
    uvm_page_tree_t tree;
@@ -793,29 +715,6 @@ static NV_STATUS get_entire_table_512m(uvm_gpu_t *gpu)
    return NV_OK;
 }

-static NV_STATUS get_entire_table_256g(uvm_gpu_t *gpu)
-{
-    uvm_page_tree_t tree;
-    uvm_page_table_range_t range;
-
-    NvU64 start = 1UL << 48;
-    NvLength size = 512UL * UVM_PAGE_SIZE_256G;
-
-    MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
-    MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_256G, start, size, &range), NV_OK);
-
-    TEST_CHECK_RET(range.table == tree.root->entries[0]->entries[2]);
-    TEST_CHECK_RET(range.entry_count == 512);
-    TEST_CHECK_RET(range.table->depth == 2);
-    TEST_CHECK_RET(range.page_size == UVM_PAGE_SIZE_256G);
-    TEST_CHECK_RET(tree.root->ref_count == 1);
-
-    uvm_page_tree_put_ptes(&tree, &range);
-    uvm_page_tree_deinit(&tree);
-
-    return NV_OK;
-}
-
 static NV_STATUS split_4k_from_2m(uvm_gpu_t *gpu)
 {
    uvm_page_tree_t tree;
@@ -902,43 +801,6 @@ static NV_STATUS split_2m_from_512m(uvm_gpu_t *gpu)
    return NV_OK;
 }

-static NV_STATUS split_512m_from_256g(uvm_gpu_t *gpu)
-{
-    uvm_page_tree_t tree;
-    uvm_page_table_range_t range_256g;
-    uvm_page_table_range_t range_adj;
-    uvm_page_table_range_t range_512m;
-
-    NvU64 start = 1UL << 48;
-    NvLength size = UVM_PAGE_SIZE_256G;
-
-    MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
-    MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_256G, start, size, &range_256g), NV_OK);
-    MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_256G, start + size, size, &range_adj), NV_OK);
-
-    TEST_CHECK_RET(range_256g.entry_count == 1);
-    TEST_CHECK_RET(range_256g.table->depth == 2);
-    TEST_CHECK_RET(range_adj.entry_count == 1);
-    TEST_CHECK_RET(range_adj.table->depth == 2);
-
-    // Need to release the 256G page so that the reference count is right.
-    uvm_page_tree_put_ptes(&tree, &range_256g);
-    MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_512M, start, size, &range_512m), NV_OK);
-
-    TEST_CHECK_RET(range_512m.entry_count == 512);
-    TEST_CHECK_RET(range_512m.table->depth == 3);
-    TEST_CHECK_RET(range_512m.table == tree.root->entries[0]->entries[2]->entries[0]);
-    TEST_CHECK_RET(range_512m.start_index == 0);
-
-    // Free everything
-    uvm_page_tree_put_ptes(&tree, &range_adj);
-    uvm_page_tree_put_ptes(&tree, &range_512m);
-
-    uvm_page_tree_deinit(&tree);
-
-    return NV_OK;
-}
-
 static NV_STATUS get_512mb_range(uvm_gpu_t *gpu)
 {
    uvm_page_tree_t tree;
@@ -977,25 +839,6 @@ static NV_STATUS get_2gb_range(uvm_gpu_t *gpu)
    return NV_OK;
 }

-static NV_STATUS get_1tb_range(uvm_gpu_t *gpu)
-{
-    uvm_page_tree_t tree;
-    uvm_page_table_range_t range;
-
-    NvU64 start = UVM_SIZE_1TB;
-    NvU64 size = start;
-
-    MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
-    MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_256G, start, size, &range), NV_OK);
-    TEST_CHECK_RET(range.entry_count == 4);
-    TEST_CHECK_RET(range.table->depth == 2);
-    TEST_CHECK_RET(range.start_index == 4);
-    uvm_page_tree_put_ptes(&tree, &range);
-    uvm_page_tree_deinit(&tree);
-
-    return NV_OK;
-}
-
 static NV_STATUS get_two_free_apart(uvm_gpu_t *gpu)
 {
    uvm_page_tree_t tree;
@@ -1089,8 +932,8 @@ static NV_STATUS split_and_free(uvm_gpu_t *gpu)

 static NV_STATUS check_sizes(uvm_gpu_t *gpu)
 {
-    NvU64 user_sizes = UVM_PAGE_SIZE_2M;
-    NvU64 kernel_sizes = UVM_PAGE_SIZE_4K | 256;
+    NvU32 user_sizes = UVM_PAGE_SIZE_2M;
+    NvU32 kernel_sizes = UVM_PAGE_SIZE_4K | 256;

    if (UVM_PAGE_SIZE_64K >= PAGE_SIZE)
        user_sizes |= UVM_PAGE_SIZE_64K;
@@ -1193,7 +1036,7 @@ static NV_STATUS fast_split_double_backoff(uvm_gpu_t *gpu)
    return NV_OK;
 }

-static NV_STATUS test_tlb_invalidates_gmmu_v2(uvm_gpu_t *gpu)
+static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
 {
    NV_STATUS status = NV_OK;
    uvm_page_tree_t tree;
@@ -1256,80 +1099,11 @@ static NV_STATUS test_tlb_invalidates_gmmu_v2(uvm_gpu_t *gpu)
    return status;
 }

-static NV_STATUS test_tlb_invalidates_gmmu_v3(uvm_gpu_t *gpu)
-{
-    NV_STATUS status = NV_OK;
-    uvm_page_tree_t tree;
-    uvm_page_table_range_t entries[6];
-    int i;
-
-    // Depth 5
-    NvU64 extent_pte = UVM_PAGE_SIZE_2M;
-
-    // Depth 4
-    NvU64 extent_pde0 = extent_pte * (1ull << 8);
-
-    // Depth 3
-    NvU64 extent_pde1 = extent_pde0 * (1ull << 9);
-
-    // Depth 2
-    NvU64 extent_pde2 = extent_pde1 * (1ull << 9);
-
-    // Depth 1
-    NvU64 extent_pde3 = extent_pde2 * (1ull << 9);
-
-    MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
-
-    fake_tlb_invals_enable();
-
-    TEST_CHECK_RET(assert_entry_invalidate(&tree, UVM_PAGE_SIZE_4K, 0, 0, true));
-    TEST_CHECK_RET(assert_entry_invalidate(&tree, UVM_PAGE_SIZE_4K, 0, 0, true));
-
-    TEST_CHECK_RET(test_page_tree_get_entry(&tree, UVM_PAGE_SIZE_4K, 0, &entries[0]) == NV_OK);
-    TEST_CHECK_RET(assert_and_reset_last_invalidate(0, false));
-
-    TEST_CHECK_RET(assert_entry_no_invalidate(&tree, UVM_PAGE_SIZE_4K, extent_pte - UVM_PAGE_SIZE_4K));
-
-    TEST_CHECK_RET(assert_entry_invalidate(&tree, UVM_PAGE_SIZE_64K, 0, 4, true));
-
-    TEST_CHECK_RET(test_page_tree_get_entry(&tree, UVM_PAGE_SIZE_64K, 0, &entries[1]) == NV_OK);
-    TEST_CHECK_RET(assert_and_reset_last_invalidate(4, false));
-
-    TEST_CHECK_RET(test_page_tree_get_entry(&tree, UVM_PAGE_SIZE_4K, extent_pde0, &entries[2]) == NV_OK);
-    TEST_CHECK_RET(assert_and_reset_last_invalidate(3, false));
-
-    TEST_CHECK_RET(test_page_tree_get_entry(&tree, UVM_PAGE_SIZE_4K, extent_pde1, &entries[3]) == NV_OK);
-    TEST_CHECK_RET(assert_and_reset_last_invalidate(2, false));
-
-    TEST_CHECK_RET(test_page_tree_get_entry(&tree, UVM_PAGE_SIZE_4K, extent_pde2, &entries[4]) == NV_OK);
-    TEST_CHECK_RET(assert_and_reset_last_invalidate(1, false));
-
-    TEST_CHECK_RET(test_page_tree_get_entry(&tree, UVM_PAGE_SIZE_4K, extent_pde3, &entries[5]) == NV_OK);
-    TEST_CHECK_RET(assert_and_reset_last_invalidate(0, false));
-
-    for (i = 5; i > 1; --i) {
-        uvm_page_tree_put_ptes(&tree, &entries[i]);
-        TEST_CHECK_RET(assert_and_reset_last_invalidate(5 - i, true));
-    }
-
-    uvm_page_tree_put_ptes(&tree, &entries[0]);
-    TEST_CHECK_RET(assert_and_reset_last_invalidate(4, true));
-
-    uvm_page_tree_put_ptes(&tree, &entries[1]);
-    TEST_CHECK_RET(assert_and_reset_last_invalidate(0, true));
-
-    fake_tlb_invals_disable();
-
-    uvm_page_tree_deinit(&tree);
-
-    return status;
-}
-
 static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
                                                 NvU64 base,
                                                 NvU64 size,
-                                                 NvU64 min_page_size,
-                                                 NvU64 max_page_size)
+                                                 NvU32 min_page_size,
+                                                 NvU32 max_page_size)
 {
    NV_STATUS status = NV_OK;
    uvm_push_t push;
@@ -1351,7 +1125,7 @@ static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
        uvm_tlb_batch_begin(tree, &batch);

        for (j = 0; j < i; ++j) {
-            NvU64 used_max_page_size = (j & 1) ? max_page_size : min_page_size;
+            NvU32 used_max_page_size = (j & 1) ? max_page_size : min_page_size;
            NvU32 expected_range_depth = tree->hal->page_table_depth(used_max_page_size);
            expected_inval_all_depth = min(expected_inval_all_depth, expected_range_depth);
            uvm_tlb_batch_invalidate(&batch,
@@ -1365,7 +1139,7 @@ static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
        uvm_tlb_batch_end(&batch, &push, UVM_MEMBAR_NONE);

        for (j = 0; j < i; ++j) {
-            NvU64 used_max_page_size = (j & 1) ? max_page_size : min_page_size;
+            NvU32 used_max_page_size = (j & 1) ? max_page_size : min_page_size;
            NvU32 expected_range_depth = tree->hal->page_table_depth(used_max_page_size);
            bool allow_inval_all = (total_pages > gpu->parent->tlb_batch.max_pages) ||
                                   !gpu->parent->tlb_batch.va_invalidate_supported ||
@@ -1387,7 +1161,7 @@ static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
    return status;
 }

-static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU64 *page_sizes, const NvU32 page_sizes_count)
+static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU32 *page_sizes, const NvU32 page_sizes_count)
 {
    NV_STATUS status = NV_OK;
    uvm_page_tree_t tree;
@@ -1403,8 +1177,8 @@ static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU64 *page_si
    for (min_index = 0; min_index < page_sizes_count; ++min_index) {
        for (max_index = min_index; max_index < page_sizes_count; ++max_index) {
            for (size_index = 0; size_index < ARRAY_SIZE(sizes_in_max_pages); ++size_index) {
-                NvU64 min_page_size = page_sizes[min_index];
-                NvU64 max_page_size = page_sizes[max_index];
+                NvU32 min_page_size = page_sizes[min_index];
+                NvU32 max_page_size = page_sizes[max_index];
                NvU64 size = (NvU64)sizes_in_max_pages[size_index] * max_page_size;

                TEST_CHECK_GOTO(test_tlb_batch_invalidates_case(&tree,
@@ -1508,7 +1282,7 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
 static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
                                       NvU64 start,
                                       NvU64 size,
-                                       NvU64 page_size,
+                                       NvU32 page_size,
                                       uvm_page_table_range_vec_t **range_vec_out)
 {
    uvm_page_table_range_vec_t *range_vec;
@@ -1529,7 +1303,7 @@ static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
 // Test page table range vector APIs.
 // Notably the test leaks the page_tree and range_vec on error as it's hard to
 // clean up on failure and the destructors would likely assert.
-static NV_STATUS test_range_vec(uvm_gpu_t *gpu, NvU32 big_page_size, NvU64 page_size)
+static NV_STATUS test_range_vec(uvm_gpu_t *gpu, NvU32 big_page_size, NvU32 page_size)
 {
    NV_STATUS status = NV_OK;
    uvm_page_tree_t tree;
@@ -1737,7 +1511,7 @@ static uvm_mmu_page_table_alloc_t fake_table_alloc(uvm_aperture_t aperture, NvU6
 // Queries the supported page sizes of the GPU(uvm_gpu_t) and fills the
 // page_sizes array up to MAX_NUM_PAGE_SIZE. Returns the number of elements in
 // page_sizes;
-static size_t get_page_sizes(uvm_gpu_t *gpu, NvU64 *page_sizes)
+size_t get_page_sizes(uvm_gpu_t *gpu, NvU32 *page_sizes)
 {
    unsigned long page_size_log2;
    unsigned long page_sizes_bitvec;
@@ -1750,7 +1524,7 @@ static size_t get_page_sizes(uvm_gpu_t *gpu, NvU64 *page_sizes)
    page_sizes_bitvec = hal->page_sizes();

    for_each_set_bit(page_size_log2, &page_sizes_bitvec, BITS_PER_LONG) {
-        NvU64 page_size = 1ULL << page_size_log2;
+        NvU32 page_size = (NvU32)(1ULL << page_size_log2);
        UVM_ASSERT(count < MAX_NUM_PAGE_SIZES);
        page_sizes[count++] = page_size;
    }
@@ -1794,24 +1568,18 @@ static NV_STATUS entry_test_page_size_hopper(uvm_gpu_t *gpu, size_t page_size)
    return NV_OK;
 }

-static NV_STATUS entry_test_page_size_blackwell(uvm_gpu_t *gpu, size_t page_size)
-{
-    return entry_test_page_size_hopper(gpu, page_size);
-}
-
 typedef NV_STATUS (*entry_test_page_size_func)(uvm_gpu_t *gpu, size_t page_size);

 static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
 {
-    static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
+    static const NvU32 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
    NvU64 pde_bits;
    uvm_mmu_page_table_alloc_t *phys_allocs[2];
    uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
    uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
    uvm_mmu_mode_hal_t *hal;
    uvm_page_directory_t dir;
-    NvU64 big_page_size, page_size;
-    NvU32 i, j;
+    NvU32 i, j, big_page_size, page_size;

    dir.depth = 0;

@@ -1895,7 +1663,7 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)

 static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
 {
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
    NvU64 pde_bits[2];
    size_t i, num_page_sizes;
    uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
@@ -1991,7 +1759,7 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent

 static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
 {
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
    NvU64 pde_bits[2];
    size_t i, num_page_sizes;
    uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
@@ -2065,7 +1833,7 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr

 static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
 {
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
    NvU32 i, num_page_sizes;

    num_page_sizes = get_page_sizes(gpu, page_sizes);
@@ -2079,7 +1847,7 @@ static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func ent
 static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
 {
    NV_STATUS status = NV_OK;
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
    NvU64 pde_bits[2];
    uvm_page_directory_t *dirs[5];
    size_t i, num_page_sizes;
@@ -2277,11 +2045,6 @@ cleanup:
    return status;
 }

-static NV_STATUS entry_test_blackwell(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
-{
-    return entry_test_ampere(gpu, entry_test_page_size_blackwell);
-}
-
 static NV_STATUS alloc_4k_maxwell(uvm_gpu_t *gpu)
 {
    uvm_page_tree_t tree;
@@ -2319,7 +2082,7 @@ static NV_STATUS alloc_4k_maxwell(uvm_gpu_t *gpu)
    return NV_OK;
 }

-static NV_STATUS shrink_test(uvm_gpu_t *gpu, NvU32 big_page_size, NvU64 page_size)
+static NV_STATUS shrink_test(uvm_gpu_t *gpu, NvU32 big_page_size, NvU32 page_size)
 {
    uvm_page_tree_t tree;
    uvm_page_table_range_t range;
@@ -2371,7 +2134,7 @@ static NV_STATUS shrink_test(uvm_gpu_t *gpu, NvU32 big_page_size, NvU64 page_siz
    return NV_OK;
 }

-static NV_STATUS get_upper_test(uvm_gpu_t *gpu, NvU32 big_page_size, NvU64 page_size)
+static NV_STATUS get_upper_test(uvm_gpu_t *gpu, NvU32 big_page_size, NvU32 page_size)
 {
    uvm_page_tree_t tree;
    uvm_page_table_range_t range, upper_range;
@@ -2524,19 +2287,11 @@ static NV_STATUS fake_gpu_init_hopper(uvm_gpu_t *fake_gpu)
                         fake_gpu);
 }

-static NV_STATUS fake_gpu_init_blackwell(uvm_gpu_t *fake_gpu)
-{
-    return fake_gpu_init(BLACKWELL_CHANNEL_GPFIFO_A,
-                         BLACKWELL_DMA_COPY_A,
-                         NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
-                         fake_gpu);
-}
-
 static NV_STATUS maxwell_test_page_tree(uvm_gpu_t *maxwell)
 {
    // create a fake Maxwell GPU for this test.
-    static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
-    NvU64 i, j, big_page_size, page_size;
+    static const NvU32 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
+    NvU32 i, j, big_page_size, page_size;

    TEST_CHECK_RET(fake_gpu_init_maxwell(maxwell) == NV_OK);

@@ -2565,7 +2320,7 @@ static NV_STATUS pascal_test_page_tree(uvm_gpu_t *pascal)
    // create a fake Pascal GPU for this test.
    NvU32 tlb_batch_saved_max_pages;
    NvU32 i;
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
    size_t num_page_sizes;

    TEST_CHECK_RET(fake_gpu_init_pascal(pascal) == NV_OK);
@@ -2591,7 +2346,7 @@ static NV_STATUS pascal_test_page_tree(uvm_gpu_t *pascal)
    MEM_NV_CHECK_RET(check_sizes(pascal), NV_OK);
    MEM_NV_CHECK_RET(fast_split_normal(pascal), NV_OK);
    MEM_NV_CHECK_RET(fast_split_double_backoff(pascal), NV_OK);
-    MEM_NV_CHECK_RET(test_tlb_invalidates_gmmu_v2(pascal), NV_OK);
+    MEM_NV_CHECK_RET(test_tlb_invalidates(pascal), NV_OK);
    MEM_NV_CHECK_RET(test_tlb_batch_invalidates(pascal, page_sizes, num_page_sizes), NV_OK);

    // Run the test again with a bigger limit on max pages
@@ -2626,7 +2381,7 @@ static NV_STATUS volta_test_page_tree(uvm_gpu_t *volta)
 static NV_STATUS ampere_test_page_tree(uvm_gpu_t *ampere)
 {
    NvU32 i, tlb_batch_saved_max_pages;
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
    size_t num_page_sizes;

    TEST_CHECK_RET(fake_gpu_init_ampere(ampere) == NV_OK);
@@ -2647,7 +2402,7 @@ static NV_STATUS ampere_test_page_tree(uvm_gpu_t *ampere)
    MEM_NV_CHECK_RET(entry_test_ampere(ampere, entry_test_page_size_ampere), NV_OK);

    // TLB invalidate
-    MEM_NV_CHECK_RET(test_tlb_invalidates_gmmu_v2(ampere), NV_OK);
+    MEM_NV_CHECK_RET(test_tlb_invalidates(ampere), NV_OK);

    // TLB batch invalidate
    MEM_NV_CHECK_RET(test_tlb_batch_invalidates(ampere, page_sizes, num_page_sizes), NV_OK);
@@ -2682,55 +2437,6 @@ static NV_STATUS hopper_test_page_tree(uvm_gpu_t *hopper)
    return NV_OK;
 }

-static NV_STATUS blackwell_test_page_tree(uvm_gpu_t *blackwell)
-{
-    NvU32 i, tlb_batch_saved_max_pages;
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
-    size_t num_page_sizes;
-
-    TEST_CHECK_RET(fake_gpu_init_blackwell(blackwell) == NV_OK);
-
-    num_page_sizes = get_page_sizes(blackwell, page_sizes);
-    UVM_ASSERT(num_page_sizes > 0);
-
-    MEM_NV_CHECK_RET(alloc_256g_memory(blackwell), NV_OK);
-    MEM_NV_CHECK_RET(alloc_adjacent_256g_memory(blackwell), NV_OK);
-    MEM_NV_CHECK_RET(get_single_page_256g(blackwell), NV_OK);
-    MEM_NV_CHECK_RET(get_entire_table_256g(blackwell), NV_OK);
-
-    // Although there is no support for the 256GM page size for managed memory,
-    // we run tests that split a 256G page into 512x512M pages because UVM
-    // handles the PTEs for all supported page sizes.
-    MEM_NV_CHECK_RET(split_512m_from_256g(blackwell), NV_OK);
-    MEM_NV_CHECK_RET(get_1tb_range(blackwell), NV_OK);
-    MEM_NV_CHECK_RET(entry_test_blackwell(blackwell, entry_test_page_size_blackwell), NV_OK);
-
-    // TLB invalidate
-    MEM_NV_CHECK_RET(test_tlb_invalidates_gmmu_v3(blackwell), NV_OK);
-
-    // TLB batch invalidate
-    MEM_NV_CHECK_RET(test_tlb_batch_invalidates(blackwell, page_sizes, num_page_sizes), NV_OK);
-
-    // Run the test again with a bigger limit on max pages
-    tlb_batch_saved_max_pages = blackwell->parent->tlb_batch.max_pages;
-    blackwell->parent->tlb_batch.max_pages = 1024 * 1024;
-    MEM_NV_CHECK_RET(test_tlb_batch_invalidates(blackwell, page_sizes, num_page_sizes), NV_OK);
-    blackwell->parent->tlb_batch.max_pages = tlb_batch_saved_max_pages;
-
-    // And with per VA invalidates disabled
-    blackwell->parent->tlb_batch.va_invalidate_supported = false;
-    MEM_NV_CHECK_RET(test_tlb_batch_invalidates(blackwell, page_sizes, num_page_sizes), NV_OK);
-    blackwell->parent->tlb_batch.va_invalidate_supported = true;
-
-    for (i = 0; i < num_page_sizes; i++) {
-        MEM_NV_CHECK_RET(shrink_test(blackwell, BIG_PAGE_SIZE_PASCAL, page_sizes[i]), NV_OK);
-        MEM_NV_CHECK_RET(get_upper_test(blackwell, BIG_PAGE_SIZE_PASCAL, page_sizes[i]), NV_OK);
-        MEM_NV_CHECK_RET(test_range_vec(blackwell, BIG_PAGE_SIZE_PASCAL, page_sizes[i]), NV_OK);
-    }
-
-    return NV_OK;
-}
-
 NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *filp)
 {
    NV_STATUS status = NV_OK;
@@ -2771,7 +2477,6 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
    TEST_NV_CHECK_GOTO(volta_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(ampere_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(hopper_test_page_tree(gpu), done);
-    TEST_NV_CHECK_GOTO(blackwell_test_page_tree(gpu), done);

 done:
    fake_tlb_invals_free();
--- a/kernel-open/nvidia-uvm/uvm_pascal_fault_buffer.c
+++ b/kernel-open/nvidia-uvm/uvm_pascal_fault_buffer.c
@@ -323,3 +323,10 @@ NvU32 uvm_hal_pascal_fault_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
 {
    return NVB069_FAULT_BUF_SIZE;
 }
+
+void uvm_hal_pascal_fault_buffer_parse_non_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
+                                                                        void *fault_packet,
+                                                                        uvm_fault_buffer_entry_t *buffer_entry)
+{
+    UVM_ASSERT_MSG(false, "fault_buffer_parse_non_replayable_entry called on Pascal GPU\n");
+}
--- a/kernel-open/nvidia-uvm/uvm_pascal_host.c
+++ b/kernel-open/nvidia-uvm/uvm_pascal_host.c
@@ -92,13 +92,7 @@ void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push, uvm_gpu_phys_addre
    uvm_hal_tlb_invalidate_membar(push, membar);
 }

-void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
-                                           uvm_gpu_phys_address_t pdb,
-                                           NvU32 depth,
-                                           NvU64 base,
-                                           NvU64 size,
-                                           NvU64 page_size,
-                                           uvm_membar_t membar)
+void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb, NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
 {
    NvU32 aperture_value;
    NvU32 page_table_level;
@@ -133,9 +127,9 @@ void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
        ack_value = HWCONST(C06F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
    }

-    UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
    UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);

    base >>= 12;
--- a/kernel-open/nvidia-uvm/uvm_pascal_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_pascal_mmu.c
@@ -37,7 +37,6 @@
 #include "uvm_global.h"
 #include "uvm_gpu.h"
 #include "uvm_mmu.h"
-#include "uvm_hal.h"
 #include "uvm_push_macros.h"
 #include "uvm_pascal_fault_buffer.h"
 #include "hwref/pascal/gp100/dev_fault.h"
@@ -55,7 +54,7 @@ static NvU32 entries_per_index_pascal(NvU32 depth)
    return 1;
 }

-static NvLength entry_offset_pascal(NvU32 depth, NvU64 page_size)
+static NvLength entry_offset_pascal(NvU32 depth, NvU32 page_size)
 {
    UVM_ASSERT(depth < 5);
    if (page_size == UVM_PAGE_SIZE_4K && depth == 3)
@@ -179,7 +178,7 @@ static NvLength entry_size_pascal(NvU32 depth)
        return 8;
 }

-static NvU32 index_bits_pascal(NvU32 depth, NvU64 page_size)
+static NvU32 index_bits_pascal(NvU32 depth, NvU32 page_size)
 {
    static const NvU32 bit_widths[] = {2, 9, 9, 8};
    // some code paths keep on querying this until they get a 0, meaning only the page offset remains.
@@ -205,7 +204,7 @@ static NvU32 num_va_bits_pascal(void)
    return 49;
 }

-static NvLength allocation_size_pascal(NvU32 depth, NvU64 page_size)
+static NvLength allocation_size_pascal(NvU32 depth, NvU32 page_size)
 {
    UVM_ASSERT(depth < 5);
    if (depth == 4 && page_size == UVM_PAGE_SIZE_64K)
@@ -214,7 +213,7 @@ static NvLength allocation_size_pascal(NvU32 depth, NvU64 page_size)
    return 4096;
 }

-static NvU32 page_table_depth_pascal(NvU64 page_size)
+static NvU32 page_table_depth_pascal(NvU32 page_size)
 {
    if (page_size == UVM_PAGE_SIZE_2M)
        return 3;
@@ -222,12 +221,12 @@ static NvU32 page_table_depth_pascal(NvU64 page_size)
        return 4;
 }

-static NvU64 page_sizes_pascal(void)
+static NvU32 page_sizes_pascal(void)
 {
    return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
 }

-static NvU64 unmapped_pte_pascal(NvU64 page_size)
+static NvU64 unmapped_pte_pascal(NvU32 page_size)
 {
    // Setting the privilege bit on an otherwise-zeroed big PTE causes the
    // corresponding 4k PTEs to be ignored. This allows the invalidation of a
@@ -363,7 +362,7 @@ static uvm_mmu_mode_hal_t pascal_mmu_mode_hal =
    .page_sizes = page_sizes_pascal
 };

-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size)
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size)
 {
    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Andy Ritger	5e52edb203	550.120	2024-09-20 12:40:39 -07:00
Bernhard Stoeckner	2cca8b3fd5	550.107.02	2024-07-29 10:22:58 +02:00