515.43.04

Andy Ritger
2022-05-09 13:18:59 -07:00
commit 1739a20efc
2519 changed files with 1060036 additions and 0 deletions

@@ -0,0 +1,29 @@
/*******************************************************************************
Copyright (c) 2013 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __cla06fsubch_h__
#define __cla06fsubch_h__
#define NVA06F_SUBCHANNEL_COPY_ENGINE 4
#endif // {__cla06fsubch_h__}

@@ -0,0 +1,155 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _cla16f_h_
#define _cla16f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
#define KEPLER_CHANNEL_GPFIFO_B (0x0000A16F)
#define NVA16F_SET_OBJECT (0x00000000)
#define NVA16F_NOP (0x00000008)
#define NVA16F_NOP_HANDLE 31:0
#define NVA16F_SEMAPHOREA (0x00000010)
#define NVA16F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVA16F_SEMAPHOREB (0x00000014)
#define NVA16F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVA16F_SEMAPHOREC (0x00000018)
#define NVA16F_SEMAPHOREC_PAYLOAD 31:0
#define NVA16F_SEMAPHORED (0x0000001C)
#define NVA16F_SEMAPHORED_OPERATION 4:0
#define NVA16F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVA16F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVA16F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVA16F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVA16F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVA16F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVA16F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVA16F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVA16F_SEMAPHORED_RELEASE_WFI 20:20
#define NVA16F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVA16F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVA16F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVA16F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVA16F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVA16F_SEMAPHORED_REDUCTION 30:27
#define NVA16F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVA16F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVA16F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVA16F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVA16F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVA16F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVA16F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVA16F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVA16F_SEMAPHORED_FORMAT 31:31
#define NVA16F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVA16F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVA16F_NON_STALL_INTERRUPT (0x00000020)
#define NVA16F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVA16F_FB_FLUSH (0x00000024)
#define NVA16F_FB_FLUSH_HANDLE 31:0
#define NVA16F_MEM_OP_A (0x00000028)
#define NVA16F_MEM_OP_A_OPERAND_LOW 31:2
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_ADDR 29:2
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_TARGET 31:30
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_TARGET_VID_MEM 0x00000000
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_TARGET_SYS_MEM_COHERENT 0x00000002
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_TARGET_SYS_MEM_NONCOHERENT 0x00000003
#define NVA16F_MEM_OP_B (0x0000002c)
#define NVA16F_MEM_OP_B_OPERAND_HIGH 7:0
#define NVA16F_MEM_OP_B_OPERATION 31:27
#define NVA16F_MEM_OP_B_OPERATION_SYSMEMBAR_FLUSH 0x00000005
#define NVA16F_MEM_OP_B_OPERATION_SOFT_FLUSH 0x00000006
#define NVA16F_MEM_OP_B_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVA16F_MEM_OP_B_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVA16F_MEM_OP_B_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
#define NVA16F_MEM_OP_B_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVA16F_MEM_OP_B_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_PDB 0:0
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_PDB_ALL 0x00000001
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_GPC 1:1
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVA16F_SET_REFERENCE (0x00000050)
#define NVA16F_SET_REFERENCE_COUNT 31:0
#define NVA16F_WFI (0x00000078)
#define NVA16F_WFI_HANDLE 31:0
/* GPFIFO entry format */
#define NVA16F_GP_ENTRY__SIZE 8
#define NVA16F_GP_ENTRY0_FETCH 0:0
#define NVA16F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVA16F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVA16F_GP_ENTRY0_GET 31:2
#define NVA16F_GP_ENTRY0_OPERAND 31:0
#define NVA16F_GP_ENTRY1_GET_HI 7:0
#define NVA16F_GP_ENTRY1_PRIV 8:8
#define NVA16F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVA16F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVA16F_GP_ENTRY1_LEVEL 9:9
#define NVA16F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVA16F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVA16F_GP_ENTRY1_LENGTH 30:10
/* dma method formats */
#define NVA16F_DMA_METHOD_ADDRESS 11:0
#define NVA16F_DMA_METHOD_SUBCHANNEL 15:13
#define NVA16F_DMA_METHOD_COUNT 28:16
#define NVA16F_DMA_SEC_OP 31:29
#define NVA16F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVA16F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
/* dma incrementing method format */
#define NVA16F_DMA_INCR_ADDRESS 11:0
#define NVA16F_DMA_INCR_SUBCHANNEL 15:13
#define NVA16F_DMA_INCR_COUNT 28:16
#define NVA16F_DMA_INCR_OPCODE 31:29
#define NVA16F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVA16F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVA16F_DMA_NONINCR_ADDRESS 11:0
#define NVA16F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVA16F_DMA_NONINCR_COUNT 28:16
#define NVA16F_DMA_NONINCR_OPCODE 31:29
#define NVA16F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVA16F_DMA_NONINCR_DATA 31:0
/* dma immediate-data format */
#define NVA16F_DMA_IMMD_ADDRESS 11:0
#define NVA16F_DMA_IMMD_SUBCHANNEL 15:13
#define NVA16F_DMA_IMMD_DATA 28:16
#define NVA16F_DMA_IMMD_OPCODE 31:29
#define NVA16F_DMA_IMMD_OPCODE_VALUE (0x00000004)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _cla16f_h_ */
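
The HIGH:LOW pairs above are bit-field specifications; the driver combines them with its DRF-style shift/mask macros from nvmisc.h. Below is a minimal sketch of how the GPFIFO entry and incrementing-method formats defined in this header could be packed. The FLD_* helpers and the two function names are this example's own, not part of the released headers.

/* Minimal field helpers in the spirit of the driver's DRF macros (sketch).
 * A field spec such as 28:16 yields its high and low bit via the ?: trick. */
#define FLD_HI(f)      ((1) ? f)
#define FLD_LO(f)      ((0) ? f)
#define FLD_MASK(f)    ((0xFFFFFFFFu >> (31 - FLD_HI(f) + FLD_LO(f))) << FLD_LO(f))
#define FLD_NUM(f, n)  ((((NvU32)(n)) << FLD_LO(f)) & FLD_MASK(f))

/* Pack one GPFIFO entry pointing at a pushbuffer segment: 'gpu_va' is the
 * segment's GPU virtual address, 'length_dwords' the number of method words
 * it holds. */
static void gpfifo_entry_pack(NvU32 entry[2], NvU64 gpu_va, NvU32 length_dwords)
{
    entry[0] = FLD_NUM(NVA16F_GP_ENTRY0_GET,    (NvU32)(gpu_va >> 2));
    entry[1] = FLD_NUM(NVA16F_GP_ENTRY1_GET_HI, (NvU32)(gpu_va >> 32)) |
               FLD_NUM(NVA16F_GP_ENTRY1_LENGTH, length_dwords);
}

/* Header word for an incrementing method: 'count' data words follow and land
 * on consecutive method addresses starting at 'method'.  For example,
 * method_incr_header(NVA06F_SUBCHANNEL_COPY_ENGINE, NVA16F_SET_OBJECT, 1)
 * followed by an object handle binds that object to the copy-engine
 * subchannel from the previous header. */
static NvU32 method_incr_header(NvU32 subchannel, NvU32 method, NvU32 count)
{
    return FLD_NUM(NVA16F_DMA_INCR_OPCODE,     NVA16F_DMA_INCR_OPCODE_VALUE) |
           FLD_NUM(NVA16F_DMA_INCR_COUNT,      count)                        |
           FLD_NUM(NVA16F_DMA_INCR_SUBCHANNEL, subchannel)                   |
           FLD_NUM(NVA16F_DMA_INCR_ADDRESS,    method >> 2);
}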

@@ -0,0 +1,62 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clb069_h_
#define _clb069_h_
#ifdef __cplusplus
extern "C" {
#endif
#define MAXWELL_FAULT_BUFFER_A (0xb069)
#define NVB069_FAULT_BUF_ENTRY 0x0000001f:0x00000000
#define NVB069_FAULT_BUF_SIZE 32
#define NVB069_FAULT_BUF_ENTRY_INST_APERTURE MW((9+0*32):(0*32+8))
#define NVB069_FAULT_BUF_ENTRY_INST_APERTURE_VID_MEM 0x00000000
#define NVB069_FAULT_BUF_ENTRY_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVB069_FAULT_BUF_ENTRY_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVB069_FAULT_BUF_ENTRY_INST_LO MW((31+0*32):(0*32+12))
#define NVB069_FAULT_BUF_ENTRY_INST_HI MW((31+1*32):(1*32+0))
#define NVB069_FAULT_BUF_ENTRY_INST MW((31+1*32):(0*32+12))
#define NVB069_FAULT_BUF_ENTRY_ADDR_LO MW((31+2*32):(2*32+0))
#define NVB069_FAULT_BUF_ENTRY_ADDR_HI MW((31+3*32):(3*32+0))
#define NVB069_FAULT_BUF_ENTRY_ADDR MW((31+3*32):(2*32+0))
#define NVB069_FAULT_BUF_ENTRY_TIMESTAMP_LO MW((31+4*32):(4*32+0))
#define NVB069_FAULT_BUF_ENTRY_TIMESTAMP_HI MW((31+5*32):(5*32+0))
#define NVB069_FAULT_BUF_ENTRY_TIMESTAMP MW((31+5*32):(4*32+0))
#define NVB069_FAULT_BUF_ENTRY_RESERVED MW((31+6*32):(6*32+0))
#define NVB069_FAULT_BUF_ENTRY_FAULT_TYPE MW((4+7*32):(7*32+0))
#define NVB069_FAULT_BUF_ENTRY_CLIENT MW((14+7*32):(7*32+8))
#define NVB069_FAULT_BUF_ENTRY_ACCESS_TYPE MW((18+7*32):(7*32+16))
#define NVB069_FAULT_BUF_ENTRY_MMU_CLIENT_TYPE MW((20+7*32):(7*32+20))
#define NVB069_FAULT_BUF_ENTRY_GPC_ID MW((28+7*32):(7*32+24))
#define NVB069_FAULT_BUF_ENTRY_VALID MW((31+7*32):(7*32+31))
#define NVB069_FAULT_BUF_ENTRY_VALID_FALSE 0x00000000
#define NVB069_FAULT_BUF_ENTRY_VALID_TRUE 0x00000001
#define NVB069_NOTIFIERS_REPLAYABLE_FAULT (0)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clb069_h_ */
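
Each replayable-fault record above is a 32-byte entry whose fields are given as bit positions within the whole entry, wrapped in MW(). A sketch of reading such a field back on the CPU follows; the identity definition of MW, the helper names, and the use of NvU32 from nvtypes.h are assumptions of this example, and the helper only handles fields that fit in a single 32-bit word (aperture, fault type, client, access type, valid), not ones spanning words such as ADDR or TIMESTAMP.

/* Treat MW() as the identity so the ?: trick can recover the entry-relative
 * high/low bit numbers from the field specs above (assumption of this sketch). */
#define MW(field)     field
#define MWFLD_HI(f)   ((1) ? f)
#define MWFLD_LO(f)   ((0) ? f)

/* Read an entry-relative bit field out of one 32-byte (8-dword) fault buffer
 * entry.  Only fields contained within a single 32-bit word are supported. */
static NvU32 fault_entry_field(const NvU32 entry[NVB069_FAULT_BUF_SIZE / 4],
                               NvU32 hi, NvU32 lo)
{
    NvU32 word  = lo / 32;
    NvU32 shift = lo % 32;
    NvU32 width = hi - lo + 1;
    NvU32 mask  = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);

    return (entry[word] >> shift) & mask;
}

/* Usage: test whether entry 'e' is valid.
 *   fault_entry_field(e, MWFLD_HI(NVB069_FAULT_BUF_ENTRY_VALID),
 *                        MWFLD_LO(NVB069_FAULT_BUF_ENTRY_VALID))  */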

@@ -0,0 +1,140 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clB06f_h_
#define _clB06f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
#define MAXWELL_CHANNEL_GPFIFO_A (0x0000B06F)
/* class MAXWELL_CHANNEL_GPFIFO */
#define NVB06F_SET_OBJECT (0x00000000)
#define NVB06F_NOP (0x00000008)
#define NVB06F_NOP_HANDLE 31:0
#define NVB06F_SEMAPHOREA (0x00000010)
#define NVB06F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVB06F_SEMAPHOREB (0x00000014)
#define NVB06F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVB06F_SEMAPHOREC (0x00000018)
#define NVB06F_SEMAPHOREC_PAYLOAD 31:0
#define NVB06F_SEMAPHORED (0x0000001C)
#define NVB06F_SEMAPHORED_OPERATION 4:0
#define NVB06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVB06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVB06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVB06F_SEMAPHORED_RELEASE_WFI 20:20
#define NVB06F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVB06F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVB06F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVB06F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVB06F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVB06F_NON_STALL_INTERRUPT (0x00000020)
// NOTE - MEM_OP_A and MEM_OP_B have been removed for gm20x to make room for
// possible future MEM_OP features. MEM_OP_C/D have identical functionality
// to the previous MEM_OP_A/B methods.
#define NVB06F_MEM_OP_C (0x00000030)
#define NVB06F_MEM_OP_C_OPERAND_LOW 31:2
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET 11:10
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET_VID_MEM 0x00000000
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET_SYS_MEM_COHERENT 0x00000002
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET_SYS_MEM_NONCOHERENT 0x00000003
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_ADDR_LO 31:12
#define NVB06F_MEM_OP_D (0x00000034)
#define NVB06F_MEM_OP_D_OPERAND_HIGH 7:0
#define NVB06F_MEM_OP_D_OPERATION 31:27
#define NVB06F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVB06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVB06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVB06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
#define NVB06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVB06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVB06F_MEM_OP_D_TLB_INVALIDATE_ADDR_HI 7:0
#define NVB06F_WFI (0x00000078)
/* GPFIFO entry format */
#define NVB06F_GP_ENTRY__SIZE 8
#define NVB06F_GP_ENTRY0_GET 31:2
#define NVB06F_GP_ENTRY0_OPERAND 31:0
#define NVB06F_GP_ENTRY1_GET_HI 7:0
#define NVB06F_GP_ENTRY1_PRIV 8:8
#define NVB06F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVB06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVB06F_GP_ENTRY1_LEVEL 9:9
#define NVB06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVB06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVB06F_GP_ENTRY1_LENGTH 30:10
/* dma method formats */
#define NVB06F_DMA_SEC_OP 31:29
#define NVB06F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVB06F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
/* dma incrementing method format */
#define NVB06F_DMA_INCR_ADDRESS 11:0
#define NVB06F_DMA_INCR_SUBCHANNEL 15:13
#define NVB06F_DMA_INCR_COUNT 28:16
#define NVB06F_DMA_INCR_OPCODE 31:29
#define NVB06F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVB06F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVB06F_DMA_NONINCR_ADDRESS 11:0
#define NVB06F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVB06F_DMA_NONINCR_COUNT 28:16
#define NVB06F_DMA_NONINCR_OPCODE 31:29
#define NVB06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVB06F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVB06F_DMA_ONEINCR_ADDRESS 11:0
#define NVB06F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVB06F_DMA_ONEINCR_COUNT 28:16
#define NVB06F_DMA_ONEINCR_OPCODE 31:29
#define NVB06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVB06F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVB06F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVB06F_DMA_IMMD_ADDRESS 11:0
#define NVB06F_DMA_IMMD_SUBCHANNEL 15:13
#define NVB06F_DMA_IMMD_DATA 28:16
#define NVB06F_DMA_IMMD_OPCODE 31:29
#define NVB06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clB06f_h_ */
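
The note above says MEM_OP_C/D took over the old MEM_OP_A/B functionality on gm20x. Here is a sketch of a full (all-PDB) MMU TLB invalidate issued through them; it reuses the FLD_NUM() and method_incr_header() helpers sketched after the KEPLER_CHANNEL_GPFIFO_B header, since the B06F method-header encoding is the same, and 'pb' (pushbuffer cursor) and 'subch' are assumptions of the example.

/* MEM_OP_C selects what to invalidate, MEM_OP_D carries the operation; the
 * two methods are consecutive, so one incrementing header covers both. */
static NvU32 *push_tlb_invalidate_all(NvU32 *pb, NvU32 subch)
{
    *pb++ = method_incr_header(subch, NVB06F_MEM_OP_C, 2);
    *pb++ = FLD_NUM(NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB,
                    NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL) |
            FLD_NUM(NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC,
                    NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE);
    *pb++ = FLD_NUM(NVB06F_MEM_OP_D_OPERATION,
                    NVB06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE);
    return pb;
}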

@@ -0,0 +1,191 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clb0b5_h_
#define _clb0b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define MAXWELL_DMA_COPY_A (0x0000B0B5)
#define NVB0B5_SET_SEMAPHORE_A (0x00000240)
#define NVB0B5_SET_SEMAPHORE_A_UPPER 7:0
#define NVB0B5_SET_SEMAPHORE_B (0x00000244)
#define NVB0B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVB0B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVB0B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVB0B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVB0B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVB0B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVB0B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVB0B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVB0B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVB0B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVB0B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVB0B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVB0B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVB0B5_SET_DST_PHYS_MODE (0x00000264)
#define NVB0B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVB0B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVB0B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVB0B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVB0B5_LAUNCH_DMA (0x00000300)
#define NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVB0B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVB0B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVB0B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVB0B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVB0B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVB0B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVB0B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVB0B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVB0B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVB0B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVB0B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVB0B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVB0B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVB0B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVB0B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_BYPASS_L2 20:20
#define NVB0B5_LAUNCH_DMA_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVB0B5_LAUNCH_DMA_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVB0B5_OFFSET_IN_UPPER (0x00000400)
#define NVB0B5_OFFSET_IN_UPPER_UPPER 7:0
#define NVB0B5_OFFSET_IN_LOWER (0x00000404)
#define NVB0B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVB0B5_OFFSET_OUT_UPPER (0x00000408)
#define NVB0B5_OFFSET_OUT_UPPER_UPPER 7:0
#define NVB0B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVB0B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVB0B5_PITCH_IN (0x00000410)
#define NVB0B5_PITCH_IN_VALUE 31:0
#define NVB0B5_PITCH_OUT (0x00000414)
#define NVB0B5_PITCH_OUT_VALUE 31:0
#define NVB0B5_LINE_LENGTH_IN (0x00000418)
#define NVB0B5_LINE_LENGTH_IN_VALUE 31:0
#define NVB0B5_LINE_COUNT (0x0000041C)
#define NVB0B5_LINE_COUNT_VALUE 31:0
#define NVB0B5_SET_REMAP_CONST_A (0x00000700)
#define NVB0B5_SET_REMAP_CONST_A_V 31:0
#define NVB0B5_SET_REMAP_CONST_B (0x00000704)
#define NVB0B5_SET_REMAP_CONST_B_V 31:0
#define NVB0B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clb0b5_h
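
As a concrete use of the LAUNCH_DMA fields above, here is a sketch of a single pitch-linear, virtual-to-virtual copy of 'size' bytes pushed to MAXWELL_DMA_COPY_A. It reuses the FLD_NUM() and method_incr_header() helpers sketched earlier; as before, 'pb' and 'subch' are assumptions of the example.

static NvU32 *push_memcopy_b0b5(NvU32 *pb, NvU32 subch,
                                NvU64 dst_va, NvU64 src_va, NvU32 size)
{
    /* OFFSET_IN_UPPER through OFFSET_OUT_LOWER are consecutive methods. */
    *pb++ = method_incr_header(subch, NVB0B5_OFFSET_IN_UPPER, 4);
    *pb++ = FLD_NUM(NVB0B5_OFFSET_IN_UPPER_UPPER,  (NvU32)(src_va >> 32));
    *pb++ = (NvU32)src_va;                      /* OFFSET_IN_LOWER  */
    *pb++ = FLD_NUM(NVB0B5_OFFSET_OUT_UPPER_UPPER, (NvU32)(dst_va >> 32));
    *pb++ = (NvU32)dst_va;                      /* OFFSET_OUT_LOWER */

    /* Single line: with MULTI_LINE_ENABLE left FALSE only LINE_LENGTH_IN is used. */
    *pb++ = method_incr_header(subch, NVB0B5_LINE_LENGTH_IN, 1);
    *pb++ = size;

    *pb++ = method_incr_header(subch, NVB0B5_LAUNCH_DMA, 1);
    *pb++ = FLD_NUM(NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE,
                    NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED) |
            FLD_NUM(NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT,
                    NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH)          |
            FLD_NUM(NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT,
                    NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH)          |
            FLD_NUM(NVB0B5_LAUNCH_DMA_FLUSH_ENABLE,
                    NVB0B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE);
    return pb;
}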

@@ -0,0 +1,173 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc06f_h_
#define _clc06f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
#define PASCAL_CHANNEL_GPFIFO_A (0x0000C06F)
/* class PASCAL_CHANNEL_GPFIFO_A */
#define NVC06F_SET_OBJECT (0x00000000)
#define NVC06F_NOP (0x00000008)
#define NVC06F_NOP_HANDLE 31:0
#define NVC06F_SEMAPHOREA (0x00000010)
#define NVC06F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC06F_SEMAPHOREB (0x00000014)
#define NVC06F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC06F_SEMAPHOREC (0x00000018)
#define NVC06F_SEMAPHOREC_PAYLOAD 31:0
#define NVC06F_SEMAPHORED (0x0000001C)
#define NVC06F_SEMAPHORED_OPERATION 4:0
#define NVC06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
/* GPFIFO entry format */
#define NVC06F_GP_ENTRY__SIZE 8
#define NVC06F_GP_ENTRY0_GET 31:2
#define NVC06F_GP_ENTRY0_OPERAND 31:0
#define NVC06F_GP_ENTRY1_GET_HI 7:0
#define NVC06F_GP_ENTRY1_PRIV 8:8
#define NVC06F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVC06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVC06F_GP_ENTRY1_LEVEL 9:9
#define NVC06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC06F_GP_ENTRY1_LENGTH 30:10
/* dma incrementing method format */
#define NVC06F_DMA_INCR_ADDRESS 11:0
#define NVC06F_DMA_INCR_SUBCHANNEL 15:13
#define NVC06F_DMA_INCR_COUNT 28:16
#define NVC06F_DMA_INCR_OPCODE 31:29
#define NVC06F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC06F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC06F_DMA_NONINCR_ADDRESS 11:0
#define NVC06F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC06F_DMA_NONINCR_COUNT 28:16
#define NVC06F_DMA_NONINCR_OPCODE 31:29
#define NVC06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC06F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC06F_DMA_ONEINCR_ADDRESS 11:0
#define NVC06F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC06F_DMA_ONEINCR_COUNT 28:16
#define NVC06F_DMA_ONEINCR_OPCODE 31:29
#define NVC06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC06F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC06F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC06F_DMA_IMMD_ADDRESS 11:0
#define NVC06F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC06F_DMA_IMMD_DATA 28:16
#define NVC06F_DMA_IMMD_OPCODE 31:29
#define NVC06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC06F_MEM_OP_A (0x00000028)
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
#define NVC06F_MEM_OP_B (0x0000002c)
#define NVC06F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC06F_MEM_OP_C (0x00000030)
#define NVC06F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC06F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC06F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC06F_MEM_OP_D (0x00000034)
#define NVC06F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC06F_MEM_OP_D_OPERATION 31:27
#define NVC06F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC06F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC06F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC06F_SET_REFERENCE (0x00000050)
#define NVC06F_SET_REFERENCE_COUNT 31:0
#define NVC06F_WFI (0x00000078)
#define NVC06F_WFI_SCOPE 0:0
#define NVC06F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC06F_WFI_SCOPE_ALL 0x00000001
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc06f_h_ */
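
Putting the four MEM_OP methods together: a sketch of a targeted TLB invalidate of one 4K-aligned virtual address 'va' within a single page directory whose physical address is 'pdb_pa' (video memory here). MEM_OP_A..D are consecutive, so one incrementing header with a count of 4 also satisfies the "MEM_OP_D MUST be preceded by MEM_OPs A-C" rule noted above. Helpers and the 'pb'/'subch' parameters carry over from the earlier sketches.

static NvU32 *push_tlb_invalidate_va(NvU32 *pb, NvU32 subch, NvU64 va, NvU64 pdb_pa)
{
    *pb++ = method_incr_header(subch, NVC06F_MEM_OP_A, 4);
    *pb++ = FLD_NUM(NVC06F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO, (NvU32)(va >> 12)) |
            FLD_NUM(NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR,
                    NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS);
    *pb++ = (NvU32)(va >> 32);                  /* MEM_OP_B: TARGET_ADDR_HI */
    *pb++ = FLD_NUM(NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB,
                    NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE)              |
            FLD_NUM(NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE,
                    NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM) |
            FLD_NUM(NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO, (NvU32)(pdb_pa >> 12));
    *pb++ = FLD_NUM(NVC06F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI, (NvU32)(pdb_pa >> 32)) |
            FLD_NUM(NVC06F_MEM_OP_D_OPERATION,
                    NVC06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED);
    return pb;
}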

@@ -0,0 +1,82 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _clc076_h_
#define _clc076_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
#define GP100_UVM_SW (0x0000c076)
#define NVC076_SET_OBJECT (0x00000000)
#define NVC076_NO_OPERATION (0x00000100)
/* Method data fields to support gpu fault cancel. These are pushed in order by UVM */
#define NVC076_FAULT_CANCEL_A (0x00000104)
#define NVC076_FAULT_CANCEL_A_INST_APERTURE 1:0
#define NVC076_FAULT_CANCEL_A_INST_APERTURE_VID_MEM 0x00000000
#define NVC076_FAULT_CANCEL_A_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC076_FAULT_CANCEL_A_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
/* instance pointer is 4k aligned so those bits are reused to store the aperture */
#define NVC076_FAULT_CANCEL_A_INST_LOW 31:12
#define NVC076_FAULT_CANCEL_B (0x00000108)
#define NVC076_FAULT_CANCEL_B_INST_HI 31:0
#define NVC076_FAULT_CANCEL_C (0x0000010c)
#define NVC076_FAULT_CANCEL_C_CLIENT_ID 5:0
#define NVC076_FAULT_CANCEL_C_GPC_ID 10:6
#define NVC076_FAULT_CANCEL_C_MODE 31:30
#define NVC076_FAULT_CANCEL_C_MODE_TARGETED 0x00000000
#define NVC076_FAULT_CANCEL_C_MODE_GLOBAL 0x00000001
/* Method data fields to support clearing faulted bit. These are pushed in order by UVM */
#define NVC076_CLEAR_FAULTED_A (0x00000110)
#define NVC076_CLEAR_FAULTED_A_INST_APERTURE 1:0
#define NVC076_CLEAR_FAULTED_A_INST_APERTURE_VID_MEM 0x00000000
#define NVC076_CLEAR_FAULTED_A_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC076_CLEAR_FAULTED_A_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC076_CLEAR_FAULTED_A_TYPE 2:2
#define NVC076_CLEAR_FAULTED_A_TYPE_PBDMA_FAULTED 0x00000000
#define NVC076_CLEAR_FAULTED_A_TYPE_ENG_FAULTED 0x00000001
/* instance pointer is 4k aligned */
#define NVC076_CLEAR_FAULTED_A_INST_LOW 31:12
#define NVC076_CLEAR_FAULTED_B (0x00000114)
#define NVC076_CLEAR_FAULTED_B_INST_HI 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc076_h_ */
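
GP100_UVM_SW is a software class: the methods above are handled in the driver rather than by an engine pipeline, and, as the comments note, UVM pushes them in order. A sketch of pushing the clear-faulted pair for a channel whose instance block sits at physical address 'inst_pa' in video memory, reusing the earlier helpers:

static NvU32 *push_clear_faulted(NvU32 *pb, NvU32 subch, NvU64 inst_pa)
{
    /* CLEAR_FAULTED_A and CLEAR_FAULTED_B are consecutive methods. */
    *pb++ = method_incr_header(subch, NVC076_CLEAR_FAULTED_A, 2);
    *pb++ = FLD_NUM(NVC076_CLEAR_FAULTED_A_INST_LOW,  (NvU32)(inst_pa >> 12)) |
            FLD_NUM(NVC076_CLEAR_FAULTED_A_INST_APERTURE,
                    NVC076_CLEAR_FAULTED_A_INST_APERTURE_VID_MEM)             |
            FLD_NUM(NVC076_CLEAR_FAULTED_A_TYPE,
                    NVC076_CLEAR_FAULTED_A_TYPE_ENG_FAULTED);
    *pb++ = (NvU32)(inst_pa >> 32);             /* CLEAR_FAULTED_B: INST_HI */
    return pb;
}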

@@ -0,0 +1,191 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc0b5_h_
#define _clc0b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define PASCAL_DMA_COPY_A (0x0000C0B5)
#define NVC0B5_SET_SEMAPHORE_A (0x00000240)
#define NVC0B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC0B5_SET_SEMAPHORE_B (0x00000244)
#define NVC0B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC0B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC0B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC0B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC0B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC0B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC0B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC0B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC0B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC0B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC0B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC0B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC0B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC0B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC0B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC0B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC0B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC0B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC0B5_LAUNCH_DMA (0x00000300)
#define NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC0B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC0B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC0B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC0B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC0B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC0B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC0B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC0B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC0B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC0B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC0B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC0B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC0B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC0B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC0B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC0B5_OFFSET_IN_UPPER (0x00000400)
#define NVC0B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC0B5_OFFSET_IN_LOWER (0x00000404)
#define NVC0B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC0B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC0B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC0B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC0B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC0B5_PITCH_IN (0x00000410)
#define NVC0B5_PITCH_IN_VALUE 31:0
#define NVC0B5_PITCH_OUT (0x00000414)
#define NVC0B5_PITCH_OUT_VALUE 31:0
#define NVC0B5_LINE_LENGTH_IN (0x00000418)
#define NVC0B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC0B5_LINE_COUNT (0x0000041C)
#define NVC0B5_LINE_COUNT_VALUE 31:0
#define NVC0B5_SET_REMAP_CONST_A (0x00000700)
#define NVC0B5_SET_REMAP_CONST_A_V 31:0
#define NVC0B5_SET_REMAP_CONST_B (0x00000704)
#define NVC0B5_SET_REMAP_CONST_B_V 31:0
#define NVC0B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc0b5_h
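
The remap machinery above also gives the copy engine a memset path: with REMAP_ENABLE set and the destination X component taken from CONST_A, every destination element is written from SET_REMAP_CONST_A and no source is read. A sketch for a 4-byte pattern fill ('size' assumed to be a multiple of 4 here), again reusing the earlier helpers and the 'pb'/'subch' assumptions:

static NvU32 *push_memset_c0b5(NvU32 *pb, NvU32 subch,
                               NvU64 dst_va, NvU32 pattern, NvU32 size)
{
    *pb++ = method_incr_header(subch, NVC0B5_SET_REMAP_CONST_A, 1);
    *pb++ = pattern;

    *pb++ = method_incr_header(subch, NVC0B5_SET_REMAP_COMPONENTS, 1);
    *pb++ = FLD_NUM(NVC0B5_SET_REMAP_COMPONENTS_DST_X,
                    NVC0B5_SET_REMAP_COMPONENTS_DST_X_CONST_A)          |
            FLD_NUM(NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE,
                    NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR)    |
            FLD_NUM(NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS,
                    NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE);

    *pb++ = method_incr_header(subch, NVC0B5_OFFSET_OUT_UPPER, 2);
    *pb++ = FLD_NUM(NVC0B5_OFFSET_OUT_UPPER_UPPER, (NvU32)(dst_va >> 32));
    *pb++ = (NvU32)dst_va;                      /* OFFSET_OUT_LOWER */

    /* With REMAP enabled the line length counts elements, not bytes. */
    *pb++ = method_incr_header(subch, NVC0B5_LINE_LENGTH_IN, 1);
    *pb++ = size / 4;

    *pb++ = method_incr_header(subch, NVC0B5_LAUNCH_DMA, 1);
    *pb++ = FLD_NUM(NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE,
                    NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED) |
            FLD_NUM(NVC0B5_LAUNCH_DMA_REMAP_ENABLE,
                    NVC0B5_LAUNCH_DMA_REMAP_ENABLE_TRUE)                |
            FLD_NUM(NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT,
                    NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH)          |
            FLD_NUM(NVC0B5_LAUNCH_DMA_FLUSH_ENABLE,
                    NVC0B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE);
    return pb;
}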

@@ -0,0 +1,191 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc1b5_h_
#define _clc1b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define PASCAL_DMA_COPY_B (0x0000C1B5)
#define NVC1B5_SET_SEMAPHORE_A (0x00000240)
#define NVC1B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC1B5_SET_SEMAPHORE_B (0x00000244)
#define NVC1B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC1B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC1B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC1B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC1B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC1B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC1B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC1B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC1B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC1B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC1B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC1B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC1B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC1B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC1B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC1B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC1B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC1B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC1B5_LAUNCH_DMA (0x00000300)
#define NVC1B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC1B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC1B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC1B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC1B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC1B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC1B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC1B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC1B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC1B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC1B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC1B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC1B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC1B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC1B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC1B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC1B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC1B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC1B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC1B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC1B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC1B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC1B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC1B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC1B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC1B5_OFFSET_IN_UPPER (0x00000400)
#define NVC1B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC1B5_OFFSET_IN_LOWER (0x00000404)
#define NVC1B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC1B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC1B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC1B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC1B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC1B5_PITCH_IN (0x00000410)
#define NVC1B5_PITCH_IN_VALUE 31:0
#define NVC1B5_PITCH_OUT (0x00000414)
#define NVC1B5_PITCH_OUT_VALUE 31:0
#define NVC1B5_LINE_LENGTH_IN (0x00000418)
#define NVC1B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC1B5_LINE_COUNT (0x0000041C)
#define NVC1B5_LINE_COUNT_VALUE 31:0
#define NVC1B5_SET_REMAP_CONST_A (0x00000700)
#define NVC1B5_SET_REMAP_CONST_A_V 31:0
#define NVC1B5_SET_REMAP_CONST_B (0x00000704)
#define NVC1B5_SET_REMAP_CONST_B_V 31:0
#define NVC1B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
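/*
 * Usage sketch (illustrative only, not part of the class definition): the
 * remap block can synthesize the destination from a constant, giving a
 * memset-style fill.  With the field positions above, a 4-byte fill pattern
 * could be programmed roughly as:
 *
 *     NvU32 remap = (NVC1B5_SET_REMAP_COMPONENTS_DST_X_CONST_A          << 0)
 *                 | (NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR    << 16)
 *                 | (NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE << 24);
 *
 * SET_REMAP_CONST_A holds the 32-bit pattern, LINE_LENGTH_IN is then given
 * in components (bytes / 4 here), and the launch word must have
 * REMAP_ENABLE_TRUE set.  The shift amounts mirror the low bits of the 2:0,
 * 17:16 and 25:24 ranges above; how the methods reach the pushbuffer is
 * host-class specific and outside this header.
 */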
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc1b5_h

View File

@@ -0,0 +1,93 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
// Command: ../../../bin/manuals/refhdr2class.pl clc365.h c365 ACCESS_COUNTER_NOTIFY_BUFFER --search_str=NV_ACCESS_COUNTER --input_file=nv_ref_dev_access_counter.h
#ifndef _clc365_h_
#define _clc365_h_
#ifdef __cplusplus
extern "C" {
#endif
#define ACCESS_COUNTER_NOTIFY_BUFFER (0xc365)
#define NVC365_NOTIFY_BUF
#define NVC365_NOTIFY_BUF_ENTRY 0x0000001f:0x00000000
#define NVC365_NOTIFY_BUF_SIZE 32
#define NVC365_NOTIFY_BUF_ENTRY_TYPE MW((0+0*32):(0*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_TYPE_GPU 0x00000001
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE MW((1+0*32):(0*32+1))
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GPA 0x00000001
#define NVC365_NOTIFY_BUF_ENTRY_BANK MW((5+0*32):(0*32+2))
#define NVC365_NOTIFY_BUF_ENTRY_BANK_0 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_BANK_1 0x00000001
#define NVC365_NOTIFY_BUF_ENTRY_BANK_2 0x00000002
#define NVC365_NOTIFY_BUF_ENTRY_BANK_3 0x00000003
#define NVC365_NOTIFY_BUF_ENTRY_BANK_4 0x00000004
#define NVC365_NOTIFY_BUF_ENTRY_BANK_5 0x00000005
#define NVC365_NOTIFY_BUF_ENTRY_BANK_6 0x00000006
#define NVC365_NOTIFY_BUF_ENTRY_BANK_7 0x00000007
#define NVC365_NOTIFY_BUF_ENTRY_BANK_8 0x00000008
#define NVC365_NOTIFY_BUF_ENTRY_BANK_9 0x00000009
#define NVC365_NOTIFY_BUF_ENTRY_BANK_10 0x0000000a
#define NVC365_NOTIFY_BUF_ENTRY_BANK_11 0x0000000b
#define NVC365_NOTIFY_BUF_ENTRY_BANK_12 0x0000000c
#define NVC365_NOTIFY_BUF_ENTRY_BANK_13 0x0000000d
#define NVC365_NOTIFY_BUF_ENTRY_BANK_14 0x0000000e
#define NVC365_NOTIFY_BUF_ENTRY_BANK_15 0x0000000f
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE MW((9+0*32):(0*32+8))
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE_PEER_MEM 0x00000001
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC365_NOTIFY_BUF_ENTRY_INST_APERTURE MW((11+0*32):(0*32+10))
#define NVC365_NOTIFY_BUF_ENTRY_INST_APERTURE_VID_MEM 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC365_NOTIFY_BUF_ENTRY_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC365_NOTIFY_BUF_ENTRY_INST_LO MW((31+0*32):(0*32+12))
#define NVC365_NOTIFY_BUF_ENTRY_INST_HI MW((31+1*32):(1*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_INST MW((31+1*32):(0*32+12))
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_LO MW((31+2*32):(2*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_HI MW((31+3*32):(3*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_ADDR MW((31+3*32):(2*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_SUB_GRANULARITY MW((31+4*32):(4*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_NOTIFY_TAG MW((19+5*32):(5*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_COUNTER_VAL MW((15+6*32):(6*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_PEER_ID MW((2+7*32):(7*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_MMU_ENGINE_ID MW((28+7*32):(7*32+20))
#define NVC365_NOTIFY_BUF_ENTRY_VALID MW((31+7*32):(7*32+31))
#define NVC365_NOTIFY_BUF_ENTRY_VALID_FALSE 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_VALID_TRUE 0x00000001
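/*
 * Usage sketch (illustrative only): each notification record is
 * NVC365_NOTIFY_BUF_SIZE (32) bytes, i.e. eight 32-bit words, and the MW()
 * ranges above are bit positions within that 256-bit record.  Reading the
 * address and validity from a record mapped as NvU32 buf[8] could look
 * roughly like:
 *
 *     NvBool valid = (buf[7] >> 31) & 0x1;            // ENTRY_VALID, word 7 bit 31
 *     NvU64  addr  = ((NvU64)buf[3] << 32) | buf[2];  // ENTRY_ADDR, words 2..3
 *
 * The word and bit positions follow directly from the
 * MW((31+7*32):(7*32+31)) and MW((31+3*32):(2*32+0)) ranges above; byte
 * order and any required read barriers are outside the scope of this header.
 */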
#define NVC365_NOTIFIERS_ACCESS_COUNTER (0)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc365_h_ */

View File

@@ -0,0 +1,82 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
// Command: ../../../bin/manuals/refhdr2class.pl clc369.h c369 MMU_FAULT_BUFFER --search_str=NV_MMU_FAULT --input_file=nv_ref_dev_mmu_fault.h
#ifndef _clc369_h_
#define _clc369_h_
#ifdef __cplusplus
extern "C" {
#endif
#define MMU_FAULT_BUFFER (0xc369)
#define NVC369_BUF
#define NVC369_BUF_ENTRY 0x0000001f:0x00000000
#define NVC369_BUF_SIZE 32
#define NVC369_BUF_ENTRY_INST_APERTURE MW((9+0*32):(0*32+8))
#define NVC369_BUF_ENTRY_INST_APERTURE_VID_MEM 0x00000000
#define NVC369_BUF_ENTRY_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC369_BUF_ENTRY_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC369_BUF_ENTRY_INST_LO MW((31+0*32):(0*32+12))
#define NVC369_BUF_ENTRY_INST_HI MW((31+1*32):(1*32+0))
#define NVC369_BUF_ENTRY_INST MW((31+1*32):(0*32+12))
#define NVC369_BUF_ENTRY_ADDR_PHYS_APERTURE MW((1+2*32):(2*32+0))
#define NVC369_BUF_ENTRY_ADDR_LO MW((31+2*32):(2*32+12))
#define NVC369_BUF_ENTRY_ADDR_HI MW((31+3*32):(3*32+0))
#define NVC369_BUF_ENTRY_ADDR MW((31+3*32):(2*32+12))
#define NVC369_BUF_ENTRY_TIMESTAMP_LO MW((31+4*32):(4*32+0))
#define NVC369_BUF_ENTRY_TIMESTAMP_HI MW((31+5*32):(5*32+0))
#define NVC369_BUF_ENTRY_TIMESTAMP MW((31+5*32):(4*32+0))
#define NVC369_BUF_ENTRY_ENGINE_ID MW((8+6*32):(6*32+0))
#define NVC369_BUF_ENTRY_FAULT_TYPE MW((4+7*32):(7*32+0))
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT MW((7+7*32):(7*32+7))
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_FALSE 0x00000000
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_TRUE 0x00000001
#define NVC369_BUF_ENTRY_CLIENT MW((14+7*32):(7*32+8))
#define NVC369_BUF_ENTRY_ACCESS_TYPE MW((19+7*32):(7*32+16))
#define NVC369_BUF_ENTRY_MMU_CLIENT_TYPE MW((20+7*32):(7*32+20))
#define NVC369_BUF_ENTRY_GPC_ID MW((28+7*32):(7*32+24))
#define NVC369_BUF_ENTRY_PROTECTED_MODE MW((29+7*32):(7*32+29))
#define NVC369_BUF_ENTRY_PROTECTED_MODE_FALSE 0x00000000
#define NVC369_BUF_ENTRY_PROTECTED_MODE_TRUE 0x00000001
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_EN MW((30+7*32):(7*32+30))
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_EN_FALSE 0x00000000
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_EN_TRUE 0x00000001
#define NVC369_BUF_ENTRY_VALID MW((31+7*32):(7*32+31))
#define NVC369_BUF_ENTRY_VALID_FALSE 0x00000000
#define NVC369_BUF_ENTRY_VALID_TRUE 0x00000001
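/*
 * Usage sketch (illustrative only): as with the access counter buffer, each
 * fault record is eight 32-bit words.  With the record mapped as
 * NvU32 buf[NVC369_BUF_SIZE / 4], the commonly inspected bits decode as:
 *
 *     NvBool valid      = (buf[7] >> 31) & 0x1;            // ENTRY_VALID
 *     NvBool replayable = (buf[7] >> 7)  & 0x1;            // REPLAYABLE_FAULT
 *     NvU64  timestamp  = ((NvU64)buf[5] << 32) | buf[4];  // ENTRY_TIMESTAMP
 *
 * Clearing VALID after a record has been consumed is the caller's
 * responsibility; the ordering requirements are documented with the fault
 * buffer registers, not in this header.
 */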
#define NVC369_NOTIFIER_MMU_FAULT_NON_REPLAYABLE 0
#define NVC369_NOTIFIER_MMU_FAULT_REPLAYABLE 1
#define NVC369_NOTIFIER_MMU_FAULT_ERROR 2
#define NVC369_NOTIFIER_MMU_FAULT_NON_REPLAYABLE_IN_PRIV 3
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc369_h_ */

View File

@@ -0,0 +1,368 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc36f_h_
#define _clc36f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
/* class VOLTA_CHANNEL_GPFIFO */
/*
* Documentation for VOLTA_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
* Note there is no .mfs file for this class.
*/
#define VOLTA_CHANNEL_GPFIFO_A (0x0000C36F)
#define NVC36F_TYPEDEF VOLTA_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc36fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored05[0x5c]; /* 0090-01ff*/
} Nvc36fControl, VoltaAControlGPFifo;
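/*
 * Usage sketch (illustrative only): this structure overlays the channel's
 * USERD region.  A submission loop typically appends an 8-byte GPFIFO entry
 * to the GPFIFO ring and then advances GPPut, roughly:
 *
 *     volatile Nvc36fControl *ctrl = userd;      // hypothetical USERD mapping
 *     NvU64 *gpfifo = gpfifo_cpu_va;             // ring of 8-byte GP entries
 *     NvU32  put    = ctrl->GPPut;
 *
 *     gpfifo[put] = ((NvU64)entry1 << 32) | entry0;   // see GP_ENTRY0/1 below
 *     ctrl->GPPut = (put + 1) % num_gpfifo_entries;
 *
 * GPGet is read-only from the CPU and is advanced by the PBDMA as entries
 * are fetched; production code must also handle ring wrap and whatever
 * doorbell or work-submission mechanism the platform requires.
 */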
/* fields and values */
#define NVC36F_NUMBER_OF_SUBCHANNELS (8)
#define NVC36F_SET_OBJECT (0x00000000)
#define NVC36F_SET_OBJECT_NVCLASS 15:0
#define NVC36F_SET_OBJECT_ENGINE 20:16
#define NVC36F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC36F_ILLEGAL (0x00000004)
#define NVC36F_ILLEGAL_HANDLE 31:0
#define NVC36F_NOP (0x00000008)
#define NVC36F_NOP_HANDLE 31:0
#define NVC36F_SEMAPHOREA (0x00000010)
#define NVC36F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC36F_SEMAPHOREB (0x00000014)
#define NVC36F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC36F_SEMAPHOREC (0x00000018)
#define NVC36F_SEMAPHOREC_PAYLOAD 31:0
#define NVC36F_SEMAPHORED (0x0000001C)
#define NVC36F_SEMAPHORED_OPERATION 4:0
#define NVC36F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC36F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC36F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC36F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC36F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC36F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC36F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC36F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC36F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC36F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC36F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC36F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC36F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC36F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC36F_SEMAPHORED_REDUCTION 30:27
#define NVC36F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC36F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC36F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC36F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC36F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC36F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC36F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC36F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC36F_SEMAPHORED_FORMAT 31:31
#define NVC36F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC36F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC36F_NON_STALL_INTERRUPT (0x00000020)
#define NVC36F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC36F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC36F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC36F_MEM_OP_A (0x00000028)
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 6:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
#define NVC36F_MEM_OP_B (0x0000002c)
#define NVC36F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC36F_MEM_OP_C (0x00000030)
#define NVC36F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC36F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC36F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC36F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC36F_MEM_OP_D (0x00000034)
#define NVC36F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC36F_MEM_OP_D_OPERATION 31:27
#define NVC36F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC36F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC36F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC36F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC36F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC36F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC36F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC36F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
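/*
 * Usage sketch (illustrative only): a full, non-targeted TLB invalidate is
 * expressed as the MEM_OP_A..D method quartet, with A/B carrying the target
 * address (unused here) and C/D carrying the controls.  Built by hand from
 * the field positions above, the data words would be roughly:
 *
 *     NvU32 mem_op_a = 0;                              // no target addr, SYSMEMBAR_DIS
 *     NvU32 mem_op_b = 0;
 *     NvU32 mem_op_c = NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL << 0;         // bit 0:0
 *     NvU32 mem_op_d = NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE << 27;  // bits 31:27
 *
 * GPC, REPLAY, ACK_TYPE and PAGE_TABLE_LEVEL all take their zero-valued
 * (ENABLE/NONE/NONE/ALL) encodings here, so they contribute no set bits.
 * The four words must be sent as consecutive methods, MEM_OP_D last.
 */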
#define NVC36F_SET_REFERENCE (0x00000050)
#define NVC36F_SET_REFERENCE_COUNT 31:0
#define NVC36F_SEM_ADDR_LO (0x0000005c)
#define NVC36F_SEM_ADDR_LO_OFFSET 31:2
#define NVC36F_SEM_ADDR_HI (0x00000060)
#define NVC36F_SEM_ADDR_HI_OFFSET 7:0
#define NVC36F_SEM_PAYLOAD_LO (0x00000064)
#define NVC36F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC36F_SEM_PAYLOAD_HI (0x00000068)
#define NVC36F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC36F_SEM_EXECUTE (0x0000006c)
#define NVC36F_SEM_EXECUTE_OPERATION 2:0
#define NVC36F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC36F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC36F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC36F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC36F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC36F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC36F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC36F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC36F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC36F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC36F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC36F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC36F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC36F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC36F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC36F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC36F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC36F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC36F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC36F_SEM_EXECUTE_REDUCTION 30:27
#define NVC36F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC36F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC36F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC36F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC36F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC36F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC36F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC36F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC36F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC36F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC36F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
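/*
 * Usage sketch (illustrative only): a 64-bit semaphore release programs the
 * five SEM_* methods above in order; the final SEM_EXECUTE word, assembled
 * by hand from the field positions, would be roughly:
 *
 *     NvU32 execute = NVC36F_SEM_EXECUTE_OPERATION_RELEASE               // bits 2:0
 *                   | (NVC36F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT     << 24)
 *                   | (NVC36F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS  << 25)
 *                   | (NVC36F_SEM_EXECUTE_RELEASE_WFI_DIS        << 20);
 *
 * SEM_ADDR_LO/HI take the low and high parts of the (4-byte aligned)
 * semaphore address and SEM_PAYLOAD_LO/HI the 64-bit payload.  Whether WFI
 * or a timestamp is wanted depends entirely on the caller.
 */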
#define NVC36F_WFI (0x00000078)
#define NVC36F_WFI_SCOPE 0:0
#define NVC36F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC36F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC36F_WFI_SCOPE_ALL 0x00000001
#define NVC36F_CRC_CHECK (0x0000007c)
#define NVC36F_CRC_CHECK_VALUE 31:0
#define NVC36F_YIELD (0x00000080)
#define NVC36F_YIELD_OP 1:0
#define NVC36F_YIELD_OP_NOP 0x00000000
#define NVC36F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
#define NVC36F_YIELD_OP_TSG 0x00000003
#define NVC36F_CLEAR_FAULTED (0x00000084)
#define NVC36F_CLEAR_FAULTED_CHID 11:0
#define NVC36F_CLEAR_FAULTED_TYPE 31:31
#define NVC36F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC36F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC36F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */
#define NVC36F_GP_ENTRY__SIZE 8
#define NVC36F_GP_ENTRY0_FETCH 0:0
#define NVC36F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVC36F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVC36F_GP_ENTRY0_GET 31:2
#define NVC36F_GP_ENTRY0_OPERAND 31:0
#define NVC36F_GP_ENTRY1_GET_HI 7:0
#define NVC36F_GP_ENTRY1_PRIV 8:8
#define NVC36F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVC36F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVC36F_GP_ENTRY1_LEVEL 9:9
#define NVC36F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC36F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC36F_GP_ENTRY1_LENGTH 30:10
#define NVC36F_GP_ENTRY1_SYNC 31:31
#define NVC36F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVC36F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVC36F_GP_ENTRY1_OPCODE 7:0
#define NVC36F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVC36F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVC36F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVC36F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
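/*
 * Usage sketch (illustrative only): each GPFIFO entry is two 32-bit words
 * (NVC36F_GP_ENTRY__SIZE bytes) pointing at a pushbuffer segment.  For a
 * segment at 40-bit GPU VA pb_va containing num_words 32-bit method words,
 * the entry could be packed roughly as:
 *
 *     NvU32 entry0 = (NvU32)pb_va & ~0x3u;        // GET, bits 31:2
 *     NvU32 entry1 = (NvU32)(pb_va >> 32)         // GET_HI, bits 7:0
 *                  | (num_words << 10);           // LENGTH, bits 30:10
 *
 * FETCH, PRIV, LEVEL and SYNC keep their zero (UNCONDITIONAL/USER/MAIN/
 * PROCEED) encodings in this simple case.
 */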
/* dma method formats */
#define NVC36F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC36F_DMA_METHOD_ADDRESS 11:0
#define NVC36F_DMA_SUBDEVICE_MASK 15:4
#define NVC36F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC36F_DMA_TERT_OP 17:16
#define NVC36F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC36F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC36F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC36F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC36F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC36F_DMA_METHOD_COUNT_OLD 28:18
#define NVC36F_DMA_METHOD_COUNT 28:16
#define NVC36F_DMA_IMMD_DATA 28:16
#define NVC36F_DMA_SEC_OP 31:29
#define NVC36F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC36F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC36F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC36F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC36F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC36F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC36F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC36F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC36F_DMA_INCR_ADDRESS 11:0
#define NVC36F_DMA_INCR_SUBCHANNEL 15:13
#define NVC36F_DMA_INCR_COUNT 28:16
#define NVC36F_DMA_INCR_OPCODE 31:29
#define NVC36F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC36F_DMA_INCR_DATA 31:0
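/*
 * Usage sketch (illustrative only): an incrementing method header for
 * `count` data words directed at byte offset `method` on subchannel `subch`
 * packs as:
 *
 *     NvU32 header = (method >> 2)                          // ADDRESS, bits 11:0
 *                  | (subch << 13)                          // SUBCHANNEL, bits 15:13
 *                  | (count << 16)                          // COUNT, bits 28:16
 *                  | (NVC36F_DMA_INCR_OPCODE_VALUE << 29);  // OPCODE, bits 31:29
 *
 * The `count` data words follow the header in the pushbuffer, and the
 * method address advances by 4 bytes for each successive word.
 */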
/* dma non-incrementing method format */
#define NVC36F_DMA_NONINCR_ADDRESS 11:0
#define NVC36F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC36F_DMA_NONINCR_COUNT 28:16
#define NVC36F_DMA_NONINCR_OPCODE 31:29
#define NVC36F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC36F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC36F_DMA_ONEINCR_ADDRESS 11:0
#define NVC36F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC36F_DMA_ONEINCR_COUNT 28:16
#define NVC36F_DMA_ONEINCR_OPCODE 31:29
#define NVC36F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC36F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC36F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC36F_DMA_IMMD_ADDRESS 11:0
#define NVC36F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC36F_DMA_IMMD_DATA 28:16
#define NVC36F_DMA_IMMD_OPCODE 31:29
#define NVC36F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC36F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC36F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC36F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC36F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC36F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC36F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC36F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC36F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC36F_DMA_ENDSEG_OPCODE 31:29
#define NVC36F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC36F_DMA_ADDRESS 12:2
#define NVC36F_DMA_SUBCH 15:13
#define NVC36F_DMA_OPCODE3 17:16
#define NVC36F_DMA_OPCODE3_NONE (0x00000000)
#define NVC36F_DMA_COUNT 28:18
#define NVC36F_DMA_OPCODE 31:29
#define NVC36F_DMA_OPCODE_METHOD (0x00000000)
#define NVC36F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC36F_DMA_DATA 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc36f_h_ */

View File

@@ -0,0 +1,203 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc3b5_h_
#define _clc3b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define VOLTA_DMA_COPY_A (0x0000C3B5)
#define NVC3B5_SET_SEMAPHORE_A (0x00000240)
#define NVC3B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC3B5_SET_SEMAPHORE_B (0x00000244)
#define NVC3B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC3B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC3B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC3B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC3B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC3B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC3B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC3B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC3B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC3B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC3B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC3B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC3B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC3B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC3B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC3B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC3B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC3B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC3B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC3B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC3B5_LAUNCH_DMA (0x00000300)
#define NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC3B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC3B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC3B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC3B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC3B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC3B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC3B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC3B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC3B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC3B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC3B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC3B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC3B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC3B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC3B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC3B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC3B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC3B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC3B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC3B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC3B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
#define NVC3B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC3B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC3B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC3B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC3B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC3B5_OFFSET_IN_UPPER (0x00000400)
#define NVC3B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC3B5_OFFSET_IN_LOWER (0x00000404)
#define NVC3B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC3B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC3B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC3B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC3B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC3B5_PITCH_IN (0x00000410)
#define NVC3B5_PITCH_IN_VALUE 31:0
#define NVC3B5_PITCH_OUT (0x00000414)
#define NVC3B5_PITCH_OUT_VALUE 31:0
#define NVC3B5_LINE_LENGTH_IN (0x00000418)
#define NVC3B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC3B5_LINE_COUNT (0x0000041C)
#define NVC3B5_LINE_COUNT_VALUE 31:0
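/*
 * Usage sketch (illustrative only): a simple 1D virtual-to-virtual copy of
 * `size` bytes programs OFFSET_IN_* with the source VA, OFFSET_OUT_* with
 * the destination VA and LINE_LENGTH_IN with `size`, then launches with a
 * value built from the field positions above, roughly:
 *
 *     NvU32 launch = NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED   // bits 1:0
 *                  | (NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE       << 2)
 *                  | (NVC3B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH << 7)
 *                  | (NVC3B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH << 8);
 *
 * SRC_TYPE/DST_TYPE default to their zero VIRTUAL encodings and
 * MULTI_LINE_ENABLE stays FALSE, so LINE_COUNT and the pitch methods are
 * not needed for this single-line case.
 */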
#define NVC3B5_SET_REMAP_CONST_A (0x00000700)
#define NVC3B5_SET_REMAP_CONST_A_V 31:0
#define NVC3B5_SET_REMAP_CONST_B (0x00000704)
#define NVC3B5_SET_REMAP_CONST_B_V 31:0
#define NVC3B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc3b5_h

View File

@@ -0,0 +1,367 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc46f_h_
#define _clc46f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
/* class TURING_CHANNEL_GPFIFO */
/*
* Documentation for TURING_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
* Note there is no .mfs file for this class.
*/
#define TURING_CHANNEL_GPFIFO_A (0x0000C46F)
#define NVC46F_TYPEDEF TURING_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc46fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored05[0x5c]; /* 0090-01ff*/
} Nvc46fControl, TuringAControlGPFifo;
/* fields and values */
#define NVC46F_NUMBER_OF_SUBCHANNELS (8)
#define NVC46F_SET_OBJECT (0x00000000)
#define NVC46F_SET_OBJECT_NVCLASS 15:0
#define NVC46F_SET_OBJECT_ENGINE 20:16
#define NVC46F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC46F_ILLEGAL (0x00000004)
#define NVC46F_ILLEGAL_HANDLE 31:0
#define NVC46F_NOP (0x00000008)
#define NVC46F_NOP_HANDLE 31:0
#define NVC46F_SEMAPHOREA (0x00000010)
#define NVC46F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC46F_SEMAPHOREB (0x00000014)
#define NVC46F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC46F_SEMAPHOREC (0x00000018)
#define NVC46F_SEMAPHOREC_PAYLOAD 31:0
#define NVC46F_SEMAPHORED (0x0000001C)
#define NVC46F_SEMAPHORED_OPERATION 4:0
#define NVC46F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC46F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC46F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC46F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC46F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC46F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC46F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC46F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC46F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC46F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC46F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC46F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC46F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC46F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC46F_SEMAPHORED_REDUCTION 30:27
#define NVC46F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC46F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC46F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC46F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC46F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC46F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC46F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC46F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC46F_SEMAPHORED_FORMAT 31:31
#define NVC46F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC46F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC46F_NON_STALL_INTERRUPT (0x00000020)
#define NVC46F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC46F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC46F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC46F_MEM_OP_A (0x00000028)
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 6:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
#define NVC46F_MEM_OP_B (0x0000002c)
#define NVC46F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC46F_MEM_OP_C (0x00000030)
#define NVC46F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC46F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC46F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC46F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC46F_MEM_OP_D (0x00000034)
#define NVC46F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC46F_MEM_OP_D_OPERATION 31:27
#define NVC46F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC46F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC46F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC46F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC46F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC46F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC46F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC46F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC46F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC46F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
#define NVC46F_SET_REFERENCE (0x00000050)
#define NVC46F_SET_REFERENCE_COUNT 31:0
#define NVC46F_SEM_ADDR_LO (0x0000005c)
#define NVC46F_SEM_ADDR_LO_OFFSET 31:2
#define NVC46F_SEM_ADDR_HI (0x00000060)
#define NVC46F_SEM_ADDR_HI_OFFSET 7:0
#define NVC46F_SEM_PAYLOAD_LO (0x00000064)
#define NVC46F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC46F_SEM_PAYLOAD_HI (0x00000068)
#define NVC46F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC46F_SEM_EXECUTE (0x0000006c)
#define NVC46F_SEM_EXECUTE_OPERATION 2:0
#define NVC46F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC46F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC46F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC46F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC46F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC46F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC46F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC46F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC46F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC46F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC46F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC46F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC46F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC46F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC46F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC46F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC46F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC46F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC46F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC46F_SEM_EXECUTE_REDUCTION 30:27
#define NVC46F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC46F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC46F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC46F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC46F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC46F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC46F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC46F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC46F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC46F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC46F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
#define NVC46F_WFI (0x00000078)
#define NVC46F_WFI_SCOPE 0:0
#define NVC46F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC46F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC46F_WFI_SCOPE_ALL 0x00000001
#define NVC46F_CRC_CHECK (0x0000007c)
#define NVC46F_CRC_CHECK_VALUE 31:0
#define NVC46F_YIELD (0x00000080)
#define NVC46F_YIELD_OP 1:0
#define NVC46F_YIELD_OP_NOP 0x00000000
#define NVC46F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
#define NVC46F_YIELD_OP_TSG 0x00000003
#define NVC46F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC46F_CLEAR_FAULTED_HANDLE 30:0
#define NVC46F_CLEAR_FAULTED_TYPE 31:31
#define NVC46F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC46F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC46F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */
#define NVC46F_GP_ENTRY__SIZE 8
#define NVC46F_GP_ENTRY0_FETCH 0:0
#define NVC46F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVC46F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVC46F_GP_ENTRY0_GET 31:2
#define NVC46F_GP_ENTRY0_OPERAND 31:0
#define NVC46F_GP_ENTRY1_GET_HI 7:0
#define NVC46F_GP_ENTRY1_LEVEL 9:9
#define NVC46F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC46F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC46F_GP_ENTRY1_LENGTH 30:10
#define NVC46F_GP_ENTRY1_SYNC 31:31
#define NVC46F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVC46F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVC46F_GP_ENTRY1_OPCODE 7:0
#define NVC46F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVC46F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVC46F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVC46F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
/* dma method formats */
#define NVC46F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC46F_DMA_METHOD_ADDRESS 11:0
#define NVC46F_DMA_SUBDEVICE_MASK 15:4
#define NVC46F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC46F_DMA_TERT_OP 17:16
#define NVC46F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC46F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC46F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC46F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC46F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC46F_DMA_METHOD_COUNT_OLD 28:18
#define NVC46F_DMA_METHOD_COUNT 28:16
#define NVC46F_DMA_IMMD_DATA 28:16
#define NVC46F_DMA_SEC_OP 31:29
#define NVC46F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC46F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC46F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC46F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC46F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC46F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC46F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC46F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC46F_DMA_INCR_ADDRESS 11:0
#define NVC46F_DMA_INCR_SUBCHANNEL 15:13
#define NVC46F_DMA_INCR_COUNT 28:16
#define NVC46F_DMA_INCR_OPCODE 31:29
#define NVC46F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC46F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC46F_DMA_NONINCR_ADDRESS 11:0
#define NVC46F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC46F_DMA_NONINCR_COUNT 28:16
#define NVC46F_DMA_NONINCR_OPCODE 31:29
#define NVC46F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC46F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC46F_DMA_ONEINCR_ADDRESS 11:0
#define NVC46F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC46F_DMA_ONEINCR_COUNT 28:16
#define NVC46F_DMA_ONEINCR_OPCODE 31:29
#define NVC46F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC46F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC46F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC46F_DMA_IMMD_ADDRESS 11:0
#define NVC46F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC46F_DMA_IMMD_DATA 28:16
#define NVC46F_DMA_IMMD_OPCODE 31:29
#define NVC46F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC46F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC46F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC46F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC46F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC46F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC46F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC46F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC46F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC46F_DMA_ENDSEG_OPCODE 31:29
#define NVC46F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC46F_DMA_ADDRESS 12:2
#define NVC46F_DMA_SUBCH 15:13
#define NVC46F_DMA_OPCODE3 17:16
#define NVC46F_DMA_OPCODE3_NONE (0x00000000)
#define NVC46F_DMA_COUNT 28:18
#define NVC46F_DMA_OPCODE 31:29
#define NVC46F_DMA_OPCODE_METHOD (0x00000000)
#define NVC46F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC46F_DMA_DATA 31:0
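/*
 * Illustrative sketch (not part of the class definition): packing an
 * incrementing-method header word from the NVC46F_DMA_INCR_* fields above.
 * Assumes "method" is the byte offset of the first method register and
 * "count" is the number of data dwords that follow the header; the helper
 * name is illustrative only.
 */
static inline NvU32 nvc46f_dma_incr_header(NvU32 method, NvU32 subch, NvU32 count)
{
    return (NVC46F_DMA_INCR_OPCODE_VALUE << 29) |   /* SEC_OP: incrementing, bits 31:29 */
           ((count & 0x1FFF) << 16) |               /* METHOD_COUNT, bits 28:16 */
           ((subch & 0x7)   << 13) |                /* subchannel, bits 15:13 */
           ((method >> 2) & 0xFFF);                 /* method address in dwords, bits 11:0 */
}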
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc46f_h_ */

View File

@@ -0,0 +1,369 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc56f_h_
#define _clc56f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
/* class AMPERE_CHANNEL_GPFIFO */
/*
* Documentation for AMPERE_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
* Note there is no .mfs file for this class.
*/
#define AMPERE_CHANNEL_GPFIFO_A (0x0000C56F)
#define NVC56F_TYPEDEF AMPERE_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc56fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
 NvU32 Ignored05[0x5c];                            /* 0090-01ff*/
} Nvc56fControl, AmpereAControlGPFifo;
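/*
 * Illustrative sketch (not part of this header): one GPFIFO submission step
 * using the control structure above, assuming "ctrl" points to the channel's
 * mapped USERD region and "num_gpfifo_entries" is a power of two.  The helper
 * name and the wrap handling are illustrative only; on this class a doorbell
 * write normally follows the GPPut update.
 */
static inline int nvc56f_try_advance_gpput(AmpereAControlGPFifo *ctrl,
                                           NvU32 num_gpfifo_entries)
{
    NvU32 put  = ctrl->GPPut;
    NvU32 next = (put + 1) & (num_gpfifo_entries - 1);
    if (next == ctrl->GPGet)
        return 0;              /* GPFIFO full: wait for the GPU to consume entries */
    ctrl->GPPut = next;        /* publish the newly written GPFIFO entry */
    return 1;
}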
/* fields and values */
#define NVC56F_NUMBER_OF_SUBCHANNELS (8)
#define NVC56F_SET_OBJECT (0x00000000)
#define NVC56F_SET_OBJECT_NVCLASS 15:0
#define NVC56F_SET_OBJECT_ENGINE 20:16
#define NVC56F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC56F_ILLEGAL (0x00000004)
#define NVC56F_ILLEGAL_HANDLE 31:0
#define NVC56F_NOP (0x00000008)
#define NVC56F_NOP_HANDLE 31:0
#define NVC56F_SEMAPHOREA (0x00000010)
#define NVC56F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC56F_SEMAPHOREB (0x00000014)
#define NVC56F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC56F_SEMAPHOREC (0x00000018)
#define NVC56F_SEMAPHOREC_PAYLOAD 31:0
#define NVC56F_SEMAPHORED (0x0000001C)
#define NVC56F_SEMAPHORED_OPERATION 4:0
#define NVC56F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC56F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC56F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC56F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC56F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC56F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC56F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC56F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC56F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC56F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC56F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC56F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC56F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC56F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC56F_SEMAPHORED_REDUCTION 30:27
#define NVC56F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC56F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC56F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC56F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC56F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC56F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC56F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC56F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC56F_SEMAPHORED_FORMAT 31:31
#define NVC56F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC56F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC56F_NON_STALL_INTERRUPT (0x00000020)
#define NVC56F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC56F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC56F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC56F_MEM_OP_A (0x00000028)
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 6:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
#define NVC56F_MEM_OP_B (0x0000002c)
#define NVC56F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC56F_MEM_OP_C (0x00000030)
#define NVC56F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC56F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC56F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC56F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC56F_MEM_OP_D (0x00000034)
#define NVC56F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC56F_MEM_OP_D_OPERATION 31:27
#define NVC56F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC56F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC56F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC56F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC56F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC56F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC56F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC56F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC56F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC56F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
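/*
 * Illustrative sketch (not part of this header): data dwords for a
 * MEM_OP_A..MEM_OP_D sequence that issues a system-scope membar.  The values
 * are meant to be sent to NVC56F_MEM_OP_A through NVC56F_MEM_OP_D in order;
 * only the payloads are shown here, not the method headers.
 */
static const NvU32 nvc56f_membar_sys_payload[4] = {
    0,                                            /* MEM_OP_A: unused for MEMBAR */
    0,                                            /* MEM_OP_B: unused for MEMBAR */
    NVC56F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR,       /* MEM_OP_C: membar type, bits 2:0 */
    NVC56F_MEM_OP_D_OPERATION_MEMBAR << 27        /* MEM_OP_D: OPERATION, bits 31:27 */
};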
#define NVC56F_SET_REFERENCE (0x00000050)
#define NVC56F_SET_REFERENCE_COUNT 31:0
#define NVC56F_SEM_ADDR_LO (0x0000005c)
#define NVC56F_SEM_ADDR_LO_OFFSET 31:2
#define NVC56F_SEM_ADDR_HI (0x00000060)
#define NVC56F_SEM_ADDR_HI_OFFSET 7:0
#define NVC56F_SEM_PAYLOAD_LO (0x00000064)
#define NVC56F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC56F_SEM_PAYLOAD_HI (0x00000068)
#define NVC56F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC56F_SEM_EXECUTE (0x0000006c)
#define NVC56F_SEM_EXECUTE_OPERATION 2:0
#define NVC56F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC56F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC56F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC56F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC56F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC56F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC56F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC56F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC56F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC56F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC56F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC56F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC56F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC56F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC56F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC56F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC56F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC56F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC56F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC56F_SEM_EXECUTE_REDUCTION 30:27
#define NVC56F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC56F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC56F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC56F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC56F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC56F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC56F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC56F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC56F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC56F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC56F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
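/*
 * Illustrative sketch (not part of this header): payload dwords for a 32-bit
 * semaphore release, sent as one incrementing method run starting at
 * NVC56F_SEM_ADDR_LO.  "gpu_va" is a 4-byte-aligned GPU virtual address; the
 * helper name is illustrative only.
 */
static inline void nvc56f_fill_sem_release(NvU64 gpu_va, NvU32 payload, NvU32 data[5])
{
    data[0] = (NvU32)(gpu_va & 0xFFFFFFFC);          /* SEM_ADDR_LO: offset bits 31:2 */
    data[1] = (NvU32)(gpu_va >> 32) & 0xFF;          /* SEM_ADDR_HI: offset bits 39:32 */
    data[2] = payload;                               /* SEM_PAYLOAD_LO */
    data[3] = 0;                                     /* SEM_PAYLOAD_HI: unused for 32-bit payload */
    data[4] = NVC56F_SEM_EXECUTE_OPERATION_RELEASE;  /* SEM_EXECUTE: 32-bit release, no WFI */
}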
#define NVC56F_WFI (0x00000078)
#define NVC56F_WFI_SCOPE 0:0
#define NVC56F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC56F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC56F_WFI_SCOPE_ALL 0x00000001
#define NVC56F_YIELD (0x00000080)
#define NVC56F_YIELD_OP 1:0
#define NVC56F_YIELD_OP_NOP 0x00000000
#define NVC56F_YIELD_OP_TSG 0x00000003
#define NVC56F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC56F_CLEAR_FAULTED_HANDLE 30:0
#define NVC56F_CLEAR_FAULTED_TYPE 31:31
#define NVC56F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC56F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC56F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */
#define NVC56F_GP_ENTRY__SIZE 8
#define NVC56F_GP_ENTRY0_FETCH 0:0
#define NVC56F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVC56F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVC56F_GP_ENTRY0_GET 31:2
#define NVC56F_GP_ENTRY0_OPERAND 31:0
#define NVC56F_GP_ENTRY1_GET_HI 7:0
#define NVC56F_GP_ENTRY1_LEVEL 9:9
#define NVC56F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC56F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC56F_GP_ENTRY1_LENGTH 30:10
#define NVC56F_GP_ENTRY1_SYNC 31:31
#define NVC56F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVC56F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVC56F_GP_ENTRY1_OPCODE 7:0
#define NVC56F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVC56F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVC56F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVC56F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
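/*
 * Illustrative sketch (not part of this header): packing one 8-byte GPFIFO
 * entry from the GP_ENTRY0/GP_ENTRY1 fields above.  "pushbuf_va" is the GPU
 * virtual address of the pushbuffer segment and "length_bytes" its size in
 * bytes; the helper name is illustrative only.
 */
static inline void nvc56f_pack_gp_entry(NvU64 pushbuf_va, NvU32 length_bytes, NvU32 entry[2])
{
    entry[0] = (NvU32)(pushbuf_va & 0xFFFFFFFC);           /* GET, bits 31:2 */
    entry[1] = ((NvU32)(pushbuf_va >> 32) & 0xFF) |        /* GET_HI, bits 39:32 */
               (((length_bytes / 4) & 0x1FFFFF) << 10);    /* LENGTH in dwords, bits 30:10 */
}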
/* dma method formats */
#define NVC56F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC56F_DMA_METHOD_ADDRESS 11:0
#define NVC56F_DMA_SUBDEVICE_MASK 15:4
#define NVC56F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC56F_DMA_TERT_OP 17:16
#define NVC56F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC56F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC56F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC56F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC56F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC56F_DMA_METHOD_COUNT_OLD 28:18
#define NVC56F_DMA_METHOD_COUNT 28:16
#define NVC56F_DMA_IMMD_DATA 28:16
#define NVC56F_DMA_SEC_OP 31:29
#define NVC56F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC56F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC56F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC56F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC56F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC56F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC56F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC56F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC56F_DMA_INCR_ADDRESS 11:0
#define NVC56F_DMA_INCR_SUBCHANNEL 15:13
#define NVC56F_DMA_INCR_COUNT 28:16
#define NVC56F_DMA_INCR_OPCODE 31:29
#define NVC56F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC56F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC56F_DMA_NONINCR_ADDRESS 11:0
#define NVC56F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC56F_DMA_NONINCR_COUNT 28:16
#define NVC56F_DMA_NONINCR_OPCODE 31:29
#define NVC56F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC56F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC56F_DMA_ONEINCR_ADDRESS 11:0
#define NVC56F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC56F_DMA_ONEINCR_COUNT 28:16
#define NVC56F_DMA_ONEINCR_OPCODE 31:29
#define NVC56F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC56F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC56F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC56F_DMA_IMMD_ADDRESS 11:0
#define NVC56F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC56F_DMA_IMMD_DATA 28:16
#define NVC56F_DMA_IMMD_OPCODE 31:29
#define NVC56F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC56F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC56F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC56F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC56F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC56F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC56F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC56F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC56F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC56F_DMA_ENDSEG_OPCODE 31:29
#define NVC56F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC56F_DMA_ADDRESS 12:2
#define NVC56F_DMA_SUBCH 15:13
#define NVC56F_DMA_OPCODE3 17:16
#define NVC56F_DMA_OPCODE3_NONE (0x00000000)
#define NVC56F_DMA_COUNT 28:18
#define NVC56F_DMA_OPCODE 31:29
#define NVC56F_DMA_OPCODE_METHOD (0x00000000)
#define NVC56F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC56F_DMA_DATA 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc56f_h_ */

View File

@@ -0,0 +1,352 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc5b5_h_
#define _clc5b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define TURING_DMA_COPY_A (0x0000C5B5)
typedef volatile struct _clc5b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x3F];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 Reserved03[0x2];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved04[0x6];
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
NvV32 Reserved05[0x1C];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0xB8];
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved08[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved09[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved10[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved11[0x3BA];
} turing_dma_copy_aControlPio;
#define NVC5B5_NOP (0x00000100)
#define NVC5B5_NOP_PARAMETER 31:0
#define NVC5B5_PM_TRIGGER (0x00000140)
#define NVC5B5_PM_TRIGGER_V 31:0
#define NVC5B5_SET_SEMAPHORE_A (0x00000240)
#define NVC5B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC5B5_SET_SEMAPHORE_B (0x00000244)
#define NVC5B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC5B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC5B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC5B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC5B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC5B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC5B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC5B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC5B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC5B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC5B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC5B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC5B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC5B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC5B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC5B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC5B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
#define NVC5B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
#define NVC5B5_SET_PAGEOUT_START_PAUPPER_V 4:0
#define NVC5B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
#define NVC5B5_SET_PAGEOUT_START_PALOWER_V 31:0
#define NVC5B5_LAUNCH_DMA (0x00000300)
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC5B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC5B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC5B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC5B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC5B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC5B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC5B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC5B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC5B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC5B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC5B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC5B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC5B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC5B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC5B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC5B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC5B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC5B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC5B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC5B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC5B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC5B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC5B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC5B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC5B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC5B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
#define NVC5B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC5B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC5B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC5B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC5B5_OFFSET_IN_UPPER (0x00000400)
#define NVC5B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC5B5_OFFSET_IN_LOWER (0x00000404)
#define NVC5B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC5B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC5B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC5B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC5B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC5B5_PITCH_IN (0x00000410)
#define NVC5B5_PITCH_IN_VALUE 31:0
#define NVC5B5_PITCH_OUT (0x00000414)
#define NVC5B5_PITCH_OUT_VALUE 31:0
#define NVC5B5_LINE_LENGTH_IN (0x00000418)
#define NVC5B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC5B5_LINE_COUNT (0x0000041C)
#define NVC5B5_LINE_COUNT_VALUE 31:0
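/*
 * Illustrative sketch (not part of this header): method/data pairs for a
 * simple 1D virtual-to-virtual copy of "size" bytes on TURING_DMA_COPY_A.
 * Pitch, block-size and remap state are left at their defaults, and the
 * struct and helper names are illustrative only.
 */
struct nvc5b5_method { NvU32 method; NvU32 data; };
static inline void nvc5b5_fill_linear_copy(NvU64 src_va, NvU64 dst_va, NvU32 size,
                                           struct nvc5b5_method m[6])
{
    m[0] = (struct nvc5b5_method){ NVC5B5_OFFSET_IN_UPPER,  (NvU32)(src_va >> 32) };
    m[1] = (struct nvc5b5_method){ NVC5B5_OFFSET_IN_LOWER,  (NvU32)src_va };
    m[2] = (struct nvc5b5_method){ NVC5B5_OFFSET_OUT_UPPER, (NvU32)(dst_va >> 32) };
    m[3] = (struct nvc5b5_method){ NVC5B5_OFFSET_OUT_LOWER, (NvU32)dst_va };
    m[4] = (struct nvc5b5_method){ NVC5B5_LINE_LENGTH_IN,   size };
    m[5] = (struct nvc5b5_method){ NVC5B5_LAUNCH_DMA,
            (NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED << 0) |   /* bits 1:0 */
            (NVC5B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE                << 2) |   /* bit 2 */
            (NVC5B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH          << 7) |   /* bit 7 */
            (NVC5B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH          << 8) };  /* bit 8 */
}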
#define NVC5B5_SET_REMAP_CONST_A (0x00000700)
#define NVC5B5_SET_REMAP_CONST_A_V 31:0
#define NVC5B5_SET_REMAP_CONST_B (0x00000704)
#define NVC5B5_SET_REMAP_CONST_B_V 31:0
#define NVC5B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
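/*
 * Illustrative sketch (not part of this header): a SET_REMAP_COMPONENTS value
 * that writes REMAP_CONST_A into every 4-byte destination element, i.e. a
 * copy-engine fill.  LAUNCH_DMA must then carry REMAP_ENABLE_TRUE, and the
 * transfer length is counted in elements rather than bytes; this encoding is
 * an assumption based on the field definitions above, and the macro name is
 * illustrative only.
 */
#define NVC5B5_EXAMPLE_REMAP_FILL_DWORD                                     \
    ((NVC5B5_SET_REMAP_COMPONENTS_DST_X_CONST_A          << 0)  |           \
     (NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR    << 16) |           \
     (NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE << 20) |           \
     (NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE << 24))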
#define NVC5B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC5B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC5B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC5B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC5B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC5B5_SET_DST_WIDTH (0x00000710)
#define NVC5B5_SET_DST_WIDTH_V 31:0
#define NVC5B5_SET_DST_HEIGHT (0x00000714)
#define NVC5B5_SET_DST_HEIGHT_V 31:0
#define NVC5B5_SET_DST_DEPTH (0x00000718)
#define NVC5B5_SET_DST_DEPTH_V 31:0
#define NVC5B5_SET_DST_LAYER (0x0000071C)
#define NVC5B5_SET_DST_LAYER_V 31:0
#define NVC5B5_SET_DST_ORIGIN (0x00000720)
#define NVC5B5_SET_DST_ORIGIN_X 15:0
#define NVC5B5_SET_DST_ORIGIN_Y 31:16
#define NVC5B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC5B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC5B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC5B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC5B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC5B5_SET_SRC_WIDTH (0x0000072C)
#define NVC5B5_SET_SRC_WIDTH_V 31:0
#define NVC5B5_SET_SRC_HEIGHT (0x00000730)
#define NVC5B5_SET_SRC_HEIGHT_V 31:0
#define NVC5B5_SET_SRC_DEPTH (0x00000734)
#define NVC5B5_SET_SRC_DEPTH_V 31:0
#define NVC5B5_SET_SRC_LAYER (0x00000738)
#define NVC5B5_SET_SRC_LAYER_V 31:0
#define NVC5B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC5B5_SET_SRC_ORIGIN_X 15:0
#define NVC5B5_SET_SRC_ORIGIN_Y 31:16
#define NVC5B5_SRC_ORIGIN_X (0x00000744)
#define NVC5B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC5B5_SRC_ORIGIN_Y (0x00000748)
#define NVC5B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC5B5_DST_ORIGIN_X (0x0000074C)
#define NVC5B5_DST_ORIGIN_X_VALUE 31:0
#define NVC5B5_DST_ORIGIN_Y (0x00000750)
#define NVC5B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC5B5_PM_TRIGGER_END (0x00001114)
#define NVC5B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc5b5_h_

View File

@@ -0,0 +1,352 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc6b5_h_
#define _clc6b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define AMPERE_DMA_COPY_A (0x0000C6B5)
typedef volatile struct _clc6b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x3F];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 Reserved03[0x2];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved04[0x6];
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
NvV32 Reserved05[0x1C];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0xB8];
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved08[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved09[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved10[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved11[0x3BA];
} ampere_dma_copy_aControlPio;
#define NVC6B5_NOP (0x00000100)
#define NVC6B5_NOP_PARAMETER 31:0
#define NVC6B5_PM_TRIGGER (0x00000140)
#define NVC6B5_PM_TRIGGER_V 31:0
#define NVC6B5_SET_SEMAPHORE_A (0x00000240)
#define NVC6B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC6B5_SET_SEMAPHORE_B (0x00000244)
#define NVC6B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC6B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC6B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC6B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC6B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC6B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC6B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC6B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC6B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC6B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC6B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC6B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC6B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC6B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC6B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC6B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC6B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC6B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC6B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC6B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC6B5_SET_DST_PHYS_MODE_FLA 9:9
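/*
 * Illustrative sketch (not part of this header): a SET_SRC_PHYS_MODE value for
 * a physical copy whose source lives in another GPU's framebuffer, reached
 * through peer aperture "peer_id".  LAUNCH_DMA_SRC_TYPE_PHYSICAL must be set
 * for this mode to take effect; the helper name is illustrative only.
 */
static inline NvU32 nvc6b5_src_phys_mode_peer(NvU32 peer_id)
{
    return NVC6B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM |   /* TARGET, bits 1:0 */
           ((peer_id & 0x7) << 6);                     /* PEER_ID, bits 8:6 */
}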
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
#define NVC6B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
#define NVC6B5_SET_PAGEOUT_START_PAUPPER_V 4:0
#define NVC6B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
#define NVC6B5_SET_PAGEOUT_START_PALOWER_V 31:0
#define NVC6B5_LAUNCH_DMA (0x00000300)
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC6B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC6B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC6B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC6B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC6B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC6B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC6B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC6B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC6B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC6B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC6B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC6B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC6B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC6B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC6B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC6B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC6B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC6B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC6B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC6B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC6B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC6B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC6B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC6B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC6B5_OFFSET_IN_UPPER (0x00000400)
#define NVC6B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC6B5_OFFSET_IN_LOWER (0x00000404)
#define NVC6B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC6B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC6B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC6B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC6B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC6B5_PITCH_IN (0x00000410)
#define NVC6B5_PITCH_IN_VALUE 31:0
#define NVC6B5_PITCH_OUT (0x00000414)
#define NVC6B5_PITCH_OUT_VALUE 31:0
#define NVC6B5_LINE_LENGTH_IN (0x00000418)
#define NVC6B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC6B5_LINE_COUNT (0x0000041C)
#define NVC6B5_LINE_COUNT_VALUE 31:0
#define NVC6B5_SET_REMAP_CONST_A (0x00000700)
#define NVC6B5_SET_REMAP_CONST_A_V 31:0
#define NVC6B5_SET_REMAP_CONST_B (0x00000704)
#define NVC6B5_SET_REMAP_CONST_B_V 31:0
#define NVC6B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC6B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC6B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC6B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC6B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC6B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC6B5_SET_DST_WIDTH (0x00000710)
#define NVC6B5_SET_DST_WIDTH_V 31:0
#define NVC6B5_SET_DST_HEIGHT (0x00000714)
#define NVC6B5_SET_DST_HEIGHT_V 31:0
#define NVC6B5_SET_DST_DEPTH (0x00000718)
#define NVC6B5_SET_DST_DEPTH_V 31:0
#define NVC6B5_SET_DST_LAYER (0x0000071C)
#define NVC6B5_SET_DST_LAYER_V 31:0
#define NVC6B5_SET_DST_ORIGIN (0x00000720)
#define NVC6B5_SET_DST_ORIGIN_X 15:0
#define NVC6B5_SET_DST_ORIGIN_Y 31:16
#define NVC6B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC6B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC6B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC6B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC6B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC6B5_SET_SRC_WIDTH (0x0000072C)
#define NVC6B5_SET_SRC_WIDTH_V 31:0
#define NVC6B5_SET_SRC_HEIGHT (0x00000730)
#define NVC6B5_SET_SRC_HEIGHT_V 31:0
#define NVC6B5_SET_SRC_DEPTH (0x00000734)
#define NVC6B5_SET_SRC_DEPTH_V 31:0
#define NVC6B5_SET_SRC_LAYER (0x00000738)
#define NVC6B5_SET_SRC_LAYER_V 31:0
#define NVC6B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC6B5_SET_SRC_ORIGIN_X 15:0
#define NVC6B5_SET_SRC_ORIGIN_Y 31:16
#define NVC6B5_SRC_ORIGIN_X (0x00000744)
#define NVC6B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC6B5_SRC_ORIGIN_Y (0x00000748)
#define NVC6B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC6B5_DST_ORIGIN_X (0x0000074C)
#define NVC6B5_DST_ORIGIN_X_VALUE 31:0
#define NVC6B5_DST_ORIGIN_Y (0x00000750)
#define NVC6B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC6B5_PM_TRIGGER_END (0x00001114)
#define NVC6B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc6b5_h_

View File

@@ -0,0 +1,379 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc7b5_h_
#define _clc7b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define AMPERE_DMA_COPY_B (0x0000C7B5)
typedef volatile struct _clc7b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x6];
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
NvV32 Reserved06[0x1C];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved07[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved08[0xB8];
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved09[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved10[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved11[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved12[0x3BA];
} ampere_dma_copy_bControlPio;
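/*
 * Illustrative sketch, an addition rather than part of the generated header:
 * the Reserved padding arrays above are sized so that each named field lands
 * at the byte offset shown in its trailing comment, matching the NVC7B5_*
 * method offsets defined below.  Assuming C11 and <stddef.h>, one such
 * correspondence can be checked at compile time:
 */
#include <stddef.h>
_Static_assert(offsetof(ampere_dma_copy_bControlPio, LaunchDma) == 0x300,
               "LaunchDma should sit at the NVC7B5_LAUNCH_DMA method offset");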
#define NVC7B5_NOP (0x00000100)
#define NVC7B5_NOP_PARAMETER 31:0
#define NVC7B5_PM_TRIGGER (0x00000140)
#define NVC7B5_PM_TRIGGER_V 31:0
#define NVC7B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC7B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC7B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC7B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC7B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC7B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 16:0
#define NVC7B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC7B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define NVC7B5_SET_SEMAPHORE_A (0x00000240)
#define NVC7B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC7B5_SET_SEMAPHORE_B (0x00000244)
#define NVC7B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC7B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC7B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC7B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC7B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC7B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC7B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC7B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC7B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC7B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC7B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC7B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC7B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC7B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC7B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC7B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC7B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC7B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC7B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC7B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC7B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC7B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC7B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
#define NVC7B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
#define NVC7B5_SET_PAGEOUT_START_PAUPPER_V 4:0
#define NVC7B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
#define NVC7B5_SET_PAGEOUT_START_PALOWER_V 31:0
#define NVC7B5_LAUNCH_DMA (0x00000300)
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC7B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC7B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC7B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC7B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC7B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC7B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC7B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC7B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC7B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC7B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC7B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC7B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC7B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC7B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC7B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC7B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC7B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC7B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC7B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC7B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC7B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC7B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC7B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC7B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
#define NVC7B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC7B5_OFFSET_IN_UPPER (0x00000400)
#define NVC7B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC7B5_OFFSET_IN_LOWER (0x00000404)
#define NVC7B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC7B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC7B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC7B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC7B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC7B5_PITCH_IN (0x00000410)
#define NVC7B5_PITCH_IN_VALUE 31:0
#define NVC7B5_PITCH_OUT (0x00000414)
#define NVC7B5_PITCH_OUT_VALUE 31:0
#define NVC7B5_LINE_LENGTH_IN (0x00000418)
#define NVC7B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC7B5_LINE_COUNT (0x0000041C)
#define NVC7B5_LINE_COUNT_VALUE 31:0
#define NVC7B5_SET_REMAP_CONST_A (0x00000700)
#define NVC7B5_SET_REMAP_CONST_A_V 31:0
#define NVC7B5_SET_REMAP_CONST_B (0x00000704)
#define NVC7B5_SET_REMAP_CONST_B_V 31:0
#define NVC7B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC7B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC7B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC7B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC7B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC7B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC7B5_SET_DST_WIDTH (0x00000710)
#define NVC7B5_SET_DST_WIDTH_V 31:0
#define NVC7B5_SET_DST_HEIGHT (0x00000714)
#define NVC7B5_SET_DST_HEIGHT_V 31:0
#define NVC7B5_SET_DST_DEPTH (0x00000718)
#define NVC7B5_SET_DST_DEPTH_V 31:0
#define NVC7B5_SET_DST_LAYER (0x0000071C)
#define NVC7B5_SET_DST_LAYER_V 31:0
#define NVC7B5_SET_DST_ORIGIN (0x00000720)
#define NVC7B5_SET_DST_ORIGIN_X 15:0
#define NVC7B5_SET_DST_ORIGIN_Y 31:16
#define NVC7B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC7B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC7B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC7B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC7B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC7B5_SET_SRC_WIDTH (0x0000072C)
#define NVC7B5_SET_SRC_WIDTH_V 31:0
#define NVC7B5_SET_SRC_HEIGHT (0x00000730)
#define NVC7B5_SET_SRC_HEIGHT_V 31:0
#define NVC7B5_SET_SRC_DEPTH (0x00000734)
#define NVC7B5_SET_SRC_DEPTH_V 31:0
#define NVC7B5_SET_SRC_LAYER (0x00000738)
#define NVC7B5_SET_SRC_LAYER_V 31:0
#define NVC7B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC7B5_SET_SRC_ORIGIN_X 15:0
#define NVC7B5_SET_SRC_ORIGIN_Y 31:16
#define NVC7B5_SRC_ORIGIN_X (0x00000744)
#define NVC7B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC7B5_SRC_ORIGIN_Y (0x00000748)
#define NVC7B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC7B5_DST_ORIGIN_X (0x0000074C)
#define NVC7B5_DST_ORIGIN_X_VALUE 31:0
#define NVC7B5_DST_ORIGIN_Y (0x00000750)
#define NVC7B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC7B5_PM_TRIGGER_END (0x00001114)
#define NVC7B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc7b5_h
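/*
 * Illustrative sketch, not part of the original header: the NVC7B5_LAUNCH_DMA
 * field macros above give the bit range of each field, so a launch word can
 * be assembled by shifting each enumerant to its field's low bit.  Assuming
 * plain shifts rather than the driver's field-packing helpers, a minimal
 * pitch-to-pitch, non-pipelined copy might use (helper name is hypothetical):
 */
static inline NvV32 nvc7b5_launch_dma_pitch_copy(void)
{
    NvV32 v = 0;
    v |= NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED << 0;  /* bits 1:0 */
    v |= NVC7B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE                << 2;  /* bit  2   */
    v |= NVC7B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH          << 7;  /* bit  7   */
    v |= NVC7B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH          << 8;  /* bit  8   */
    v |= NVC7B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL                 << 12; /* bit  12  */
    v |= NVC7B5_LAUNCH_DMA_DST_TYPE_VIRTUAL                 << 13; /* bit  13  */
    return v;
}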

View File

@@ -0,0 +1,51 @@
/*******************************************************************************
Copyright (c) 2013-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _ctrl2080mc_h_
#define _ctrl2080mc_h_
/* valid architecture values */
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_T13X (0xE0000013)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000 (0x00000110)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200 (0x00000120)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100 (0x00000130)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100 (0x00000140)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100 (0x00000160)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100 (0x00000170)
/* valid ARCHITECTURE_GP10x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100 (0x00000000)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP000 (0x00000001)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100 (0x00000000)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000 (0x00000001)
#endif /* _ctrl2080mc_h_ */
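/*
 * Illustrative sketch, not part of the control header: callers that have
 * obtained the architecture value from an arch-info query can compare it
 * against the constants above.  The helper below is hypothetical and assumes
 * a plain unsigned 32-bit architecture value:
 */
static inline int nv2080_arch_is_ampere(unsigned int arch)
{
    return arch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100;
}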

View File

@@ -0,0 +1,480 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __ga100_dev_fault_h__
#define __ga100_dev_fault_h__
/* This file is autogenerated. Do not edit */
#define NV_PFAULT /* ----G */
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 64 /* */
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
#define NV_PFAULT_MMU_ENG_ID_IFB 9 /* */
#define NV_PFAULT_MMU_ENG_ID_FLA 4 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1 128 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2 192 /* */
#define NV_PFAULT_MMU_ENG_ID_SEC 14 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF 8 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC0 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC1 26 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC2 27 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC3 28 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC4 29 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG0 30 /* */
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE0 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE1 16 /* */
#define NV_PFAULT_MMU_ENG_ID_CE2 17 /* */
#define NV_PFAULT_MMU_ENG_ID_CE3 18 /* */
#define NV_PFAULT_MMU_ENG_ID_CE4 19 /* */
#define NV_PFAULT_MMU_ENG_ID_CE5 20 /* */
#define NV_PFAULT_MMU_ENG_ID_CE6 21 /* */
#define NV_PFAULT_MMU_ENG_ID_CE7 22 /* */
#define NV_PFAULT_MMU_ENG_ID_CE8 23 /* */
#define NV_PFAULT_MMU_ENG_ID_CE9 24 /* */
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 6 /* */
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC0 11 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC1 12 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC2 13 /* */
#define NV_PFAULT_MMU_ENG_ID_OFA0 10 /* */
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 31 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST0 32 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST1 33 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST2 34 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST3 35 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST4 36 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST5 37 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST6 38 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST7 39 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST8 40 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST9 41 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST10 42 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST11 43 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST12 44 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST13 45 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST14 46 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST15 47 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST16 48 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST17 49 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST18 50 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST19 51 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST20 52 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST21 53 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST22 54 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST23 55 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST24 56 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST25 57 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST26 58 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST27 59 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST28 60 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST29 61 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST30 62 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST31 63 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN0 128 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN1 129 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN2 130 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN3 131 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN4 132 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN5 133 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN6 134 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN7 135 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN8 136 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN9 137 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN10 138 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN11 139 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN12 140 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN13 141 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN14 142 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN15 143 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN16 144 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN17 145 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN18 146 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN19 147 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN20 148 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN21 149 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN22 150 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN23 151 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN24 152 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN25 153 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN26 154 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN27 155 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN28 156 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN29 157 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN30 158 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN31 159 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN32 160 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN33 161 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN34 162 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN35 163 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN36 164 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN37 165 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN38 166 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN39 167 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN40 168 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN41 169 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN42 170 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN43 171 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN44 172 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN45 173 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN46 174 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN47 175 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN48 176 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN49 177 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN50 178 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN51 179 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN52 180 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN53 181 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN54 182 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN55 183 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN56 184 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN57 185 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN58 186 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN59 187 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN60 188 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN61 189 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN62 190 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN63 191 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN0 192 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN1 193 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN2 194 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN3 195 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN4 196 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN5 197 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN6 198 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN7 199 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN8 200 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN9 201 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN10 202 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN11 203 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN12 204 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN13 205 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN14 206 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN15 207 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN16 208 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN17 209 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN18 210 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN19 211 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN20 212 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN21 213 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN22 214 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN23 215 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN24 216 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN25 217 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN26 218 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN27 219 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN28 220 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN29 221 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN30 222 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN31 223 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN32 224 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN33 225 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN34 226 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN35 227 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN36 228 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN37 229 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN38 230 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN39 231 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN40 232 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN41 233 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN42 234 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN43 235 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN44 236 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN45 237 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN46 238 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN47 239 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN48 240 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN49 241 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN50 242 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN51 243 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN52 244 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN53 245 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN54 246 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN55 247 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN56 248 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN57 249 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN58 250 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN59 251 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN60 252 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN61 253 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN62 254 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN63 255 /* */
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
#define NV_PFAULT_CLIENT 14:8 /* */
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_0 0x00000070 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_1 0x00000071 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_2 0x00000072 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_3 0x00000073 /* */
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_0 0x00000018 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_1 0x00000019 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_2 0x0000001A /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_3 0x0000001B /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_4 0x0000001C /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_5 0x0000001D /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_6 0x0000001E /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_7 0x0000001F /* */
#define NV_PFAULT_CLIENT_GPC_RGG_UTLB 0x00000020 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_8 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_9 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_10 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_11 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_12 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_13 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_14 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_15 0x00000038 /* */
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_DISPNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_FE0 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FECS0 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC0 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_ACTRS 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
#define NV_PFAULT_CLIENT_HUB_PERF0 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
#define NV_PFAULT_CLIENT_HUB_SKED0 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
#define NV_PFAULT_CLIENT_HUB_DWBIF 0x00000036 /* */
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC1 0x0000003A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC2 0x0000003B /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG0 0x0000003C /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC3 0x0000003D /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC4 0x0000003E /* */
#define NV_PFAULT_CLIENT_HUB_OFA0 0x0000003F /* */
#define NV_PFAULT_CLIENT_HUB_HSCE10 0x00000040 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE11 0x00000041 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE12 0x00000042 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE13 0x00000043 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE14 0x00000044 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE15 0x00000045 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X8 0x00000046 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X9 0x00000047 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X10 0x00000048 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X11 0x00000049 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X12 0x0000004A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X13 0x0000004B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X14 0x0000004C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X15 0x0000004D /* */
#define NV_PFAULT_CLIENT_HUB_FE1 0x0000004E /* */
#define NV_PFAULT_CLIENT_HUB_FE2 0x0000004F /* */
#define NV_PFAULT_CLIENT_HUB_FE3 0x00000050 /* */
#define NV_PFAULT_CLIENT_HUB_FE4 0x00000051 /* */
#define NV_PFAULT_CLIENT_HUB_FE5 0x00000052 /* */
#define NV_PFAULT_CLIENT_HUB_FE6 0x00000053 /* */
#define NV_PFAULT_CLIENT_HUB_FE7 0x00000054 /* */
#define NV_PFAULT_CLIENT_HUB_FECS1 0x00000055 /* */
#define NV_PFAULT_CLIENT_HUB_FECS2 0x00000056 /* */
#define NV_PFAULT_CLIENT_HUB_FECS3 0x00000057 /* */
#define NV_PFAULT_CLIENT_HUB_FECS4 0x00000058 /* */
#define NV_PFAULT_CLIENT_HUB_FECS5 0x00000059 /* */
#define NV_PFAULT_CLIENT_HUB_FECS6 0x0000005A /* */
#define NV_PFAULT_CLIENT_HUB_FECS7 0x0000005B /* */
#define NV_PFAULT_CLIENT_HUB_SKED1 0x0000005C /* */
#define NV_PFAULT_CLIENT_HUB_SKED2 0x0000005D /* */
#define NV_PFAULT_CLIENT_HUB_SKED3 0x0000005E /* */
#define NV_PFAULT_CLIENT_HUB_SKED4 0x0000005F /* */
#define NV_PFAULT_CLIENT_HUB_SKED5 0x00000060 /* */
#define NV_PFAULT_CLIENT_HUB_SKED6 0x00000061 /* */
#define NV_PFAULT_CLIENT_HUB_SKED7 0x00000062 /* */
#define NV_PFAULT_CLIENT_HUB_ESC 0x00000063 /* */
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
#define NV_PFAULT_GPC_ID 28:24 /* */
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
#define NV_PFAULT_REPLAYABLE_FAULT_EN 30:30 /* */
#define NV_PFAULT_VALID 31:31 /* */
#endif // __ga100_dev_fault_h__
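/*
 * Illustrative sketch, not part of the generated header: the NV_PFAULT_*
 * range macros above describe fields packed into a 32-bit fault-information
 * word.  Assuming plain shifts and masks (the helper names are hypothetical),
 * the most commonly consulted fields decode as:
 */
static inline unsigned int nv_pfault_fault_type(unsigned int info)
{
    return info & 0x1Fu;              /* NV_PFAULT_FAULT_TYPE, bits 4:0    */
}
static inline unsigned int nv_pfault_client(unsigned int info)
{
    return (info >> 8) & 0x7Fu;       /* NV_PFAULT_CLIENT, bits 14:8       */
}
static inline unsigned int nv_pfault_access_type(unsigned int info)
{
    return (info >> 16) & 0xFu;       /* NV_PFAULT_ACCESS_TYPE, bits 19:16 */
}
static inline unsigned int nv_pfault_valid(unsigned int info)
{
    return (info >> 31) & 0x1u;       /* NV_PFAULT_VALID, bit 31           */
}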

View File

@@ -0,0 +1,782 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __ga100_dev_runlist_h__
#define __ga100_dev_runlist_h__
/* This file is autogenerated. Do not edit */
#define NV_RUNLIST 0x000003ff:0x00000000 /* RW--D */
#define NV_CHRAM 0x00001fff:0x00000000 /* RW--D */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK 0x040 /* RW-4R */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 8 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE 0x044 /* RW-4R */
#define NV_RUNLIST_INT_CYA_SPARE__PRIV_LEVEL_MASK 0x040 /* */
#define NV_RUNLIST_INT_CYA_SPARE_DATA 31:0 /* RWIUF */
#define NV_RUNLIST_INT_CYA_SPARE_DATA_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE 0:0 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL 1:1 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_1MTHD 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_2MTHD 0x00000001 /* */
#define NV_RUNLIST_CONFIG 0x000 /* RW-4R */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH 0:0 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_WEAK 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_STRONG 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH 4:4 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_WEAK 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_STRONG 0x00000001 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT 9:8 /* RWIVF */
#define NV_RUNLIST_CONFIG_L2_EVICT_FIRST 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT_NORMAL 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_L2_EVICT_LAST 0x00000002 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4 10:10 /* RWXVF */
#define NV_RUNLIST_CONFIG_SUBCH4_INACTIVE 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4_ACTIVE 0x00000001 /* RW--V */
#define NV_RUNLIST_CHANNEL_CONFIG 0x004 /* R--4R */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2 3:0 /* C--UF */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2_2K 11 /* C---V */
#define NV_RUNLIST_CHANNEL_CONFIG_CHRAM_BAR0_OFFSET 31:4 /* R-XVF */
#define NV_RUNLIST_DOORBELL_CONFIG 0x008 /* R--4R */
#define NV_RUNLIST_DOORBELL_CONFIG_ID 31:16 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG 0x00C /* R--4R */
#define NV_RUNLIST_FB_CONFIG_FB_THREAD_ID 7:0 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG_ESC_ID 15:8 /* R-XVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG(i) (0x300+(i)*4) /* RW-4A */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG__SIZE_1 64 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK 11:0 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW 10:0 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW_INIT 2047 /* RWI-V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET 27:16 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW 26:16 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW_INIT 0x0 /* RWI-V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE 31:31 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_TRUE 1 /* RW--V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_FALSE 0 /* RWI-V */
#define NV_RUNLIST_PBDMA_CONFIG(i) (0x010+(i)*4) /* R--4A */
#define NV_RUNLIST_PBDMA_CONFIG__SIZE_1 2 /* */
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_ID 7:0 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_BAR0_OFFSET 25:10 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_VALID 31:31 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_PBDMA_CONFIG_VALID_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_ACQ_PRETEST 0x020 /* RW-4R */
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT 7:0 /* RWIUF */
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT_8 0x00000008 /* RWI-V */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE 15:12 /* RWIUF */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_0 0x00000000 /* RWI-V */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_10 0x0000000a /* RW--V */
#define NV_RUNLIST_IDLE_FILTER 0x024 /* RW-4R */
#define NV_RUNLIST_IDLE_FILTER_PERIOD 7:0 /* RWIUF */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_INIT 0x00000050 /* RWI-V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD__PROD 0x00000064 /* RW--V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_8 0x00000008 /* RW--V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_32 0x00000020 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK 0x028 /* RW-4R */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER 7:0 /* RWIUF */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_DISABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_SHORT 0x00000003 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_100US 0x00000064 /* RWI-V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE 15:12 /* RWIUF */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_0 0x00000000 /* RWI-V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_SHORT 0x00000000 /* */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_100US 0x00000000 /* */
#define NV_RUNLIST_ESCHED_CONFIG 0x02c /* C--4R */
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID 15:0 /* C--UF */
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID_VALUE 50543 /* C---V */
#define NV_CHRAM_CHANNEL(i) (0x000+(i)*4) /* RW-4A */
#define NV_CHRAM_CHANNEL__SIZE_1 2048 /* */
#define NV_CHRAM_CHANNEL_WRITE_CONTROL 0:0 /* -WIVF */
#define NV_CHRAM_CHANNEL_WRITE_CONTROL_ONES_SET_BITS 0x00000000 /* -WI-V */
#define NV_CHRAM_CHANNEL_WRITE_CONTROL_ONES_CLEAR_BITS 0x00000001 /* -W--V */
#define NV_CHRAM_CHANNEL_ENABLE 1:1 /* RWIVF */
#define NV_CHRAM_CHANNEL_ENABLE_NOT_IN_USE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_ENABLE_IN_USE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_NEXT 2:2 /* RWIVF */
#define NV_CHRAM_CHANNEL_NEXT_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_NEXT_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_BUSY 3:3 /* R-IVF */
#define NV_CHRAM_CHANNEL_BUSY_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_BUSY_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_PBDMA_FAULTED 4:4 /* RWIVF */
#define NV_CHRAM_CHANNEL_PBDMA_FAULTED_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_PBDMA_FAULTED_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_ENG_FAULTED 5:5 /* RWIVF */
#define NV_CHRAM_CHANNEL_ENG_FAULTED_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_ENG_FAULTED_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_ON_PBDMA 6:6 /* R-IVF */
#define NV_CHRAM_CHANNEL_ON_PBDMA_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_ON_PBDMA_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_ON_ENG 7:7 /* R-IVF */
#define NV_CHRAM_CHANNEL_ON_ENG_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_ON_ENG_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_PENDING 8:8 /* RWIVF */
#define NV_CHRAM_CHANNEL_PENDING_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_PENDING_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_CTX_RELOAD 9:9 /* RWIVF */
#define NV_CHRAM_CHANNEL_CTX_RELOAD_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_CTX_RELOAD_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_PBDMA_BUSY 10:10 /* R-IVF */
#define NV_CHRAM_CHANNEL_PBDMA_BUSY_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_PBDMA_BUSY_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_ENG_BUSY 11:11 /* R-IVF */
#define NV_CHRAM_CHANNEL_ENG_BUSY_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_ENG_BUSY_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL 12:12 /* RWIVF */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_STATUS 12:8 /* */
#define NV_CHRAM_CHANNEL_STATUS_IDLE 0x00000000 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING 0x00000001 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_CTX_RELOAD 0x00000003 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL 0x00000011 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x00000013 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY 0x00000004 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_AND_ENG_BUSY 0x0000000C /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY 0x00000008 /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL 0x00000019 /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING 0x00000009 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_CTX_RELOAD 0x00000006 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_ENG_BUSY_CTX_RELOAD 0x0000000E /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_CTX_RELOAD 0x0000000A /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_CTX_RELOAD 0x0000000B /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x0000001B /* */
#define NV_CHRAM_CHANNEL_UPDATE 31:0 /* */
#define NV_CHRAM_CHANNEL_UPDATE_ENABLE_CHANNEL 0x00000002 /* */
#define NV_CHRAM_CHANNEL_UPDATE_DISABLE_CHANNEL 0x00000003 /* */
#define NV_CHRAM_CHANNEL_UPDATE_FORCE_CTX_RELOAD 0x00000200 /* */
#define NV_CHRAM_CHANNEL_UPDATE_RESET_PBDMA_FAULTED 0x00000011 /* */
#define NV_CHRAM_CHANNEL_UPDATE_RESET_ENG_FAULTED 0x00000021 /* */
#define NV_CHRAM_CHANNEL_UPDATE_CLEAR_CHANNEL 0xFFFFFFFF /* */
#define NV_RUNLIST_SUBMIT_BASE_LO 0x080 /* RW-4R */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO 31:12 /* RWIUF */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET 1:0 /* RWIVF */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_VID_MEM 0x0 /* RWI-V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_COHERENT 0x2 /* RW--V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_NONCOHERENT 0x3 /* RW--V */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_ALIGN_SHIFT 12 /* */
#define NV_RUNLIST_SUBMIT_BASE_HI 0x084 /* RW-4R */
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI 7:0 /* RWIUF */
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT 0x088 /* RW-4R */
#define NV_RUNLIST_SUBMIT_LENGTH 15:0 /* RWIUF */
#define NV_RUNLIST_SUBMIT_LENGTH_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_LENGTH_MAX 0x0000ffff /* RW--V */
#define NV_RUNLIST_SUBMIT_OFFSET 31:16 /* RWIVF */
#define NV_RUNLIST_SUBMIT_OFFSET_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_INFO 0x08C /* R--4R */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID 13:0 /* */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW 10:0 /* R-IUF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID 14:14 /* R-IUF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_SUBMIT_INFO_PENDING 15:15 /* R-IVF */
#define NV_RUNLIST_SUBMIT_INFO_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET 31:16 /* R-IVF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET_ZERO 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK(i) (0x190+(i)*4) /* RW-4A */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
#define NV_RUNLIST_INTR_VECTORID(i) (0x160+(i)*4) /* RW-4A */
#define NV_RUNLIST_INTR_VECTORID__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_VECTORID__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_VECTORID_VECTOR 11:0 /* RWXUF */
#define NV_RUNLIST_INTR_VECTORID_GSP 30:30 /* RWIUF */
#define NV_RUNLIST_INTR_VECTORID_GSP_DISABLE 0 /* RW--V */
#define NV_RUNLIST_INTR_VECTORID_GSP_ENABLE 1 /* RWI-V */
#define NV_RUNLIST_INTR_VECTORID_CPU 31:31 /* RWIUF */
#define NV_RUNLIST_INTR_VECTORID_CPU_DISABLE 0 /* RW--V */
#define NV_RUNLIST_INTR_VECTORID_CPU_ENABLE 1 /* RWI-V */
#define NV_RUNLIST_INTR_RETRIGGER(i) (0x180+(i)*4) /* -W-4A */
#define NV_RUNLIST_INTR_RETRIGGER__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_RETRIGGER__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER 0:0 /* -W-VF */
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER_TRUE 1 /* -W--V */
#define NV_RUNLIST_INTR_0 0x100 /* RW-4R */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_RESET 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_RESET 0x00000001 /* */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE 5:5 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE 6:6 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_BAD_TSG_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_BAD_TSG_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_BAD_TSG_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0 16:16 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0 17:17 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1 18:18 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1 19:19 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_EVENT 9:9 /* */
#define NV_RUNLIST_INTR_0_MASK_SET 0x110 /* RW-4R */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_MASK_CLEAR 0x118 /* RW-4R */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE(i) (0x120+(i)*8) /* RW-4A */
#define NV_RUNLIST_INTR_0_EN_SET_TREE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE(i) (0x140+(i)*8) /* RW-4A */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO(i) (0x224+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID 13:0 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE 15:14 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_LOAD 0x00000001 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SAVE 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SWITCH 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID 29:16 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS 31:30 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_AWAITING_ACK 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ENG_WAS_RESET 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ACK_RECEIVED 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_DROPPED_TIMEOUT 0x00000003 /* R---V */
#define NV_RUNLIST_INFO 0x108 /* R--4R */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM 0:0 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM 1:1 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM 4:4 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM 5:5 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_ENG_IDLE 8:8 /* R-IUF */
#define NV_RUNLIST_INFO_ENG_IDLE_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_INFO_ENG_IDLE_TRUE 0x00000001 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE 9:9 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_TRUE 0x00000001 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS 10:10 /* R-IVF */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_IDLE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_BUSY 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING 12:12 /* R-IUF */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED 13:13 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG 0x174 /* R--4R */
#define NV_RUNLIST_INTR_BAD_TSG_CODE 3:0 /* R-IVF */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_NO_ERROR 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_ZERO_LENGTH_TSG 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_MAX_LENGTH_EXCEEDED 0x00000002 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_RUNLIST_OVERFLOW 0x00000003 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_CHID_ENTRY 0x00000004 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_TSG_HEADER 0x00000005 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_INVALID_RUNQUEUE 0x00000006 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG(i) (0x220+(i)*64) /* RW-4A */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD 30:0 /* RWIVF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_INIT 0x003fffff /* RWI-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_MAX 0x7fffffff /* RW--V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION 31:31 /* RWIVF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_DISABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_ENABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG 0x050 /* RW-4R */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT 5:0 /* RWIVF */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT__PROD 0x00000002 /* RW--V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN 6:6 /* RWIVF */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_STATE_CG_EN 7:7 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT 13:8 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT_INIT 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT__PROD 0x00000002 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_EN 14:14 /* RWIVF */
#define NV_RUNLIST_BLKCG_STALL_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_STALL_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_STALL_CG_EN__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN 15:15 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN__PROD 0x00000001 /* */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT 19:16 /* RWIVF */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT 23:20 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_INIT 0x0000000f /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_FULLSPEED 0x0000000f /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL 27:24 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL_INIT 0x00000000 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN 28:28 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER 29:29 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_EN 0x00000001 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_DIS 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN 30:30 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN 31:31 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG1 0x054 /* RW-4R */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN 0:0 /* RWIVF */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG 16:1 /* */
#define NV_RUNLIST_BLKCG1_SLCG_ENABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG1_SLCG_DISABLED 0x0000FFFF /* */
#define NV_RUNLIST_BLKCG1_SLCG__PROD 0x00000001 /* */
#define NV_RUNLIST_BLKCG1_SLCG_RLP 1:1 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_RLP_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_RLP_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_RLP__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH 3:3 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_EVH_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM 7:7 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_EISM_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_LB 8:8 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_LB_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_LB_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_LB__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL 9:9 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP 10:10 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB 11:11 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI 13:13 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PRI_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW 14:14 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR 15:15 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_SLCG_MISC 0x05C /* RW-4R */
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS 3:0 /* RWIVF */
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTERNAL_DOORBELL 0x090 /* -W-4R */
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID 11:0 /* */
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID_HW 10:0 /* -WXUF */
#define NV_RUNLIST_INTERNAL_DOORBELL_GFID 21:16 /* -WXUF */
#define NV_RUNLIST_SCHED_DISABLE 0x094 /* RW-4R */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST 0:0 /* RWIVF */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_ENABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_DISABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_FALSE 0x00000000 /* */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_TRUE 0x00000001 /* */
#define NV_RUNLIST_PREEMPT 0x098 /* RW-4R */
#define NV_RUNLIST_PREEMPT_ID 11:0 /* */
#define NV_RUNLIST_PREEMPT_ID_HW 10:0 /* RWIUF */
#define NV_RUNLIST_PREEMPT_ID_HW_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_PREEMPT_TSG_PREEMPT_PENDING 20:20 /* R-IVF */
#define NV_RUNLIST_PREEMPT_TSG_PREEMPT_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_PREEMPT_TSG_PREEMPT_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_PREEMPT_RUNLIST_PREEMPT_PENDING 21:21 /* R-IVF */
#define NV_RUNLIST_PREEMPT_RUNLIST_PREEMPT_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_PREEMPT_RUNLIST_PREEMPT_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_PREEMPT_TYPE 25:24 /* RWIVF */
#define NV_RUNLIST_PREEMPT_TYPE_RUNLIST 0x00000000 /* RWI-V */
#define NV_RUNLIST_PREEMPT_TYPE_TSG 0x00000001 /* RW--V */
#define NV_RUNLIST_ENGINE_STATUS0(i) (0x200+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS0__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS0_TSGID 11:0 /* */
#define NV_RUNLIST_ENGINE_STATUS0_TSGID_HW 10:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS 15:13 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_INVALID 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_VALID 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SAVE 0x00000005 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_LOAD 0x00000006 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SWITCH 0x00000007 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX 13:13 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_INVALID 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_VALID 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD 14:14 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_NOT_IN_PROGRESS 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_IN_PROGRESS 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW 15:15 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_NOT_IN_PROGRESS 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_IN_PROGRESS 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID 27:16 /* */
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID_HW 26:16 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD 29:29 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED 30:30 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_IDLE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_BUSY 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS1(i) (0x204+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS1__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS1_GFID 5:0 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID 13:8 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_INTR_ID 20:16 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID 30:30 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL(i,j) (0x208+(i)*64+(j)*4) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_2 2 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID 11:0 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID_HW 10:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID 15:15 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID 27:16 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID_HW 26:16 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG(i) (0x228+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN 0:0 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_DISABLED 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_ENABLED 0x00000001 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS 8:8 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI 12:12 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS 16:16 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI 20:20 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_ENGINE_ID 29:24 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST(i) (0x210+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_INST__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET 1:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_VID_MEM 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID 11:11 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_PTR_LO 31:12 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI(i) (0x214+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI 31:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST(i) (0x218+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET 1:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_VID_MEM 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID 11:11 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_PTR_LO 31:12 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI(i) (0x21C+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI 31:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
#endif // __ga100_dev_runlist_h__
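
The field macros above use the HIGH:LOW bit-range convention (for example, NV_RUNLIST_SUBMIT_LENGTH is bits 15:0 of the NV_RUNLIST_SUBMIT register at 0x088). A minimal sketch of how such ranges can be turned into shifts and masks and read out of a 32-bit register value; the FIELD_* helper names below are illustrative assumptions, not the driver's own macros, though they rely on the same "1 ? H:L" / "0 ? H:L" conditional-expression trick that works with these defines:

/* Illustrative helpers (assumed, not part of this header): split a HIGH:LOW
 * range into high bit, low bit, mask, and extract a field from a register value. */
#define FIELD_HI(range)     (1 ? range)                 /* (1 ? 15:0) evaluates to 15 */
#define FIELD_LO(range)     (0 ? range)                 /* (0 ? 15:0) evaluates to 0  */
#define FIELD_MASK(range)   (0xFFFFFFFFU >> (31 - FIELD_HI(range) + FIELD_LO(range)))
#define FIELD_VAL(range, v) (((v) >> FIELD_LO(range)) & FIELD_MASK(range))

/* Example: pull the runlist length out of a value read from NV_RUNLIST_SUBMIT. */
static unsigned int runlist_submit_length(unsigned int submit_reg)
{
    return FIELD_VAL(NV_RUNLIST_SUBMIT_LENGTH, submit_reg); /* bits 15:0 */
}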


@@ -0,0 +1,339 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gm107_dev_mmu_h__
#define __gm107_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+28):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0xff /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x01 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2C 0x02 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2C 0x03 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2C 0x04 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2C 0x05 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2C 0x06 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2Z 0x07 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2Z 0x08 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2Z 0x09 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2Z 0x0a /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2Z 0x0b /* R---V */
#define NV_MMU_PTE_KIND_Z16_4CZ 0x0c /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_4CZ 0x0d /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_4CZ 0x0e /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_4CZ 0x0f /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_4CZ 0x10 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x11 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_1Z 0x12 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_1Z 0x13 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_1Z 0x14 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_1Z 0x15 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_1Z 0x16 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CZ 0x17 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CZ 0x18 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CZ 0x19 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CZ 0x1a /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CZ 0x1b /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CS 0x1c /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CS 0x1d /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CS 0x1e /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CS 0x1f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CS 0x20 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_4CSZV 0x21 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_4CSZV 0x22 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_4CSZV 0x23 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_4CSZV 0x24 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_4CSZV 0x25 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12 0x26 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4 0x27 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8 0x28 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24 0x29 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_1ZV 0x2e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_1ZV 0x2f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_1ZV 0x30 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_1ZV 0x31 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CS 0x32 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CS 0x33 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CS 0x34 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CS 0x35 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CZV 0x3a /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CZV 0x3b /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CZV 0x3c /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CZV 0x3d /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2ZV 0x3e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2ZV 0x3f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2ZV 0x40 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2ZV 0x41 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_4CSZV 0x42 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_4CSZV 0x43 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_4CSZV 0x44 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_4CSZV 0x45 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x46 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_1Z 0x47 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_1Z 0x48 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_1Z 0x49 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_1Z 0x4a /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_1Z 0x4b /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CS 0x4c /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CS 0x4d /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CS 0x4e /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CS 0x4f /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CS 0x50 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CZ 0x51 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CZ 0x52 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CZ 0x53 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CZ 0x54 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CZ 0x55 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_4CSZV 0x56 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_4CSZV 0x57 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_4CSZV 0x58 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_4CSZV 0x59 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_4CSZV 0x5a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12 0x5b /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4 0x5c /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8 0x5d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24 0x5e /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_1ZV 0x63 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_1ZV 0x64 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_1ZV 0x65 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_1ZV 0x66 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CS 0x67 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CS 0x68 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CS 0x69 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CS 0x6a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CZV 0x6f /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CZV 0x70 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CZV 0x71 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CZV 0x72 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2ZV 0x73 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2ZV 0x74 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2ZV 0x75 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2ZV 0x76 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_4CSZV 0x77 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_4CSZV 0x78 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_4CSZV 0x79 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_4CSZV 0x7a /* R---V */
#define NV_MMU_PTE_KIND_ZF32 0x7b /* R---V */
#define NV_MMU_PTE_KIND_ZF32_1Z 0x7c /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_1Z 0x7d /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_1Z 0x7e /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_1Z 0x7f /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_1Z 0x80 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CS 0x81 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CS 0x82 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CS 0x83 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CS 0x84 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CS 0x85 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CZ 0x86 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CZ 0x87 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CZ 0x88 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CZ 0x89 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CZ 0x8a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12 0x8b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4 0x8c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8 0x8d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24 0x8e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CS 0x8f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CS 0x90 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CS 0x91 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CS 0x92 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1ZV 0x97 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1ZV 0x98 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1ZV 0x99 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1ZV 0x9a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CZV 0x9b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CZV 0x9c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CZV 0x9d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CZV 0x9e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CS 0x9f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CS 0xa0 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CS 0xa1 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CS 0xa2 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CSZV 0xa3 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CSZV 0xa4 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CSZV 0xa5 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CSZV 0xa6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12 0xa7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4 0xa8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8 0xa9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24 0xaa /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CS 0xab /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CS 0xac /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CS 0xad /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CS 0xae /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1ZV 0xb3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1ZV 0xb4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1ZV 0xb5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1ZV 0xb6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CZV 0xb7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CZV 0xb8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CZV 0xb9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CZV 0xba /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CS 0xbb /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CS 0xbc /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CS 0xbd /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CS 0xbe /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CSZV 0xbf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CSZV 0xc0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CSZV 0xc1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CSZV 0xc2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0xc3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_1CS 0xc4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_1CS 0xc5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_1CS 0xc6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_1CS 0xc7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_1CS 0xc8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CSZV 0xce /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CSZV 0xcf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CSZV 0xd0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CSZV 0xd1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CSZV 0xd2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CS 0xd3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CS 0xd4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CS 0xd5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CS 0xd6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CS 0xd7 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_16BX2 0xfe /* R---V */
#define NV_MMU_PTE_KIND_C32_2C 0xd8 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBR 0xd9 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBA 0xda /* R---V */
#define NV_MMU_PTE_KIND_C32_2CRA 0xdb /* R---V */
#define NV_MMU_PTE_KIND_C32_2BRA 0xdc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2C 0xdd /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CBR 0xde /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CRA 0xcc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2C 0xdf /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBR 0xe0 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBA 0xe1 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CRA 0xe2 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2BRA 0xe3 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2C 0xe4 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2CRA 0xe5 /* R---V */
#define NV_MMU_PTE_KIND_C64_2C 0xe6 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBR 0xe7 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBA 0xe8 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CRA 0xe9 /* R---V */
#define NV_MMU_PTE_KIND_C64_2BRA 0xea /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2C 0xeb /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CBR 0xec /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CRA 0xcd /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2C 0xed /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBR 0xee /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBA 0xef /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CRA 0xf0 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2BRA 0xf1 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2C 0xf2 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2CRA 0xf3 /* R---V */
#define NV_MMU_PTE_KIND_C128_2C 0xf4 /* R---V */
#define NV_MMU_PTE_KIND_C128_2CR 0xf5 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2C 0xf6 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2CR 0xf7 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2C 0xf8 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2CR 0xf9 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2C 0xfa /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2CR 0xfb /* R---V */
#define NV_MMU_PTE_KIND_X8C24 0xfc /* R---V */
#define NV_MMU_PTE_KIND_PITCH_NO_SWIZZLE 0xfd /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xca /* R---V */
#define NV_MMU_PTE_KIND_SMHOST_MESSAGE 0xcb /* R---V */
#endif // __gm107_dev_mmu_h__
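The PTE kind values above are opaque codes; when debugging it can help to map a few of them back to names. Below is a minimal sketch in C that covers only a handful of the kinds listed in this header — the helper name and the selection of kinds are illustrative, not part of the header itself.

#include <stdint.h>

/* Illustrative only: map a few of the gm107 PTE kind codes listed above to
 * printable names. The numeric values are taken verbatim from this header. */
static const char *pte_kind_name(uint8_t kind)
{
    switch (kind) {
    case 0xfe: return "GENERIC_16BX2";
    case 0xfd: return "PITCH_NO_SWIZZLE";
    case 0xfc: return "X8C24";
    case 0xca: return "SMSKED_MESSAGE";
    case 0xcb: return "SMHOST_MESSAGE";
    case 0xc3: return "ZF32_X24S8";
    default:   return "other/unknown";
    }
}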

View File

@@ -0,0 +1,203 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gp100_dev_fault_h__
#define __gp100_dev_fault_h__
/* This file is autogenerated. Do not edit */
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 0 /* */
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
#define NV_PFAULT_MMU_ENG_ID_IFB 3 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1 4 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2 5 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST0 6 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST1 7 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST2 8 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST3 9 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST4 10 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST5 11 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST6 12 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST7 13 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST8 14 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST9 15 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST10 16 /* */
#define NV_PFAULT_MMU_ENG_ID_SEC 18 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF 19 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC 2 /* */
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 27 /* */
#define NV_PFAULT_MMU_ENG_ID_CE0 21 /* */
#define NV_PFAULT_MMU_ENG_ID_CE1 22 /* */
#define NV_PFAULT_MMU_ENG_ID_CE2 27 /* */
#define NV_PFAULT_MMU_ENG_ID_CE3 28 /* */
#define NV_PFAULT_MMU_ENG_ID_CE4 29 /* */
#define NV_PFAULT_MMU_ENG_ID_CE5 30 /* */
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 23 /* */
#define NV_PFAULT_MMU_ENG_ID_PTP 24 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC0 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC1 17 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC2 20 /* */
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 31 /* */
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
#define NV_PFAULT_CLIENT 14:8 /* */
#define NV_PFAULT_CLIENT_GPC_L1_0 0x00000000 /* */
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000001 /* */
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000002 /* */
#define NV_PFAULT_CLIENT_GPC_L1_1 0x00000003 /* */
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000004 /* */
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000005 /* */
#define NV_PFAULT_CLIENT_GPC_L1_2 0x00000006 /* */
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000007 /* */
#define NV_PFAULT_CLIENT_GPC_PE_2 0x00000008 /* */
#define NV_PFAULT_CLIENT_GPC_L1_3 0x00000009 /* */
#define NV_PFAULT_CLIENT_GPC_T1_3 0x0000000A /* */
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
#define NV_PFAULT_CLIENT_GPC_RAST 0x0000000C /* */
#define NV_PFAULT_CLIENT_GPC_GCC 0x0000000D /* */
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x0000000E /* */
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x0000000F /* */
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000010 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000011 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000012 /* */
#define NV_PFAULT_CLIENT_GPC_L1_4 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_PE_4 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_L1_5 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000018 /* */
#define NV_PFAULT_CLIENT_GPC_PE_5 0x00000019 /* */
#define NV_PFAULT_CLIENT_GPC_L1_6 0x0000001A /* */
#define NV_PFAULT_CLIENT_GPC_T1_6 0x0000001B /* */
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000001C /* */
#define NV_PFAULT_CLIENT_GPC_L1_7 0x0000001D /* */
#define NV_PFAULT_CLIENT_GPC_T1_7 0x0000001E /* */
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000001F /* */
#define NV_PFAULT_CLIENT_GPC_L1_8 0x00000020 /* */
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000022 /* */
#define NV_PFAULT_CLIENT_GPC_L1_9 0x00000023 /* */
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000024 /* */
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000025 /* */
#define NV_PFAULT_CLIENT_GPC_L1_10 0x00000026 /* */
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000027 /* */
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000028 /* */
#define NV_PFAULT_CLIENT_GPC_L1_11 0x00000029 /* */
#define NV_PFAULT_CLIENT_GPC_T1_11 0x0000002A /* */
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000002B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000030 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000038 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000039 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003B /* */
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000013 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_0 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_1 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_2 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_3 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_RGG_UTLB 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
#define NV_PFAULT_ACCESS_TYPE 18:16 /* */
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
#define NV_PFAULT_GPC_ID 28:24 /* */
#endif // __gp100_dev_fault_h__
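The HIGH:LOW field macros above (for example NV_PFAULT_FAULT_TYPE 4:0) are normally consumed through DRF-style helper macros rather than used directly. A minimal standalone sketch of such helpers, and of decoding a fault-info word with them, is shown below; the FLD_* helper names and the example info word are hypothetical, and the field definitions are repeated here only so the sketch compiles on its own.

#include <stdio.h>
#include <stdint.h>

/* Minimal DRF-style helpers for HIGH:LOW field macros (sketch only).
 * "0 ? hi:lo" picks the low bit index, "1 ? hi:lo" picks the high one. */
#define FLD_LO(f)        ((unsigned)(0 ? f))
#define FLD_HI(f)        ((unsigned)(1 ? f))
#define FLD_MASK(f)      (0xFFFFFFFFu >> (31u - FLD_HI(f) + FLD_LO(f)))
#define FLD_VAL(f, reg)  (((reg) >> FLD_LO(f)) & FLD_MASK(f))

/* Field positions repeated from the gp100 dev_fault.h definitions above,
 * so this sketch builds standalone; a real build would include that header. */
#define NV_PFAULT_FAULT_TYPE       4:0
#define NV_PFAULT_CLIENT           14:8
#define NV_PFAULT_ACCESS_TYPE      18:16
#define NV_PFAULT_MMU_CLIENT_TYPE  20:20
#define NV_PFAULT_GPC_ID           28:24

int main(void)
{
    uint32_t info = 0x01050302u;  /* hypothetical packed fault-info word */

    printf("fault type  : 0x%x\n", FLD_VAL(NV_PFAULT_FAULT_TYPE, info));
    printf("client      : 0x%x\n", FLD_VAL(NV_PFAULT_CLIENT, info));
    printf("access type : 0x%x\n", FLD_VAL(NV_PFAULT_ACCESS_TYPE, info));
    printf("client type : 0x%x\n", FLD_VAL(NV_PFAULT_MMU_CLIENT_TYPE, info));
    printf("gpc id      : 0x%x\n", FLD_VAL(NV_PFAULT_GPC_ID, info));
    return 0;
}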

View File

@@ -0,0 +1,71 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// Excerpt of gp100/dev_fb.h
#ifndef __dev_fb_h__
#define __dev_fb_h__
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_VA 0:0 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_VA_FALSE 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_VA_TRUE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_PDB 1:1 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_PDB_FALSE 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_PDB_TRUE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY 5:3 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_NONE 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_START 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_CANCEL 0x00000004 /* */
#define NV_PFB_PRI_MMU_INVALIDATE_SYS_MEMBAR 6:6 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_SYS_MEMBAR_FALSE 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_SYS_MEMBAR_TRUE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ACK 8:7 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_ACK_NONE_REQUIRED 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ACK_INTRANODE 0x00000002 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ACK_GLOBALLY 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_CLIENT_ID 14:9 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_GPC_ID 19:15 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_CLIENT_TYPE 20:20 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_CLIENT_TYPE_GPC 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_CLIENT_TYPE_HUB 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL 26:24 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_ALL 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_PTE_ONLY 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE0 0x00000002 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE1 0x00000003 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE2 0x00000004 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE3 0x00000005 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE4 0x00000006 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE5 0x00000007 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_TRIGGER 31:31 /* -WEVF */
#define NV_PFB_PRI_MMU_INVALIDATE_TRIGGER_FALSE 0x00000000 /* -WE-V */
#define NV_PFB_PRI_MMU_INVALIDATE_TRIGGER_TRUE 0x00000001 /* -W--T */
#define NV_PFB_PRI_MMU_PAGE_FAULT_CTRL_PRF_FILTER 1:0 /* RWEVF */
#define NV_PFB_PRI_MMU_PAGE_FAULT_CTRL_PRF_FILTER_SEND_ALL 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_PAGE_FAULT_CTRL_PRF_FILTER_SEND_NONE 0x00000003 /* RW--V */
#endif // __dev_fb_h__
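As a sketch of how the invalidate fields above combine, the function below composes a "full invalidate, no replay, system membar, no ack" value using the documented bit positions (ALL_VA 0:0, ALL_PDB 1:1, REPLAY 5:3, SYS_MEMBAR 6:6, ACK 8:7, TRIGGER 31:31). It is illustrative only; how the value reaches the hardware register is out of scope here.

#include <stdint.h>

/* Sketch: build an NV_PFB_PRI_MMU_INVALIDATE-style value from the field
 * positions documented above. Shift amounts mirror the HIGH:LOW macros. */
static inline uint32_t mmu_invalidate_all(void)
{
    uint32_t v = 0;
    v |= 1u << 0;    /* ..._ALL_VA_TRUE        */
    v |= 1u << 1;    /* ..._ALL_PDB_TRUE       */
    v |= 0u << 3;    /* ..._REPLAY_NONE        */
    v |= 1u << 6;    /* ..._SYS_MEMBAR_TRUE    */
    v |= 0u << 7;    /* ..._ACK_NONE_REQUIRED  */
    v |= 1u << 31;   /* ..._TRIGGER_TRUE       */
    return v;
}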

View File

@@ -0,0 +1,625 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gp100_dev_mmu_h__
#define __gp100_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+18+11):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0xff /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x01 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2C 0x02 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2C 0x03 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2C 0x04 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2C 0x05 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2C 0x06 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2Z 0x07 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2Z 0x08 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2Z 0x09 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2Z 0x0a /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2Z 0x0b /* R---V */
#define NV_MMU_PTE_KIND_Z16_2CZ 0x36 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2CZ 0x37 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2CZ 0x38 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2CZ 0x39 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2CZ 0x5f /* R---V */
#define NV_MMU_PTE_KIND_Z16_4CZ 0x0c /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_4CZ 0x0d /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_4CZ 0x0e /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_4CZ 0x0f /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_4CZ 0x10 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x11 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_1Z 0x12 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_1Z 0x13 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_1Z 0x14 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_1Z 0x15 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_1Z 0x16 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CZ 0x17 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CZ 0x18 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CZ 0x19 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CZ 0x1a /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CZ 0x1b /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CS 0x1c /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CS 0x1d /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CS 0x1e /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CS 0x1f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CS 0x20 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_4CSZV 0x21 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_4CSZV 0x22 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_4CSZV 0x23 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_4CSZV 0x24 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_4CSZV 0x25 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12 0x26 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4 0x27 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8 0x28 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24 0x29 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_1ZV 0x2e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_1ZV 0x2f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_1ZV 0x30 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_1ZV 0x31 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CS 0x32 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CS 0x33 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CS 0x34 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CS 0x35 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CZV 0x3a /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CZV 0x3b /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CZV 0x3c /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CZV 0x3d /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2ZV 0x3e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2ZV 0x3f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2ZV 0x40 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2ZV 0x41 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_4CSZV 0x42 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_4CSZV 0x43 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_4CSZV 0x44 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_4CSZV 0x45 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x46 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_1Z 0x47 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_1Z 0x48 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_1Z 0x49 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_1Z 0x4a /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_1Z 0x4b /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CS 0x4c /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CS 0x4d /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CS 0x4e /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CS 0x4f /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CS 0x50 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CZ 0x51 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CZ 0x52 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CZ 0x53 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CZ 0x54 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CZ 0x55 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_4CSZV 0x56 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_4CSZV 0x57 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_4CSZV 0x58 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_4CSZV 0x59 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_4CSZV 0x5a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12 0x5b /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4 0x5c /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8 0x5d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24 0x5e /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_1ZV 0x63 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_1ZV 0x64 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_1ZV 0x65 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_1ZV 0x66 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CS 0x67 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CS 0x68 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CS 0x69 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CS 0x6a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CZV 0x6f /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CZV 0x70 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CZV 0x71 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CZV 0x72 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2ZV 0x73 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2ZV 0x74 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2ZV 0x75 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2ZV 0x76 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_4CSZV 0x77 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_4CSZV 0x78 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_4CSZV 0x79 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_4CSZV 0x7a /* R---V */
#define NV_MMU_PTE_KIND_ZF32 0x7b /* R---V */
#define NV_MMU_PTE_KIND_ZF32_1Z 0x7c /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_1Z 0x7d /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_1Z 0x7e /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_1Z 0x7f /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_1Z 0x80 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CS 0x81 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CS 0x82 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CS 0x83 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CS 0x84 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CS 0x85 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CZ 0x86 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CZ 0x87 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CZ 0x88 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CZ 0x89 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CZ 0x8a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12 0x8b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4 0x8c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8 0x8d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24 0x8e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CS 0x8f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CS 0x90 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CS 0x91 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CS 0x92 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1ZV 0x97 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1ZV 0x98 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1ZV 0x99 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1ZV 0x9a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CZV 0x9b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CZV 0x9c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CZV 0x9d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CZV 0x9e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CS 0x9f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CS 0xa0 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CS 0xa1 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CS 0xa2 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CSZV 0xa3 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CSZV 0xa4 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CSZV 0xa5 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CSZV 0xa6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12 0xa7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4 0xa8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8 0xa9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24 0xaa /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CS 0xab /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CS 0xac /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CS 0xad /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CS 0xae /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1ZV 0xb3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1ZV 0xb4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1ZV 0xb5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1ZV 0xb6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CZV 0xb7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CZV 0xb8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CZV 0xb9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CZV 0xba /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CS 0xbb /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CS 0xbc /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CS 0xbd /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CS 0xbe /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CSZV 0xbf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CSZV 0xc0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CSZV 0xc1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CSZV 0xc2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0xc3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_1CS 0xc4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_1CS 0xc5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_1CS 0xc6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_1CS 0xc7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_1CS 0xc8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CSZV 0xce /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CSZV 0xcf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CSZV 0xd0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CSZV 0xd1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CSZV 0xd2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CS 0xd3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CS 0xd4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CS 0xd5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CS 0xd6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CS 0xd7 /* R---V */
#define NV_MMU_PTE_KIND_S8 0x2a /* R---V */
#define NV_MMU_PTE_KIND_S8_2S 0x2b /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_16BX2 0xfe /* R---V */
#define NV_MMU_PTE_KIND_C32_2C 0xd8 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBR 0xd9 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBA 0xda /* R---V */
#define NV_MMU_PTE_KIND_C32_2CRA 0xdb /* R---V */
#define NV_MMU_PTE_KIND_C32_2BRA 0xdc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2C 0xdd /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CBR 0xde /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CRA 0xcc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2C 0xdf /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBR 0xe0 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBA 0xe1 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CRA 0xe2 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2BRA 0xe3 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_4CBRA 0x2c /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2C 0xe4 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2CRA 0xe5 /* R---V */
#define NV_MMU_PTE_KIND_C64_2C 0xe6 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBR 0xe7 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBA 0xe8 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CRA 0xe9 /* R---V */
#define NV_MMU_PTE_KIND_C64_2BRA 0xea /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2C 0xeb /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CBR 0xec /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CRA 0xcd /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2C 0xed /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBR 0xee /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBA 0xef /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CRA 0xf0 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2BRA 0xf1 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_4CBRA 0x2d /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2C 0xf2 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2CRA 0xf3 /* R---V */
#define NV_MMU_PTE_KIND_C128_2C 0xf4 /* R---V */
#define NV_MMU_PTE_KIND_C128_2CR 0xf5 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2C 0xf6 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2CR 0xf7 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2C 0xf8 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2CR 0xf9 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2C 0xfa /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2CR 0xfb /* R---V */
#define NV_MMU_PTE_KIND_X8C24 0xfc /* R---V */
#define NV_MMU_PTE_KIND_PITCH_NO_SWIZZLE 0xfd /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xca /* R---V */
#define NV_MMU_PTE_KIND_SMHOST_MESSAGE 0xcb /* R---V */
#define NV_MMU_VER1_PDE /* ----G */
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PDE__SIZE 8
#define NV_MMU_VER1_PTE /* ----G */
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+18+11):(1*32+12) /* RWXVF */
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PTE__SIZE 8
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_NEW_PDE /* ----G */
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PDE__SIZE 8
#define NV_MMU_NEW_DUAL_PDE /* ----G */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
#define NV_MMU_NEW_PTE /* ----G */
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_NEW_PTE_COMPTAGLINE (18+35):36 /* RWXVF */
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PTE__SIZE 8
#define NV_MMU_VER2_PDE /* ----G */
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PDE__SIZE 8
#define NV_MMU_VER2_DUAL_PDE /* ----G */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
#define NV_MMU_VER2_PTE /* ----G */
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER2_PTE_COMPTAGLINE (18+35):36 /* RWXVF */
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PTE__SIZE 8
#endif // __gp100_dev_mmu_h__
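To illustrate the 64-bit VER2 PTE layout defined above (VALID 0:0, APERTURE 2:1, VOL 3:3, ADDRESS_SYS 53:8, KIND 63:56, with ADDRESS_SHIFT 0xc), here is a small packing sketch for a 4 KiB system-memory page. The function name is hypothetical and the sketch assumes ADDRESS_SYS holds the physical address right-shifted by NV_MMU_VER2_PTE_ADDRESS_SHIFT.

#include <stdint.h>

/* Sketch: pack a VER2 PTE for a coherent system-memory 4 KiB page using the
 * bit positions documented above. Illustrative only. */
static inline uint64_t pack_ver2_pte_sysmem(uint64_t pa, uint8_t kind)
{
    uint64_t pte = 0;
    pte |= 1ull << 0;                                /* VALID_TRUE                      */
    pte |= 2ull << 1;                                /* APERTURE_SYSTEM_COHERENT_MEMORY */
    pte |= 1ull << 3;                                /* VOL_TRUE                        */
    pte |= ((pa >> 12) & ((1ull << 46) - 1)) << 8;   /* ADDRESS_SYS in bits 53:8        */
    pte |= (uint64_t)kind << 56;                     /* KIND in bits 63:56              */
    return pte;
}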

View File

@@ -0,0 +1,400 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __tu102_dev_fault_h__
#define __tu102_dev_fault_h__
/* This file is autogenerated. Do not edit */
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 64 /* */
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
#define NV_PFAULT_MMU_ENG_ID_IFB 9 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1 128 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2 192 /* */
#define NV_PFAULT_MMU_ENG_ID_SEC 14 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF 8 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC 10 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC0 10 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC1 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC2 26 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG0 24 /* */
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE0 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE1 16 /* */
#define NV_PFAULT_MMU_ENG_ID_CE2 17 /* */
#define NV_PFAULT_MMU_ENG_ID_CE3 18 /* */
#define NV_PFAULT_MMU_ENG_ID_CE4 19 /* */
#define NV_PFAULT_MMU_ENG_ID_CE5 20 /* */
#define NV_PFAULT_MMU_ENG_ID_CE6 21 /* */
#define NV_PFAULT_MMU_ENG_ID_CE7 22 /* */
#define NV_PFAULT_MMU_ENG_ID_CE8 23 /* */
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 6 /* */
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC0 11 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC1 12 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC2 13 /* */
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 31 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST0 32 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST1 33 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST2 34 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST3 35 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST4 36 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST5 37 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST6 38 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST7 39 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST8 40 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST9 41 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST10 42 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST11 43 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST12 44 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST13 45 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST14 46 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN0 128 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN1 129 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN2 130 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN3 131 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN4 132 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN5 133 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN6 134 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN7 135 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN8 136 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN9 137 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN10 138 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN11 139 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN12 140 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN13 141 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN14 142 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN15 143 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN16 144 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN17 145 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN18 146 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN19 147 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN20 148 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN21 149 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN22 150 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN23 151 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN24 152 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN25 153 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN26 154 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN27 155 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN28 156 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN29 157 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN30 158 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN31 159 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN32 160 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN33 161 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN34 162 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN35 163 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN36 164 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN37 165 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN38 166 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN39 167 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN40 168 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN41 169 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN42 170 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN43 171 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN44 172 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN45 173 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN46 174 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN47 175 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN48 176 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN49 177 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN50 178 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN51 179 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN52 180 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN53 181 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN54 182 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN55 183 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN56 184 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN57 185 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN58 186 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN59 187 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN60 188 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN61 189 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN62 190 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN63 191 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN0 192 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN1 193 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN2 194 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN3 195 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN4 196 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN5 197 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN6 198 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN7 199 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN8 200 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN9 201 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN10 202 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN11 203 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN12 204 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN13 205 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN14 206 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN15 207 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN16 208 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN17 209 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN18 210 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN19 211 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN20 212 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN21 213 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN22 214 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN23 215 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN24 216 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN25 217 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN26 218 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN27 219 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN28 220 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN29 221 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN30 222 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN31 223 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN32 224 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN33 225 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN34 226 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN35 227 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN36 228 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN37 229 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN38 230 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN39 231 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN40 232 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN41 233 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN42 234 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN43 235 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN44 236 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN45 237 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN46 238 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN47 239 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN48 240 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN49 241 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN50 242 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN51 243 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN52 244 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN53 245 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN54 246 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN55 247 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN56 248 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN57 249 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN58 250 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN59 251 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN60 252 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN61 253 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN62 254 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN63 255 /* */
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
#define NV_PFAULT_CLIENT 14:8 /* */
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_0 0x00000018 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_1 0x00000019 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_2 0x0000001A /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_3 0x0000001B /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_4 0x0000001C /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_5 0x0000001D /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_6 0x0000001E /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_7 0x0000001F /* */
#define NV_PFAULT_CLIENT_GPC_RGG_UTLB 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC0 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC1 0x0000003A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC2 0x0000003B /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG0 0x0000003C /* */
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
#define NV_PFAULT_CLIENT_HUB_DWBIF 0x00000036 /* */
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
#define NV_PFAULT_GPC_ID 28:24 /* */
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
#define NV_PFAULT_REPLAYABLE_FAULT_EN 30:30 /* */
#define NV_PFAULT_VALID 31:31 /* */
#endif // __tu102_dev_fault_h__
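
The NV_PFAULT_* definitions above describe bit fields of a packed fault-information word: fault type in 4:0, client in 14:8, access type in 19:16, GPC/HUB client type in 20:20, and valid in 31:31. As a hedged illustration only (the FIELD_GET helper and the sample value are assumptions, not part of this header), a decode might look like:

#include <stdint.h>
#include <stdio.h>

/* Illustrative helpers: build a mask for bits [hi:lo] and extract that range. */
#define FIELD_MASK(hi, lo)      ((((uint32_t)1 << ((hi) - (lo) + 1)) - 1) << (lo))
#define FIELD_GET(val, hi, lo)  (((val) & FIELD_MASK(hi, lo)) >> (lo))

int main(void)
{
    uint32_t info = 0x80110002u;  /* arbitrary example value, for illustration only */

    unsigned fault_type  = FIELD_GET(info, 4, 0);    /* NV_PFAULT_FAULT_TYPE        4:0  */
    unsigned client      = FIELD_GET(info, 14, 8);   /* NV_PFAULT_CLIENT           14:8  */
    unsigned access_type = FIELD_GET(info, 19, 16);  /* NV_PFAULT_ACCESS_TYPE     19:16  */
    unsigned client_type = FIELD_GET(info, 20, 20);  /* NV_PFAULT_MMU_CLIENT_TYPE 20:20  */
    unsigned valid       = FIELD_GET(info, 31, 31);  /* NV_PFAULT_VALID           31:31  */

    if (valid && fault_type == 0x2 /* NV_PFAULT_FAULT_TYPE_PTE */)
        printf("PTE fault: client=0x%02x access=%u client_type=%u\n",
               client, access_type, client_type);
    return 0;
}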


@@ -0,0 +1,649 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __tu102_dev_mmu_h__
#define __tu102_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0x07 /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY 0x06 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x01 /* R---V */
#define NV_MMU_PTE_KIND_S8 0x02 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x03 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0x04 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x05 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE 0x08 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC 0x09 /* R---V */
#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC 0x0A /* R---V */
#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC 0x0B /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC 0x0C /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC 0x0D /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC 0x0E /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0x0F /* R---V */
#define NV_MMU_PTE_KIND_Z16_2C 0x2a /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2C 0x11 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2C 0xC3 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2C 0x46 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2C 0x6c /* R---V */
#define NV_MMU_PTE_KIND_Z16_2Z 0x6b /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2Z 0x10 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2Z 0x60 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2Z 0x61 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2Z 0x62 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2CZ 0x36 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2CZ 0x37 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2CZ 0x38 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2CZ 0x39 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2CZ 0x5f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_1Z 0x12 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_1Z 0x13 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_1Z 0x14 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_1Z 0x15 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_1Z 0x16 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CZ 0x17 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CZ 0x18 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CZ 0x19 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CZ 0x1a /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CZ 0x1b /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CS 0x1c /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CS 0x1d /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CS 0x1e /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CS 0x1f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CS 0x20 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_4CSZV 0x21 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_4CSZV 0x22 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_4CSZV 0x23 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_4CSZV 0x24 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_4CSZV 0x25 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12 0x26 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4 0x27 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8 0x28 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24 0x29 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_1ZV 0x2e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_1ZV 0x2f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_1ZV 0x30 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_1ZV 0x31 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CS 0x32 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CS 0x33 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CS 0x34 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CS 0x35 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CZV 0x3a /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CZV 0x3b /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CZV 0x3c /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CZV 0x3d /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2ZV 0x3e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2ZV 0x3f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2ZV 0x40 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2ZV 0x41 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_4CSZV 0x42 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_4CSZV 0x43 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_4CSZV 0x44 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_4CSZV 0x45 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_1Z 0x47 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_1Z 0x48 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_1Z 0x49 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_1Z 0x4a /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_1Z 0x4b /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CS 0x4c /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CS 0x4d /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CS 0x4e /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CS 0x4f /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CS 0x50 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CZ 0x51 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CZ 0x52 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CZ 0x53 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CZ 0x54 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CZ 0x55 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_4CSZV 0x56 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_4CSZV 0x57 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_4CSZV 0x58 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_4CSZV 0x59 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_4CSZV 0x5a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12 0x5b /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4 0x5c /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8 0x5d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24 0x5e /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_1ZV 0x63 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_1ZV 0x64 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_1ZV 0x65 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_1ZV 0x66 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CS 0x67 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CS 0x68 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CS 0x69 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CS 0x6a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CZV 0x6f /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CZV 0x70 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CZV 0x71 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CZV 0x72 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2ZV 0x73 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2ZV 0x74 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2ZV 0x75 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2ZV 0x76 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_4CSZV 0x77 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_4CSZV 0x78 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_4CSZV 0x79 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_4CSZV 0x7a /* R---V */
#define NV_MMU_PTE_KIND_ZF32 0x7b /* R---V */
#define NV_MMU_PTE_KIND_ZF32_1Z 0x7c /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_1Z 0x7d /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_1Z 0x7e /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_1Z 0x7f /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_1Z 0x80 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CS 0x81 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CS 0x82 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CS 0x83 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CS 0x84 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CS 0x85 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CZ 0x86 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CZ 0x87 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CZ 0x88 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CZ 0x89 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CZ 0x8a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12 0x8b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4 0x8c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8 0x8d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24 0x8e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CS 0x8f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CS 0x90 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CS 0x91 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CS 0x92 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1ZV 0x97 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1ZV 0x98 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1ZV 0x99 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1ZV 0x9a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CZV 0x9b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CZV 0x9c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CZV 0x9d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CZV 0x9e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CS 0x9f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CS 0xa0 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CS 0xa1 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CS 0xa2 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CSZV 0xa3 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CSZV 0xa4 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CSZV 0xa5 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CSZV 0xa6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12 0xa7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4 0xa8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8 0xa9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24 0xaa /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CS 0xab /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CS 0xac /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CS 0xad /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CS 0xae /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1ZV 0xb3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1ZV 0xb4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1ZV 0xb5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1ZV 0xb6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CZV 0xb7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CZV 0xb8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CZV 0xb9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CZV 0xba /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CS 0xbb /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CS 0xbc /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CS 0xbd /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CS 0xbe /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CSZV 0xbf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CSZV 0xc0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CSZV 0xc1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CSZV 0xc2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_1CS 0xc4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_1CS 0xc5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_1CS 0xc6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_1CS 0xc7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_1CS 0xc8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CSZV 0xce /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CSZV 0xcf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CSZV 0xd0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CSZV 0xd1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CSZV 0xd2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CS 0xd3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CS 0xd4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CS 0xd5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CS 0xd6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CS 0xd7 /* R---V */
#define NV_MMU_PTE_KIND_S8_2S 0x2b /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_16BX2 0xfe /* R---V */
#define NV_MMU_PTE_KIND_C32_2C 0xd8 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBR 0xd9 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBA 0xda /* R---V */
#define NV_MMU_PTE_KIND_C32_2CRA 0xdb /* R---V */
#define NV_MMU_PTE_KIND_C32_2BRA 0xdc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2C 0xdd /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CBR 0xde /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_4CBRA 0xcc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2C 0xdf /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBR 0xe0 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBA 0xe1 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CRA 0xe2 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2BRA 0xe3 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_4CBRA 0x2c /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2C 0xe4 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2CRA 0xe5 /* R---V */
#define NV_MMU_PTE_KIND_C64_2C 0xe6 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBR 0xe7 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBA 0xe8 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CRA 0xe9 /* R---V */
#define NV_MMU_PTE_KIND_C64_2BRA 0xea /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2C 0xeb /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CBR 0xec /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_4CBRA 0xcd /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2C 0xed /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBR 0xee /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBA 0xef /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CRA 0xf0 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2BRA 0xf1 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_4CBRA 0x2d /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2C 0xf2 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2CRA 0xf3 /* R---V */
#define NV_MMU_PTE_KIND_C128_2C 0xf4 /* R---V */
#define NV_MMU_PTE_KIND_C128_2CR 0xf5 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2C 0xf6 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2CR 0xf7 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2C 0xf8 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2CR 0xf9 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2C 0xfa /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2CR 0xfb /* R---V */
#define NV_MMU_PTE_KIND_X8C24 0xfc /* R---V */
#define NV_MMU_PTE_KIND_PITCH_NO_SWIZZLE 0xfd /* R---V */
#define NV_MMU_PTE_KIND_SMHOST_MESSAGE 0xcb /* R---V */
#define NV_MMU_VER1_PDE /* ----G */
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PDE__SIZE 8
#define NV_MMU_VER1_PTE /* ----G */
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PTE__SIZE 8
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_NEW_PDE /* ----G */
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PDE__SIZE 8
#define NV_MMU_NEW_DUAL_PDE /* ----G */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
#define NV_MMU_NEW_PTE /* ----G */
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_NEW_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PTE__SIZE 8
#define NV_MMU_VER2_PDE /* ----G */
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PDE__SIZE 8
#define NV_MMU_VER2_DUAL_PDE /* ----G */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
#define NV_MMU_VER2_PTE /* ----G */
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER2_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PTE__SIZE 8
#define NV_MMU_CLIENT /* ----G */
#define NV_MMU_CLIENT_KIND 2:0 /* RWXVF */
#define NV_MMU_CLIENT_KIND_Z16 0x1 /* R---V */
#define NV_MMU_CLIENT_KIND_S8 0x2 /* R---V */
#define NV_MMU_CLIENT_KIND_S8Z24 0x3 /* R---V */
#define NV_MMU_CLIENT_KIND_ZF32_X24S8 0x4 /* R---V */
#define NV_MMU_CLIENT_KIND_Z24S8 0x5 /* R---V */
#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY 0x6 /* R---V */
#define NV_MMU_CLIENT_KIND_INVALID 0x7 /* R---V */
#endif // __tu102_dev_mmu_h__
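
To make the VER2 PTE layout above concrete, here is a hedged sketch (not driver code; the PTE_FIELD helper, function name, and parameter choices are assumptions) that packs an 8-byte PTE — NV_MMU_VER2_PTE__SIZE — for a coherent system-memory mapping, placing the 4 KiB-aligned address in ADDRESS_SYS (bits 53:8) after applying ADDRESS_SHIFT (0xc) and the page kind in KIND (bits 63:56):

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Illustrative helper: place `val` into bits [hi:lo] of a 64-bit word. */
#define PTE_FIELD(val, hi, lo) \
    (((uint64_t)(val) & ((1ULL << ((hi) - (lo) + 1)) - 1)) << (lo))

uint64_t make_ver2_sysmem_pte(uint64_t phys_addr, uint8_t kind, int read_only)
{
    uint64_t pte = 0;

    pte |= PTE_FIELD(0x1, 0, 0);                 /* VALID_TRUE                            */
    pte |= PTE_FIELD(0x2, 2, 1);                 /* APERTURE_SYSTEM_COHERENT_MEMORY       */
    pte |= PTE_FIELD(read_only ? 1 : 0, 6, 6);   /* READ_ONLY                             */
    pte |= PTE_FIELD(phys_addr >> 0x0c, 53, 8);  /* ADDRESS_SYS, address >> ADDRESS_SHIFT */
    pte |= PTE_FIELD(kind, 63, 56);              /* KIND (page kind value, field 63:56)   */

    return pte;
}

int main(void)
{
    /* Example: map a page at 0x1234567000 with an arbitrary kind value, read-only. */
    printf("pte = 0x%016" PRIx64 "\n", make_ver2_sysmem_pte(0x1234567000ULL, 0x06, 1));
    return 0;
}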


@@ -0,0 +1,263 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gv100_dev_fault_h__
#define __gv100_dev_fault_h__
/* This file is autogenerated. Do not edit */
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 64 /* */
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
#define NV_PFAULT_MMU_ENG_ID_IFB 8 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1 4 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2 5 /* */
#define NV_PFAULT_MMU_ENG_ID_SEC 14 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF 9 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC 10 /* */
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE0 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE1 16 /* */
#define NV_PFAULT_MMU_ENG_ID_CE2 17 /* */
#define NV_PFAULT_MMU_ENG_ID_CE3 18 /* */
#define NV_PFAULT_MMU_ENG_ID_CE4 19 /* */
#define NV_PFAULT_MMU_ENG_ID_CE5 20 /* */
#define NV_PFAULT_MMU_ENG_ID_CE6 21 /* */
#define NV_PFAULT_MMU_ENG_ID_CE7 22 /* */
#define NV_PFAULT_MMU_ENG_ID_CE8 23 /* */
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 6 /* */
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC0 11 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC1 12 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC2 13 /* */
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 31 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST0 32 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST1 33 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST2 34 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST3 35 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST4 36 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST5 37 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST6 38 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST7 39 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST8 40 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST9 41 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST10 42 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST11 43 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST12 44 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST13 45 /* */
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
#define NV_PFAULT_CLIENT 14:8 /* */
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_0 0x00000018 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_1 0x00000019 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_2 0x0000001A /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_3 0x0000001B /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_4 0x0000001C /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_5 0x0000001D /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_6 0x0000001E /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_7 0x0000001F /* */
#define NV_PFAULT_CLIENT_GPC_RGG_UTLB 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
#define NV_PFAULT_CLIENT_HUB_DWBIF 0x00000036 /* */
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
#define NV_PFAULT_GPC_ID 28:24 /* */
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
#define NV_PFAULT_ATS_FAULT 30:30 /* */
#define NV_PFAULT_VALID 31:31 /* */
#endif // __gv100_dev_fault_h__
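/*
 * Illustrative decode sketch (not part of the header above): the macros slice a
 * packed fault-information word, with FAULT_TYPE in bits 4:0, CLIENT in 14:8,
 * ACCESS_TYPE in 19:16, MMU_CLIENT_TYPE in bit 20 and VALID in bit 31. Assuming
 * the word has been read into a plain 32-bit value, the fields can be pulled
 * apart as follows (struct and function names are placeholders, not driver APIs):
 */
struct nv_fault_info_fields {
    unsigned int fault_type;   /* NV_PFAULT_FAULT_TYPE       4:0   */
    unsigned int client;       /* NV_PFAULT_CLIENT           14:8  */
    unsigned int access_type;  /* NV_PFAULT_ACCESS_TYPE      19:16 */
    unsigned int client_type;  /* NV_PFAULT_MMU_CLIENT_TYPE  20:20 */
    unsigned int valid;        /* NV_PFAULT_VALID            31:31 */
};

static inline void nv_fault_info_unpack(unsigned int info,
                                        struct nv_fault_info_fields *out)
{
    out->fault_type  = (info >> 0)  & 0x1f;
    out->client      = (info >> 8)  & 0x7f;
    out->access_type = (info >> 16) & 0xf;
    out->client_type = (info >> 20) & 0x1;
    out->valid       = (info >> 31) & 0x1;
}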

View File

@@ -0,0 +1,103 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// Excerpt of gv100/dev_fb.h
#ifndef __dev_fb_h__
#define __dev_fb_h__
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO(i) (0x00100E24+(i)*20) /* RW-4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_ADDR_MODE 0:0 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_ADDR_MODE_VIRTUAL 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_ADDR_MODE_PHYSICAL 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_APERTURE 2:1 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_APERTURE_LOCAL 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_APERTURE_SYS_COH 0x00000002 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_APERTURE_SYS_NCOH 0x00000003 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_VOL 3:3 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_ADDR 31:12 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_HI(i) (0x00100E28+(i)*20) /* RW-4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_HI__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_HI_ADDR 31:0 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET(i) (0x00100E2C+(i)*20) /* RW-4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_PTR 19:0 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_PTR_RESET 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_GETPTR_CORRUPTED 30:30 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_GETPTR_CORRUPTED_NO 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_GETPTR_CORRUPTED_YES 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_GETPTR_CORRUPTED_CLEAR 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_OVERFLOW 31:31 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_OVERFLOW_NO 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_OVERFLOW_YES 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_OVERFLOW_CLEAR 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT(i) (0x00100E30+(i)*20) /* R--4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_PTR 19:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_PTR_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED 30:30 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED_NO 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED_YES 0x00000001 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_OVERFLOW 31:31 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_OVERFLOW_NO 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_OVERFLOW_YES 0x00000001 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE(i) (0x00100E34+(i)*20) /* RW-4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_VAL 19:0 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_VAL_RESET 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_OVERFLOW_INTR 29:29 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_OVERFLOW_INTR_DISABLE 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_OVERFLOW_INTR_ENABLE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_SET_DEFAULT 30:30 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_SET_DEFAULT_NO 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_SET_DEFAULT_YES 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_ENABLE 31:31 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_ENABLE_FALSE 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_ENABLE_TRUE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO 0x00100E4C /* R--4R */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE 1:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE_LOCAL 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE_PEER 0x00000001 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE_SYS_COH 0x00000002 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE_SYS_NCOH 0x00000003 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_ADDR 31:12 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_ADDR_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_HI 0x00100E50 /* R--4R */
#define NV_PFB_PRI_MMU_FAULT_ADDR_HI_ADDR 31:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_ADDR_HI_ADDR_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO 0x00100E54 /* R--4R */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_ENGINE_ID 8:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_ENGINE_ID_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE 11:10 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE_VID_MEM 0x00000000 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_ADDR 31:12 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_ADDR_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_INST_HI 0x00100E58 /* R--4R */
#define NV_PFB_PRI_MMU_FAULT_INST_HI_ADDR 31:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_INST_HI_ADDR_RESET 0x00000000 /* R-E-V */
#endif
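/*
 * Illustrative sketch (not part of the excerpt above): the GET/PUT registers
 * describe each fault buffer as a ring, with software advancing GET_PTR as it
 * consumes entries and hardware advancing PUT_PTR as it writes them, both
 * bounded by SIZE_VAL. Assuming conventional ring semantics and a hypothetical
 * MMIO read helper (read_reg32 is a placeholder, not a driver API), a consumer
 * can tell whether entries are pending like this:
 */
extern unsigned int read_reg32(unsigned int offset);   /* hypothetical BAR0 read */

static inline int fault_buffer_has_entries(unsigned int i)
{
    unsigned int get = read_reg32(0x00100E2C + i * 20) & 0xFFFFF; /* GET(i), PTR 19:0 */
    unsigned int put = read_reg32(0x00100E30 + i * 20) & 0xFFFFF; /* PUT(i), PTR 19:0 */
    return get != put;   /* GET equal to PUT means the buffer has been drained */
}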

View File

@@ -0,0 +1,661 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gv100_dev_mmu_h__
#define __gv100_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+18+11):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0xff /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x01 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2C 0x02 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2C 0x03 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2C 0x04 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2C 0x05 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2C 0x06 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2Z 0x07 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2Z 0x08 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2Z 0x09 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2Z 0x0a /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2Z 0x0b /* R---V */
#define NV_MMU_PTE_KIND_Z16_2CZ 0x36 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2CZ 0x37 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2CZ 0x38 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2CZ 0x39 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2CZ 0x5f /* R---V */
#define NV_MMU_PTE_KIND_Z16_4CZ 0x0c /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_4CZ 0x0d /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_4CZ 0x0e /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_4CZ 0x0f /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_4CZ 0x10 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x11 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_1Z 0x12 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_1Z 0x13 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_1Z 0x14 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_1Z 0x15 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_1Z 0x16 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CZ 0x17 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CZ 0x18 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CZ 0x19 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CZ 0x1a /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CZ 0x1b /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CS 0x1c /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CS 0x1d /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CS 0x1e /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CS 0x1f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CS 0x20 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_4CSZV 0x21 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_4CSZV 0x22 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_4CSZV 0x23 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_4CSZV 0x24 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_4CSZV 0x25 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12 0x26 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4 0x27 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8 0x28 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24 0x29 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_1ZV 0x2e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_1ZV 0x2f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_1ZV 0x30 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_1ZV 0x31 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CS 0x32 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CS 0x33 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CS 0x34 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CS 0x35 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CZV 0x3a /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CZV 0x3b /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CZV 0x3c /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CZV 0x3d /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2ZV 0x3e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2ZV 0x3f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2ZV 0x40 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2ZV 0x41 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_4CSZV 0x42 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_4CSZV 0x43 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_4CSZV 0x44 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_4CSZV 0x45 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x46 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_1Z 0x47 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_1Z 0x48 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_1Z 0x49 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_1Z 0x4a /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_1Z 0x4b /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CS 0x4c /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CS 0x4d /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CS 0x4e /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CS 0x4f /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CS 0x50 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CZ 0x51 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CZ 0x52 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CZ 0x53 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CZ 0x54 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CZ 0x55 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_4CSZV 0x56 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_4CSZV 0x57 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_4CSZV 0x58 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_4CSZV 0x59 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_4CSZV 0x5a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12 0x5b /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4 0x5c /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8 0x5d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24 0x5e /* R---V */
#define NV_MMU_PTE_KIND_YUV_B8C1_2Y 0x60 /* R---V */
#define NV_MMU_PTE_KIND_YUV_B8C2_2Y 0x61 /* R---V */
#define NV_MMU_PTE_KIND_YUV_B10C1_2Y 0x62 /* R---V */
#define NV_MMU_PTE_KIND_YUV_B10C2_2Y 0x6b /* R---V */
#define NV_MMU_PTE_KIND_YUV_B12C1_2Y 0x6c /* R---V */
#define NV_MMU_PTE_KIND_YUV_B12C2_2Y 0x6d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_1ZV 0x63 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_1ZV 0x64 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_1ZV 0x65 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_1ZV 0x66 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CS 0x67 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CS 0x68 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CS 0x69 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CS 0x6a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CZV 0x6f /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CZV 0x70 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CZV 0x71 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CZV 0x72 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2ZV 0x73 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2ZV 0x74 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2ZV 0x75 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2ZV 0x76 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_4CSZV 0x77 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_4CSZV 0x78 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_4CSZV 0x79 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_4CSZV 0x7a /* R---V */
#define NV_MMU_PTE_KIND_ZF32 0x7b /* R---V */
#define NV_MMU_PTE_KIND_ZF32_1Z 0x7c /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_1Z 0x7d /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_1Z 0x7e /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_1Z 0x7f /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_1Z 0x80 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CS 0x81 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CS 0x82 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CS 0x83 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CS 0x84 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CS 0x85 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CZ 0x86 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CZ 0x87 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CZ 0x88 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CZ 0x89 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CZ 0x8a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12 0x8b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4 0x8c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8 0x8d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24 0x8e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CS 0x8f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CS 0x90 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CS 0x91 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CS 0x92 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1ZV 0x97 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1ZV 0x98 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1ZV 0x99 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1ZV 0x9a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CZV 0x9b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CZV 0x9c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CZV 0x9d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CZV 0x9e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CS 0x9f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CS 0xa0 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CS 0xa1 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CS 0xa2 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CSZV 0xa3 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CSZV 0xa4 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CSZV 0xa5 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CSZV 0xa6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12 0xa7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4 0xa8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8 0xa9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24 0xaa /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CS 0xab /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CS 0xac /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CS 0xad /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CS 0xae /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1ZV 0xb3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1ZV 0xb4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1ZV 0xb5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1ZV 0xb6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CZV 0xb7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CZV 0xb8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CZV 0xb9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CZV 0xba /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CS 0xbb /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CS 0xbc /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CS 0xbd /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CS 0xbe /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CSZV 0xbf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CSZV 0xc0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CSZV 0xc1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CSZV 0xc2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0xc3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_1CS 0xc4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_1CS 0xc5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_1CS 0xc6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_1CS 0xc7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_1CS 0xc8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CSZV 0xce /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CSZV 0xcf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CSZV 0xd0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CSZV 0xd1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CSZV 0xd2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CS 0xd3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CS 0xd4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CS 0xd5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CS 0xd6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CS 0xd7 /* R---V */
#define NV_MMU_PTE_KIND_S8 0x2a /* R---V */
#define NV_MMU_PTE_KIND_S8_2S 0x2b /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_16BX2 0xfe /* R---V */
#define NV_MMU_PTE_KIND_C32_2C 0xd8 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBR 0xd9 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBA 0xda /* R---V */
#define NV_MMU_PTE_KIND_C32_2CRA 0xdb /* R---V */
#define NV_MMU_PTE_KIND_C32_2BRA 0xdc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2C 0xdd /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CBR 0xde /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_4CBRA 0xcc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2C 0xdf /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBR 0xe0 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBA 0xe1 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CRA 0xe2 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2BRA 0xe3 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_4CBRA 0x2c /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2C 0xe4 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2CRA 0xe5 /* R---V */
#define NV_MMU_PTE_KIND_C64_2C 0xe6 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBR 0xe7 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBA 0xe8 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CRA 0xe9 /* R---V */
#define NV_MMU_PTE_KIND_C64_2BRA 0xea /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2C 0xeb /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CBR 0xec /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_4CBRA 0xcd /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2C 0xed /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBR 0xee /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBA 0xef /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CRA 0xf0 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2BRA 0xf1 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_4CBRA 0x2d /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2C 0xf2 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2CRA 0xf3 /* R---V */
#define NV_MMU_PTE_KIND_C128_2C 0xf4 /* R---V */
#define NV_MMU_PTE_KIND_C128_2CR 0xf5 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2C 0xf6 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2CR 0xf7 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2C 0xf8 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2CR 0xf9 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2C 0xfa /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2CR 0xfb /* R---V */
#define NV_MMU_PTE_KIND_X8C24 0xfc /* R---V */
#define NV_MMU_PTE_KIND_PITCH_NO_SWIZZLE 0xfd /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xca /* R---V */
#define NV_MMU_PTE_KIND_SMHOST_MESSAGE 0xcb /* R---V */
#define NV_MMU_VER1_PDE /* ----G */
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PDE__SIZE 8
#define NV_MMU_VER1_PTE /* ----G */
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+18+11):(1*32+12) /* RWXVF */
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PTE__SIZE 8
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_NEW_PDE /* ----G */
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PDE__SIZE 8
#define NV_MMU_NEW_DUAL_PDE /* ----G */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
#define NV_MMU_NEW_PTE /* ----G */
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_NEW_PTE_COMPTAGLINE (18+35):36 /* RWXVF */
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PTE__SIZE 8
#define NV_MMU_VER2_PDE /* ----G */
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PDE__SIZE 8
#define NV_MMU_VER2_DUAL_PDE /* ----G */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
#define NV_MMU_VER2_PTE /* ----G */
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER2_PTE_COMPTAGLINE (18+35):36 /* RWXVF */
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PTE__SIZE 8
#define NV_MMU_BASIC /* ----G */
#define NV_MMU_BASIC_KIND 3:0 /* RWXVF */
#define NV_MMU_BASIC_KIND_TRANSPARENT 0x0 /* R---V */
#define NV_MMU_BASIC_KIND_GENERIC 0x1 /* R---V */
#define NV_MMU_BASIC_KIND_S8 0x2 /* R---V */
#define NV_MMU_BASIC_KIND_Z16 0x3 /* R---V */
#define NV_MMU_BASIC_KIND_Z24S8 0x4 /* R---V */
#define NV_MMU_BASIC_KIND_ZF32 0x5 /* R---V */
#define NV_MMU_BASIC_KIND_ZF32_X24S8 0x6 /* R---V */
#define NV_MMU_BASIC_KIND_RSVRD0 0x7 /* R---V */
#define NV_MMU_BASIC_KIND_PITCH 0x8 /* R---V */
#define NV_MMU_BASIC_KIND_GENERIC_C 0x9 /* R---V */
#define NV_MMU_BASIC_KIND_S8_C 0xa /* R---V */
#define NV_MMU_BASIC_KIND_Z16_C 0xb /* R---V */
#define NV_MMU_BASIC_KIND_Z24S8_C 0xc /* R---V */
#define NV_MMU_BASIC_KIND_ZF32_C 0xd /* R---V */
#define NV_MMU_BASIC_KIND_ZF32_X24S8_C 0xe /* R---V */
#define NV_MMU_BASIC_KIND_INVALID 0xf /* R---V */
#endif // __gv100_dev_mmu_h__

View File

@@ -0,0 +1,577 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nv-kthread-q.h"
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include <linux/string.h>
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/mm.h>
// If NV_BUILD_MODULE_INSTANCES is not defined, define it here in order to
// avoid build warnings/errors when including nv-linux.h, as it expects the
// definition to be present.
#ifndef NV_BUILD_MODULE_INSTANCES
#define NV_BUILD_MODULE_INSTANCES 1
#endif
#include "nv-linux.h"
// Below are just a few lines of printing and test assertion support.
// It is important to avoid dependencies on other modules, because nv-kthread-q
// and its self-test are supposed to only require:
//
// -- Linux kernel functions and macros
//
// In order to avoid external dependencies (specifically, NV_STATUS codes), all
// functions in this file return non-zero upon failure, and zero upon success.
#ifndef NVIDIA_PRETTY_PRINTING_PREFIX
#define NVIDIA_PRETTY_PRINTING_PREFIX "nvidia: "
#endif
// This prints even on release builds:
#define NVQ_TEST_PRINT(fmt, ...) \
printk(KERN_INFO NVIDIA_PRETTY_PRINTING_PREFIX "%s:%u[pid:%d] " fmt, \
__FUNCTION__, \
__LINE__, \
current->pid, \
##__VA_ARGS__)
// Caution: This macro will return out of the current scope
#define TEST_CHECK_RET(cond) \
do { \
if (unlikely(!(cond))) { \
NVQ_TEST_PRINT("Test check failed, condition '%s' not true\n", \
#cond); \
on_nvq_assert(); \
return -1; \
} \
} while(0)
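// For illustration (hypothetical line number and pid), a failed
// TEST_CHECK_RET(result == 0) prints a line of the form:
//
//     nvidia: _basic_start_stop_test:123[pid:4567] Test check failed, condition 'result == 0' not true
//
// where the function name, line number, and pid come from NVQ_TEST_PRINT above.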
// Most test failures will do things such as just hang or crash. However, in
// order to detect bugs that are less fatal, simply count how many queue items
// actually ran.
#define NUM_Q_ITEMS_IN_BASIC_TEST 6
#define NUM_RESCHEDULE_CALLBACKS 10
#define NUM_TEST_Q_ITEMS (100 * 1000)
#define NUM_TEST_KTHREADS 8
#define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS)
// This exists in order to have a function to place a breakpoint on:
void on_nvq_assert(void)
{
(void)NULL;
}
////////////////////////////////////////////////////////////////////////////////
// Basic start-stop test
typedef struct basic_start_stop_args
{
int value_to_write;
int *where_to_write;
} basic_start_stop_args_t;
static void _basic_start_stop_callback(void *args)
{
basic_start_stop_args_t *start_stop_args = (basic_start_stop_args_t*)args;
*start_stop_args->where_to_write = start_stop_args->value_to_write;
}
static int _basic_start_stop_test(void)
{
int i, was_scheduled;
int result = 0;
nv_kthread_q_item_t q_item[NUM_Q_ITEMS_IN_BASIC_TEST];
int callback_values_written[NUM_Q_ITEMS_IN_BASIC_TEST];
basic_start_stop_args_t start_stop_args[NUM_Q_ITEMS_IN_BASIC_TEST];
nv_kthread_q_t local_q;
// Do a redundant stop to ensure that stop is supported on zero-initialized
// memory. No crash should occur.
memset(&local_q, 0, sizeof(nv_kthread_q_t));
nv_kthread_q_stop(&local_q);
// Do a quick start-stop cycle first:
result = nv_kthread_q_init(&local_q, "q_to_stop");
TEST_CHECK_RET(result == 0);
nv_kthread_q_stop(&local_q);
// call another q_stop and it shouldn't crash and should return fine
nv_kthread_q_stop(&local_q);
memset(&start_stop_args, 0, sizeof(start_stop_args));
memset(callback_values_written, 0, sizeof(callback_values_written));
// All the callback arguments point to the same nv_kthread_q:
for (i = 0; i < NUM_Q_ITEMS_IN_BASIC_TEST; ++i) {
start_stop_args[i].value_to_write = i;
start_stop_args[i].where_to_write = &callback_values_written[i];
}
result = nv_kthread_q_init(&local_q, "basic_q");
TEST_CHECK_RET(result == 0);
// Launch 3 items, then flush the queue.
//
// Each iteration sends a different instance of args to the callback
// function.
for (i = 0; i < 3; ++i) {
nv_kthread_q_item_init(&q_item[i],
_basic_start_stop_callback,
&start_stop_args[i]);
was_scheduled = nv_kthread_q_schedule_q_item(&local_q, &q_item[i]);
result |= (!was_scheduled);
}
// It is legal to flush more than once, so flush twice in a row:
nv_kthread_q_flush(&local_q);
nv_kthread_q_flush(&local_q);
// Launch the remaining items, then stop (which flushes) the queue:
for (i = 3; i < NUM_Q_ITEMS_IN_BASIC_TEST; ++i) {
nv_kthread_q_item_init(&q_item[i],
_basic_start_stop_callback,
&start_stop_args[i]);
was_scheduled = nv_kthread_q_schedule_q_item(&local_q, &q_item[i]);
result |= (!was_scheduled);
}
nv_kthread_q_stop(&local_q);
// Verify that all the callbacks ran and wrote their values:
for (i = 0; i < NUM_Q_ITEMS_IN_BASIC_TEST; ++i)
TEST_CHECK_RET(callback_values_written[i] == i);
return result;
}
////////////////////////////////////////////////////////////////////////////////
// Multithreaded test
typedef struct multithread_args
{
nv_kthread_q_t *test_q;
atomic_t *test_wide_accumulator;
atomic_t per_thread_accumulator;
} multithread_args_t;
static void _multithread_callback(void *args)
{
multithread_args_t *multithread_args = (multithread_args_t*)(args);
atomic_inc(multithread_args->test_wide_accumulator);
atomic_inc(&multithread_args->per_thread_accumulator);
}
//
// Return values:
// 0: Success
// -ENOMEM: vmalloc failed
// -EINVAL: test failed due to mismatched accumulator counts
//
static int _multithreaded_q_kthread_function(void *args)
{
int i, was_scheduled;
int result = 0;
int per_thread_count;
int test_wide_count;
multithread_args_t *multithread_args = (multithread_args_t*)args;
nv_kthread_q_item_t *q_items;
size_t alloc_size = NUM_TEST_Q_ITEMS * sizeof(nv_kthread_q_item_t);
q_items = vmalloc(alloc_size);
if (!q_items) {
result = -ENOMEM;
goto done;
}
memset(q_items, 0, alloc_size);
for (i = 0; i < NUM_TEST_Q_ITEMS; ++i) {
nv_kthread_q_item_init(&q_items[i],
_multithread_callback,
multithread_args);
was_scheduled = nv_kthread_q_schedule_q_item(multithread_args->test_q,
&q_items[i]);
result |= (!was_scheduled);
}
nv_kthread_q_flush(multithread_args->test_q);
per_thread_count = atomic_read(&multithread_args->per_thread_accumulator);
if (per_thread_count != NUM_TEST_Q_ITEMS) {
NVQ_TEST_PRINT("per_thread_count: Expected: %d, actual: %d\n",
NUM_TEST_Q_ITEMS, per_thread_count);
goto done;
}
test_wide_count = atomic_read(multithread_args->test_wide_accumulator);
if (test_wide_count < NUM_TEST_Q_ITEMS) {
NVQ_TEST_PRINT("test_wide_count: Expected: >= %d, actual: %d\n",
NUM_TEST_Q_ITEMS, test_wide_count);
goto done;
}
done:
if (q_items)
vfree(q_items);
while (!kthread_should_stop())
schedule();
return result;
}
static int _multithreaded_q_test(void)
{
int i, j;
int result = 0;
struct task_struct *kthreads[NUM_TEST_KTHREADS];
multithread_args_t multithread_args[NUM_TEST_KTHREADS];
nv_kthread_q_t local_q;
atomic_t local_accumulator;
memset(multithread_args, 0, sizeof(multithread_args));
memset(kthreads, 0, sizeof(kthreads));
atomic_set(&local_accumulator, 0);
result = nv_kthread_q_init(&local_q, "multithread_test_q");
TEST_CHECK_RET(result == 0);
for (i = 0; i < NUM_TEST_KTHREADS; ++i) {
multithread_args[i].test_q = &local_q;
multithread_args[i].test_wide_accumulator = &local_accumulator;
kthreads[i] = kthread_run(_multithreaded_q_kthread_function,
&multithread_args[i],
"nvq_test_kthread");
if (IS_ERR(kthreads[i]))
goto failed;
}
// Stop all of the test kthreads, then stop the queue. Collect any
// non-zero (failure) return values from the kthreads, and use those
// later to report a test failure.
for (i = 0; i < NUM_TEST_KTHREADS; ++i) {
result |= kthread_stop(kthreads[i]);
}
nv_kthread_q_stop(&local_q);
TEST_CHECK_RET(atomic_read(&local_accumulator) ==
NUM_Q_ITEMS_IN_MULTITHREAD_TEST);
return result;
failed:
NVQ_TEST_PRINT("kthread_run[%d] failed: errno: %ld\n",
i, PTR_ERR(kthreads[i]));
// Stop any threads that had successfully started:
for (j = 0; j < i; ++j)
kthread_stop(kthreads[j]);
nv_kthread_q_stop(&local_q);
return -1;
}
////////////////////////////////////////////////////////////////////////////////
// Self-rescheduling test
typedef struct resched_args
{
nv_kthread_q_t test_q;
nv_kthread_q_item_t q_item;
atomic_t accumulator;
atomic_t stop_rescheduling_callbacks;
int test_failure;
} resched_args_t;
static void _reschedule_callback(void *args)
{
int was_scheduled;
resched_args_t *resched_args = (resched_args_t*)args;
// This test promises to add one to accumulator, for each time through.
atomic_inc(&resched_args->accumulator);
if (atomic_read(&resched_args->stop_rescheduling_callbacks) == 0) {
nv_kthread_q_item_init(&resched_args->q_item,
_reschedule_callback,
resched_args);
was_scheduled = nv_kthread_q_schedule_q_item(&resched_args->test_q,
&resched_args->q_item);
if (!was_scheduled) {
resched_args->test_failure = 1;
}
}
// Ensure the thread relinquishes control; otherwise we hang in single-core
// environments.
schedule();
}
// Verify that re-scheduling the same q_item, from within its own
// callback, works.
static int _reschedule_same_item_from_its_own_callback_test(void)
{
int was_scheduled;
int result = 0;
resched_args_t resched_args;
memset(&resched_args, 0, sizeof(resched_args));
result = nv_kthread_q_init(&resched_args.test_q, "resched_test_q");
TEST_CHECK_RET(result == 0);
nv_kthread_q_item_init(&resched_args.q_item,
_reschedule_callback,
&resched_args);
was_scheduled = nv_kthread_q_schedule_q_item(&resched_args.test_q,
&resched_args.q_item);
result |= (!was_scheduled);
// Wait for a few callback items to run
while(atomic_read(&resched_args.accumulator) < NUM_RESCHEDULE_CALLBACKS)
schedule();
// Stop the callbacks from rescheduling themselves. This requires two
// levels of flushing: one flush to wait for any callbacks that missed
// the .stop_rescheduling_callbacks change, and another for any pending
// callbacks that were scheduled from within the callback.
atomic_set(&resched_args.stop_rescheduling_callbacks, 1);
// Stop the queue. This is guaranteed to do a (double) flush, and that
// flush takes care of any pending callbacks that we rescheduled from
// within the callback function.
nv_kthread_q_stop(&resched_args.test_q);
return (result || resched_args.test_failure);
}
////////////////////////////////////////////////////////////////////////////////
// Rescheduling the exact same q_item test
typedef struct same_q_item_args
{
atomic_t test_accumulator;
} same_q_item_args_t;
static void _same_q_item_callback(void *args)
{
same_q_item_args_t *same_q_item_args = (same_q_item_args_t*)(args);
atomic_inc(&same_q_item_args->test_accumulator);
}
static int _same_q_item_test(void)
{
int result, i;
int num_scheduled = 0;
same_q_item_args_t same_q_item_args;
nv_kthread_q_t local_q;
nv_kthread_q_item_t q_item;
memset(&same_q_item_args, 0, sizeof(same_q_item_args));
result = nv_kthread_q_init(&local_q, "same_q_item_test_q");
TEST_CHECK_RET(result == 0);
nv_kthread_q_item_init(&q_item,
_same_q_item_callback,
&same_q_item_args);
// Attempt to queue up many copies of the same q_item, then stop the queue.
// This is an attempt to launch enough q_items that at least some of them
// end up being pending in the queue, and exercise the "if already pending"
// logic.
//
// Some manual testing indicates that launching 1000 q_items in a tight loop
// causes between 1 and 20 copies to run. Obviously this is extremely
// dependent on the particular test machine and kernel and more, but it
// shows that 1000 is not unreasonable.
for (i = 0; i < 1000; ++i)
num_scheduled += nv_kthread_q_schedule_q_item(&local_q, &q_item);
nv_kthread_q_stop(&local_q);
// At least one item will have run, but not necessarily any more than that.
TEST_CHECK_RET(atomic_read(&same_q_item_args.test_accumulator) >= 1);
TEST_CHECK_RET(atomic_read(&same_q_item_args.test_accumulator) == num_scheduled);
return 0;
}
// Returns true if any of the stack pages are not resident on the indicated node.
static bool stack_mismatch(const struct task_struct *thread, int preferred_node)
{
unsigned num_stack_pages, i;
char* stack = (char*) thread->stack;
// If the stack has not been allocated using vmalloc, the physical pages
// are all on the same node, so just check the first page
if (!is_vmalloc_addr(stack)) {
struct page *page = virt_to_page(stack);
int node = page_to_nid(page);
return node != preferred_node;
}
num_stack_pages = THREAD_SIZE >> PAGE_SHIFT;
// The physical pages backing the stack may be discontiguous, so check them
// all.
for (i = 0; i < num_stack_pages; i++) {
char *curr_stack_page = stack + i * PAGE_SIZE;
struct page *page = vmalloc_to_page(curr_stack_page);
int node = page_to_nid(page);
if (node != preferred_node)
return true;
}
return false;
}
static void _check_cpu_affinity_callback(void *args)
{
struct task_struct *thread = get_current();
int *preferred_node = (int *) args;
int *ret = preferred_node + 1;
*ret = stack_mismatch(thread, *preferred_node);
}
// Verify that the stack of the kernel thread created by
// nv_kthread_q_init_on_node is resident on the specified NUMA node.
//
// nv_kthread_q_init_on_node does not guarantee that the thread's stack
// will be resident on the passed node, but in practice the preference is mostly
// honored so we invoke the function multiple times and allow a percentage of
// failures per node.
static int _check_cpu_affinity_test(void)
{
int result, node;
nv_kthread_q_t local_q;
// If the API does not support CPU affinity, check whether the correct
// error code is returned.
// Non-affinitized queue allocation has been verified by a previous test,
// so just ensure that the affinitized version also works.
if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY()) {
result = nv_kthread_q_init_on_node(&local_q, "should_fail", 0);
TEST_CHECK_RET(result == -ENOTSUPP);
return 0;
}
for_each_online_node(node) {
unsigned i;
const unsigned max_i = 100;
unsigned stack_mismatches = 0;
// Allow up to 20% of the stacks to be resident on a node different from
// the one requested.
const int alloc_mismatch_percentage = 20;
// Only test on CPU nodes which have memory
if (!nv_numa_node_has_memory(node) || !node_state(node, N_CPU))
continue;
for (i = 0; i < max_i; i++) {
unsigned j;
int thread_args[2];
nv_kthread_q_item_t q_item;
char q_name[64];
nv_kthread_q_item_init(&q_item, _check_cpu_affinity_callback, thread_args);
snprintf(q_name, sizeof(q_name), "test_q_%d", node);
result = nv_kthread_q_init_on_node(&local_q, q_name, node);
TEST_CHECK_RET(result == 0);
// The second entry contains the value returned by the callback:
// 0 if no mismatch found, and 1 otherwise.
thread_args[0] = node;
thread_args[1] = 0;
// Run several iterations to ensure that the thread's stack does
// not migrate after initialization.
for (j = 0; j < 25; j++) {
result = nv_kthread_q_schedule_q_item(&local_q, &q_item);
// nv_kthread_q_schedule_q_item() returns non-zero value if the
// item was successfully scheduled.
if (result == 0) {
nv_kthread_q_stop(&local_q);
TEST_CHECK_RET(false);
}
nv_kthread_q_flush(&local_q);
// Count it as a failure if any of the stack pages is resident on
// another node on any iteration.
if (thread_args[1] == 1) {
stack_mismatches++;
break;
}
}
nv_kthread_q_stop(&local_q);
if ((100 * stack_mismatches / max_i) > alloc_mismatch_percentage)
TEST_CHECK_RET(false);
}
}
return 0;
}
////////////////////////////////////////////////////////////////////////////////
// Top-level test entry point
int nv_kthread_q_run_self_test(void)
{
int result;
result = _basic_start_stop_test();
TEST_CHECK_RET(result == 0);
result = _reschedule_same_item_from_its_own_callback_test();
TEST_CHECK_RET(result == 0);
result = _multithreaded_q_test();
TEST_CHECK_RET(result == 0);
result = _same_q_item_test();
TEST_CHECK_RET(result == 0);
result = _check_cpu_affinity_test();
TEST_CHECK_RET(result == 0);
return 0;
}

View File

@@ -0,0 +1,335 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nv-kthread-q.h"
#include "nv-list-helpers.h"
#include <linux/kthread.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/mm.h>
#if defined(NV_LINUX_BUG_H_PRESENT)
#include <linux/bug.h>
#else
#include <asm/bug.h>
#endif
// Today's implementation is a little simpler and more limited than the
// API description allows for in nv-kthread-q.h. Details include:
//
// 1. Each nv_kthread_q instance is a first-in, first-out queue.
//
// 2. Each nv_kthread_q instance is serviced by exactly one kthread.
//
// You can create any number of queues, each of which gets its own
// named kernel thread (kthread). You can then insert arbitrary functions
// into the queue, and those functions will be run in the context of the
// queue's kthread.
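//
// A minimal usage sketch (illustrative only, not part of this module; the
// names example_increment_callback and example_usage are hypothetical, and
// the block is guarded out so it has no effect on the build). It assumes a
// trivial callback that increments an atomic counter, and follows the same
// calling sequence used by the self-test in nv-kthread-q-selftest.c:
#if 0
static void example_increment_callback(void *args)
{
    atomic_inc((atomic_t *)args);
}

static int example_usage(void)
{
    int ret;
    atomic_t counter;
    nv_kthread_q_t q;
    nv_kthread_q_item_t item;

    atomic_set(&counter, 0);

    // Create the queue and its dedicated kthread:
    ret = nv_kthread_q_init(&q, "example_q");
    if (ret != 0)
        return ret;

    // Bind a callback and its argument to a q_item, then schedule it:
    nv_kthread_q_item_init(&item, example_increment_callback, &counter);
    nv_kthread_q_schedule_q_item(&q, &item);

    // Wait for everything scheduled so far to run:
    nv_kthread_q_flush(&q);

    // Stop the queue (this also flushes) and its kthread:
    nv_kthread_q_stop(&q);

    return (atomic_read(&counter) == 1) ? 0 : -1;
}
#endif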
#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif
#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \
WARN(1, "nv_kthread_q: [in interrupt]: " fmt, \
##__VA_ARGS__); \
} \
else { \
WARN(1, "nv_kthread_q: task: %s: " fmt, \
current->comm, \
##__VA_ARGS__); \
} \
} while (0)
static int _main_loop(void *args)
{
nv_kthread_q_t *q = (nv_kthread_q_t *)args;
nv_kthread_q_item_t *q_item = NULL;
unsigned long flags;
while (1) {
// Normally this thread is never interrupted. However,
// down_interruptible (instead of down) is called here
// in order to avoid being classified as a potentially
// hung task by the kernel watchdog.
while (down_interruptible(&q->q_sem))
NVQ_WARN("Interrupted during semaphore wait\n");
if (atomic_read(&q->main_loop_should_exit))
break;
spin_lock_irqsave(&q->q_lock, flags);
// The q_sem semaphore prevents us from getting here unless there is
// at least one item in the list, so an empty list indicates a bug.
if (unlikely(list_empty(&q->q_list_head))) {
spin_unlock_irqrestore(&q->q_lock, flags);
NVQ_WARN("_main_loop: Empty queue: q: 0x%p\n", q);
continue;
}
// Consume one item from the queue
q_item = list_first_entry(&q->q_list_head,
nv_kthread_q_item_t,
q_list_node);
list_del_init(&q_item->q_list_node);
spin_unlock_irqrestore(&q->q_lock, flags);
// Run the item
q_item->function_to_run(q_item->function_args);
// Make debugging a little simpler by clearing this between runs:
q_item = NULL;
}
while (!kthread_should_stop())
schedule();
return 0;
}
void nv_kthread_q_stop(nv_kthread_q_t *q)
{
// check if queue has been properly initialized
if (unlikely(!q->q_kthread))
return;
nv_kthread_q_flush(q);
// If this assertion fires, then a caller likely either broke the API rules
// by adding items after calling nv_kthread_q_stop, or failed to adequately
// flush self-rescheduling q_items.
if (unlikely(!list_empty(&q->q_list_head)))
NVQ_WARN("list not empty after flushing\n");
if (likely(!atomic_read(&q->main_loop_should_exit))) {
atomic_set(&q->main_loop_should_exit, 1);
// Wake up the kthread so that it can see that it needs to stop:
up(&q->q_sem);
kthread_stop(q->q_kthread);
q->q_kthread = NULL;
}
}
// When CONFIG_VMAP_STACK is defined, the kernel thread stack allocator used by
// kthread_create_on_node relies on a 2 entry, per-core cache to minimize
// vmalloc invocations. The cache is NUMA-unaware, so when there is a hit, the
// stack location ends up being a function of the core assigned to the current
// thread, instead of being a function of the specified NUMA node. The cache was
// added to the kernel in commit ac496bf48d97f2503eaa353996a4dd5e4383eaf0
// ("fork: Optimize task creation by caching two thread stacks per CPU if
// CONFIG_VMAP_STACK=y")
//
// To work around the problematic cache, we create up to three kernel threads:
// - If the first thread's stack is resident on the preferred node, return this
//   thread.
// - Otherwise, create a second thread. If its stack is resident on the
//   preferred node, stop the first thread and return this one.
// - Otherwise, create a third thread. The stack allocator does not find a
//   cached stack, and so falls back to vmalloc, which takes the NUMA hint into
//   consideration. The first two threads are then stopped.
//
// When CONFIG_VMAP_STACK is not defined, the first kernel thread is returned.
//
// This function is never invoked when there is no NUMA preference (preferred
// node is NUMA_NO_NODE).
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
nv_kthread_q_t *q,
int preferred_node,
const char *q_name)
{
unsigned i, j;
const static unsigned attempts = 3;
struct task_struct *thread[3];
for (i = 0;; i++) {
struct page *stack;
thread[i] = kthread_create_on_node(threadfn, q, preferred_node, q_name);
if (unlikely(IS_ERR(thread[i]))) {
// Instead of failing, pick the previous thread, even if its
// stack is not allocated on the preferred node.
if (i > 0)
i--;
break;
}
// vmalloc is not used to allocate the stack, so simply return the
// thread, even if its stack may not be allocated on the preferred node
if (!is_vmalloc_addr(thread[i]->stack))
break;
// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
break;
// Get the NUMA node where the first page of the stack is resident. If
// it is the preferred node, select this thread.
stack = vmalloc_to_page(thread[i]->stack);
if (page_to_nid(stack) == preferred_node)
break;
}
for (j = i; j > 0; j--)
kthread_stop(thread[j - 1]);
return thread[i];
}
#endif
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
{
memset(q, 0, sizeof(*q));
INIT_LIST_HEAD(&q->q_list_head);
spin_lock_init(&q->q_lock);
sema_init(&q->q_sem, 0);
if (preferred_node == NV_KTHREAD_NO_NODE) {
q->q_kthread = kthread_create(_main_loop, q, q_name);
}
else {
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
#else
return -ENOTSUPP;
#endif
}
if (IS_ERR(q->q_kthread)) {
int err = PTR_ERR(q->q_kthread);
// Clear q_kthread before returning so that nv_kthread_q_stop() can be
// safely called on it making error handling easier.
q->q_kthread = NULL;
return err;
}
wake_up_process(q->q_kthread);
return 0;
}
// Returns true (non-zero) if the item was actually scheduled, and false if the
// item was already pending in a queue.
static int _raw_q_schedule(nv_kthread_q_t *q, nv_kthread_q_item_t *q_item)
{
unsigned long flags;
int ret = 1;
spin_lock_irqsave(&q->q_lock, flags);
if (likely(list_empty(&q_item->q_list_node)))
list_add_tail(&q_item->q_list_node, &q->q_list_head);
else
ret = 0;
spin_unlock_irqrestore(&q->q_lock, flags);
if (likely(ret))
up(&q->q_sem);
return ret;
}
void nv_kthread_q_item_init(nv_kthread_q_item_t *q_item,
nv_q_func_t function_to_run,
void *function_args)
{
INIT_LIST_HEAD(&q_item->q_list_node);
q_item->function_to_run = function_to_run;
q_item->function_args = function_args;
}
// Returns true (non-zero) if the q_item got scheduled, false otherwise.
int nv_kthread_q_schedule_q_item(nv_kthread_q_t *q,
nv_kthread_q_item_t *q_item)
{
if (unlikely(atomic_read(&q->main_loop_should_exit))) {
NVQ_WARN("Not allowed: nv_kthread_q_schedule_q_item was "
"called with a non-alive q: 0x%p\n", q);
return 0;
}
return _raw_q_schedule(q, q_item);
}
static void _q_flush_function(void *args)
{
struct completion *completion = (struct completion *)args;
complete(completion);
}
static void _raw_q_flush(nv_kthread_q_t *q)
{
nv_kthread_q_item_t q_item;
DECLARE_COMPLETION(completion);
nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);
_raw_q_schedule(q, &q_item);
// Wait for the flush item to run. Once it has run, then all of the
// previously queued items in front of it will have run, so that means
// the flush is complete.
wait_for_completion(&completion);
}
void nv_kthread_q_flush(nv_kthread_q_t *q)
{
if (unlikely(atomic_read(&q->main_loop_should_exit))) {
NVQ_WARN("Not allowed: nv_kthread_q_flush was called after "
"nv_kthread_q_stop. q: 0x%p\n", q);
return;
}
// This 2x flush is not a typing mistake. The queue really does have to be
// flushed twice, in order to take care of the case of a q_item that
// reschedules itself.
_raw_q_flush(q);
_raw_q_flush(q);
}

View File

@@ -0,0 +1,34 @@
/*******************************************************************************
Copyright (c) 2015-2018 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#include "nvCpuUuid.h"
const NvProcessorUuid NV_PROCESSOR_UUID_CPU_DEFAULT =
{
{
// Produced via uuidgen(1): 73772a14-2c41-4750-a27b-d4d74e0f5ea6:
0xa6, 0x5e, 0x0f, 0x4e, 0xd7, 0xd4, 0x7b, 0xa2,
0x50, 0x47, 0x41, 0x2c, 0x14, 0x2a, 0x77, 0x73
}
};

View File

@@ -0,0 +1,113 @@
NVIDIA_UVM_SOURCES ?=
NVIDIA_UVM_SOURCES_CXX ?=
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvstatus.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvCpuUuid.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nv-kthread-q.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nv-kthread-q-selftest.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tools.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_global.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_isr.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_procfs.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_space.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_space_mm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_channel.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_lock.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hal.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_range.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_policy.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_replayable_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_non_replayable_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_access_counters.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_events.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_module.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pte_batch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tlb_batch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_push.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pushbuffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tracker.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pmm_sysmem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pmm_gpu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_migrate.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_populate_pageable.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_migrate_pageable.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_map_external.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_user_channel.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_heuristics.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_thrashing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_prefetch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm_sanity_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_page_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tracker_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_push_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_channel_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ce_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_host_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_lock_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pmm_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pmm_sysmem_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_events_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_module_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_get_rm_ptes_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_fault_buffer_flush_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_peer_identity_mappings_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c

View File

@@ -0,0 +1,112 @@
###########################################################################
# Kbuild fragment for nvidia-uvm.ko
###########################################################################
UVM_BUILD_TYPE = release
#
# Define NVIDIA_UVM_{SOURCES,OBJECTS}
#
NVIDIA_UVM_OBJECTS =
include $(src)/nvidia-uvm/nvidia-uvm-sources.Kbuild
NVIDIA_UVM_OBJECTS += $(patsubst %.c,%.o,$(NVIDIA_UVM_SOURCES))
# Some Linux kernel functions rely on being built with optimizations on, so to
# work around this we put wrappers for them in a separate file that's built
# with optimizations on in debug builds and skipped in other builds.
# Notably, gcc 4.4 supports per-function optimization attributes that would be
# easier to use, but it is too recent to rely on for now.
NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE := nvidia-uvm/uvm_debug_optimized.c
NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT := $(patsubst %.c,%.o,$(NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE))
ifneq ($(UVM_BUILD_TYPE),debug)
# Only build the wrappers on debug builds
NVIDIA_UVM_OBJECTS := $(filter-out $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_OBJECTS))
endif
obj-m += nvidia-uvm.o
nvidia-uvm-y := $(NVIDIA_UVM_OBJECTS)
NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
#
# Define nvidia-uvm.ko-specific CFLAGS.
#
ifeq ($(UVM_BUILD_TYPE),debug)
NVIDIA_UVM_CFLAGS += -DDEBUG $(call cc-option,-Og,-O0) -g
else
ifeq ($(UVM_BUILD_TYPE),develop)
# -DDEBUG is required, in order to allow pr_devel() print statements to
# work:
NVIDIA_UVM_CFLAGS += -DDEBUG
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
endif
NVIDIA_UVM_CFLAGS += -O2
endif
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_ENABLED
NVIDIA_UVM_CFLAGS += -DNVIDIA_UNDEF_LEGACY_BIT_MACROS
NVIDIA_UVM_CFLAGS += -DLinux
NVIDIA_UVM_CFLAGS += -D__linux__
NVIDIA_UVM_CFLAGS += -I$(src)/nvidia-uvm
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_OBJECTS), $(NVIDIA_UVM_CFLAGS))
ifeq ($(UVM_BUILD_TYPE),debug)
# Force optimizations on for the wrappers
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_CFLAGS) -O2)
endif
#
# Register the conftests needed by nvidia-uvm.ko
#
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
NV_CONFTEST_FUNCTION_COMPILE_TESTS += address_space_init_once
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vzalloc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
NV_CONFTEST_FUNCTION_COMPILE_TESTS += bitmap_clear
NV_CONFTEST_FUNCTION_COMPILE_TESTS += usleep_range
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_bus_address
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
NV_CONFTEST_TYPE_COMPILE_TESTS += address_space
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += make_device_exclusive_range
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg

View File

@@ -0,0 +1,82 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvstatus.h"
#if !defined(NV_PRINTF_STRING_SECTION)
#if defined(NVRM) && NVCPU_IS_RISCV64
#define NV_PRINTF_STRING_SECTION __attribute__ ((section (".logging")))
#else // defined(NVRM) && NVCPU_IS_RISCV64
#define NV_PRINTF_STRING_SECTION
#endif // defined(NVRM) && NVCPU_IS_RISCV64
#endif // !defined(NV_PRINTF_STRING_SECTION)
/*
 * Include nvstatuscodes.h twice: once to create constant strings in the
 * NV_PRINTF_STRING_SECTION section of the executable, and once to build
* the g_StatusCodeList table.
*/
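/*
 * For illustration only (a hypothetical entry, not taken from
 * nvstatuscodes.h): an entry such as
 *
 *     NV_STATUS_CODE(NV_ERR_EXAMPLE, 0x00001234, "Example failure")
 *
 * expands on the first include to
 *
 *     static NV_PRINTF_STRING_SECTION
 *     const char rm_pvt_NV_ERR_EXAMPLE_str[] = "Example failure" " [" "NV_ERR_EXAMPLE" "]";
 *
 * and on the second include to the table entry
 *
 *     { NV_ERR_EXAMPLE, rm_pvt_NV_ERR_EXAMPLE_str },
 */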
#undef NV_STATUS_CODE
#undef SDK_NVSTATUSCODES_H
#define NV_STATUS_CODE( name, code, string ) static NV_PRINTF_STRING_SECTION \
const char rm_pvt_##name##_str[] = string " [" #name "]";
#include "nvstatuscodes.h"
#undef NV_STATUS_CODE
#undef SDK_NVSTATUSCODES_H
#define NV_STATUS_CODE( name, code, string ) { name, rm_pvt_##name##_str },
static struct NvStatusCodeString
{
NV_STATUS statusCode;
const char *statusString;
} g_StatusCodeList[] = {
#include "nvstatuscodes.h"
{ 0xffffffff, "Unknown error code!" } // Some compilers don't like the trailing ','
};
#undef NV_STATUS_CODE
/*!
* @brief Given an NV_STATUS code, returns the corresponding status string.
*
* @param[in] nvStatusIn NV_STATUS code for which the string is required
*
* @returns Corresponding status string from the nvstatuscodes.h
*
* TODO: Bug 200025711: convert this to an array-indexed lookup, instead of a linear search
*
*/
const char *nvstatusToString(NV_STATUS nvStatusIn)
{
static NV_PRINTF_STRING_SECTION const char rm_pvt_UNKNOWN_str[] = "Unknown error code!";
NvU32 i;
NvU32 n = ((NvU32)(sizeof(g_StatusCodeList))/(NvU32)(sizeof(g_StatusCodeList[0])));
for (i = 0; i < n; i++)
{
if (g_StatusCodeList[i].statusCode == nvStatusIn)
{
return g_StatusCodeList[i].statusString;
}
}
return rm_pvt_UNKNOWN_str;
}

kernel-open/nvidia-uvm/uvm.c (new file, 1149 additions): diff suppressed because it is too large.

kernel-open/nvidia-uvm/uvm.h (new file, 3902 additions): diff suppressed because it is too large.

View File

@@ -0,0 +1,102 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_mem.h"
#include "uvm_ampere_fault_buffer.h"
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->tlb_batch.va_invalidate_supported = true;
parent_gpu->tlb_batch.va_range_invalidate_supported = true;
// TODO: Bug 1767241: Run benchmarks to figure out a good number
parent_gpu->tlb_batch.max_ranges = 8;
parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ampere covers 128 TB and that's the minimum
// size that can be used.
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
// See uvm_mmu.h for mapping placement
parent_gpu->flat_vidmem_va_base = 132ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
// Not all units on Ampere support 49-bit addressing, including those which
// access channel buffers.
parent_gpu->max_channel_va = 1ULL << 40;
parent_gpu->max_host_va = 1ULL << 40;
// Ampere can map sysmem with any page size
parent_gpu->can_map_sysmem_with_large_pages = true;
// Prefetch instructions will generate faults
parent_gpu->prefetch_fault_supported = true;
// Ampere can place GPFIFO in vidmem
parent_gpu->gpfifo_in_vidmem_supported = true;
parent_gpu->replayable_faults_supported = true;
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
parent_gpu->has_clear_faulted_channel_sw_method = true;
parent_gpu->has_clear_faulted_channel_method = false;
parent_gpu->smc.supported = true;
parent_gpu->sparse_mappings_supported = true;
UVM_ASSERT(parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100);
if (parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100 ||
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000)
parent_gpu->map_remap_larger_page_promotion = true;
else
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
}

View File

@@ -0,0 +1,230 @@
/*******************************************************************************
Copyright (c) 2018-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_hal_types.h"
#include "clc6b5.h"
#include "clc7b5.h"
#include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
if (!uvm_channel_is_proxy(push->channel))
return true;
switch (method_address) {
case NVC56F_SET_OBJECT:
case NVC6B5_SET_SEMAPHORE_A:
case NVC6B5_SET_SEMAPHORE_B:
case NVC6B5_SET_SEMAPHORE_PAYLOAD:
case NVC6B5_SET_SRC_PHYS_MODE:
case NVC6B5_SET_DST_PHYS_MODE:
case NVC6B5_LAUNCH_DMA:
case NVC6B5_OFFSET_IN_UPPER:
case NVC6B5_OFFSET_IN_LOWER:
case NVC6B5_OFFSET_OUT_UPPER:
case NVC6B5_OFFSET_OUT_LOWER:
case NVC6B5_LINE_LENGTH_IN:
case NVC6B5_SET_REMAP_CONST_A:
case NVC6B5_SET_REMAP_CONST_B:
case NVC6B5_SET_REMAP_COMPONENTS:
return true;
}
UVM_ERR_PRINT("Unsupported CE method 0x%x\n", method_address);
return false;
}
static NvU32 ce_aperture(uvm_aperture_t aperture)
{
BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) !=
HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, PEERMEM));
if (aperture == UVM_APERTURE_SYS) {
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
}
else if (aperture == UVM_APERTURE_VID) {
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
else {
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
HWVALUE(C6B5, SET_SRC_PHYS_MODE, FLA, 0) |
HWVALUE(C6B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
}
}
// Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE
// flags
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU32 launch_dma_src_dst_type = 0;
if (src.is_virtual)
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
if (dst.is_virtual)
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
if (!src.is_virtual && !dst.is_virtual) {
NV_PUSH_2U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture),
SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
else if (!src.is_virtual) {
NV_PUSH_1U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture));
}
else if (!dst.is_virtual) {
NV_PUSH_1U(C6B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
return launch_dma_src_dst_type;
}
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
{
return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
}
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU64 push_begin_gpu_va;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
return true;
if (uvm_channel_is_proxy(push->channel)) {
if (dst.is_virtual) {
UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
return false;
}
if (dst.aperture != UVM_APERTURE_VID) {
UVM_ERR_PRINT("Destination address of memcopy must be in vidmem\n");
return false;
}
// The source address is irrelevant, since it is a pushbuffer offset
if (!IS_ALIGNED(dst.address, 8)) {
UVM_ERR_PRINT("Destination address of memcopy is not 8-byte aligned\n");
return false;
}
if (!src.is_virtual) {
UVM_ERR_PRINT("Source address of memcopy must be virtual\n");
return false;
}
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
return false;
}
}
else {
// TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a
// UVM internal channel cannot use peer physical addresses.
if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) {
UVM_ERR_PRINT("Destination address of memcopy must be virtual, not physical (aperture: %s)\n",
uvm_gpu_address_aperture_string(dst));
return false;
}
if (!src.is_virtual && !uvm_aperture_is_peer(src.aperture)) {
UVM_ERR_PRINT("Source address of memcopy must be virtual, not physical (aperture: %s)\n",
uvm_gpu_address_aperture_string(src));
return false;
}
}
return true;
}
// In SR-IOV heavy (GA100 only), the UVM driver is expected to push a patched
// version of an inlined memcopy to the proxy channels. The patching consists in
// passing the offset of the inlined data within the push as the source virtual
// address, instead of passing its GPU VA.
//
// Copies pushed to internal channels use the GPU VA of the inlined data,
// irrespective of the virtualization mode.
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
{
if (!uvm_channel_is_proxy(push->channel))
return;
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
}
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
return true;
if (uvm_channel_is_proxy(push->channel)) {
if (dst.is_virtual) {
UVM_ERR_PRINT("Destination address of memset must be physical, not virtual\n");
return false;
}
if (dst.aperture != UVM_APERTURE_VID) {
UVM_ERR_PRINT("Destination address of memset must be in vidmem\n");
return false;
}
if (!IS_ALIGNED(dst.address, 8)) {
UVM_ERR_PRINT("Destination address of memset is not 8-byte aligned\n");
return false;
}
// Disallow memsets that don't match the page table/directory entry
// size. PDE0 entries are 16 bytes wide, but those are written using a
// memcopy.
//
// The memset size is not checked to be a multiple of the element size
// because that check is not specific to SR-IOV heavy, and it is already
// present in the uvm_hal_*_memset_* functions.
if (element_size != 8) {
UVM_ERR_PRINT("Memset data must be 8 bytes wide, but found %zu instead\n", element_size);
return false;
}
}
// TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a
// UVM internal channel cannot use peer physical addresses.
else if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) {
UVM_ERR_PRINT("Destination address of memset must be virtual, not physical (aperture: %s)\n",
uvm_gpu_address_aperture_string(dst));
return false;
}
return true;
}

View File

@@ -0,0 +1,88 @@
/*******************************************************************************
Copyright (c) 2018-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_HAL_AMPERE_FAULT_BUFFER_H__
#define __UVM_HAL_AMPERE_FAULT_BUFFER_H__
#include "nvtypes.h"
#include "uvm_common.h"
#include "uvm_gpu.h"
// There are up to 8 TPCs per GPC on Ampere, and there are 2 LTP uTLBs per TPC.
// In addition, there is one RGG uTLB per GPC. Each TPC has a number of clients
// that can make requests to its uTLBs: 1xTPCCS, 1xPE, 2xT1. Requests from
// these units are routed as follows to the 2 LTP uTLBs:
//
// -------- ---------
// | T1_0 | -----------------> | uTLB0 |
// -------- ---------
//
// -------- ---------
// | T1_1 | -----------------> | uTLB1 |
// -------- --------> ---------
// | ^
// ------- | |
// | PE | ----------- |
// ------- |
// |
// --------- |
// | TPCCS | -----------------------
// ---------
//
//
// The client ids are local to their GPC and the id mapping is linear across
// TPCs: TPC_n has TPCCS_n, PE_n, T1_p, and T1_q, where p=2*n and q=p+1.
//
// NV_PFAULT_CLIENT_GPC_LTP_UTLB_n and NV_PFAULT_CLIENT_GPC_RGG_UTLB enums can
// be ignored. These will never be reported in a fault message, and should
// never be used in an invalidate. Therefore, we define our own values.
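//
// As a concrete check of the arithmetic: a fully populated Ampere GPC with
// 8 TPCs has 8 * 2 = 16 LTP uTLBs plus 1 RGG uTLB, i.e. 17 uTLBs per GPC,
// which matches UVM_AMPERE_GPC_UTLB_COUNT below.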
typedef enum {
UVM_AMPERE_GPC_UTLB_ID_RGG = 0,
UVM_AMPERE_GPC_UTLB_ID_LTP0 = 1,
UVM_AMPERE_GPC_UTLB_ID_LTP1 = 2,
UVM_AMPERE_GPC_UTLB_ID_LTP2 = 3,
UVM_AMPERE_GPC_UTLB_ID_LTP3 = 4,
UVM_AMPERE_GPC_UTLB_ID_LTP4 = 5,
UVM_AMPERE_GPC_UTLB_ID_LTP5 = 6,
UVM_AMPERE_GPC_UTLB_ID_LTP6 = 7,
UVM_AMPERE_GPC_UTLB_ID_LTP7 = 8,
UVM_AMPERE_GPC_UTLB_ID_LTP8 = 9,
UVM_AMPERE_GPC_UTLB_ID_LTP9 = 10,
UVM_AMPERE_GPC_UTLB_ID_LTP10 = 11,
UVM_AMPERE_GPC_UTLB_ID_LTP11 = 12,
UVM_AMPERE_GPC_UTLB_ID_LTP12 = 13,
UVM_AMPERE_GPC_UTLB_ID_LTP13 = 14,
UVM_AMPERE_GPC_UTLB_ID_LTP14 = 15,
UVM_AMPERE_GPC_UTLB_ID_LTP15 = 16,
UVM_AMPERE_GPC_UTLB_COUNT,
} uvm_ampere_gpc_utlb_id_t;
static NvU32 uvm_ampere_get_utlbs_per_gpc(uvm_parent_gpu_t *parent_gpu)
{
NvU32 utlbs = parent_gpu->rm_info.maxTpcPerGpcCount * 2 + 1;
UVM_ASSERT(utlbs <= UVM_AMPERE_GPC_UTLB_COUNT);
return utlbs;
}
#endif

View File

@@ -0,0 +1,435 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_global.h"
#include "uvm_user_channel.h"
#include "uvm_push_macros.h"
#include "hwref/ampere/ga100/dev_runlist.h"
#include "clc56f.h"
#include "clc076.h"
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
return true;
if (uvm_channel_is_privileged(push->channel)) {
switch (method_address) {
case NVC56F_SET_OBJECT:
case NVC56F_NON_STALL_INTERRUPT:
case NVC56F_MEM_OP_A:
case NVC56F_MEM_OP_B:
case NVC56F_MEM_OP_C:
case NVC56F_MEM_OP_D:
case NVC56F_SEM_ADDR_LO:
case NVC56F_SEM_ADDR_HI:
case NVC56F_SEM_PAYLOAD_LO:
case NVC56F_SEM_PAYLOAD_HI:
case NVC56F_SEM_EXECUTE:
case NVC56F_WFI:
case NVC56F_NOP:
return true;
}
UVM_ERR_PRINT("Unsupported Host method 0x%x\n", method_address);
return false;
}
else if (method_address == NVC56F_MEM_OP_D) {
NvU32 operation = READ_HWVALUE(method_data, C56F, MEM_OP_D, OPERATION);
// Prohibit privileged operations from being pushed to non-privileged
// channels.
// TLB invalidations.
if ((operation == NVC56F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE) ||
(operation == NVC56F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED)) {
UVM_ERR_PRINT("Pushed privileged operation 0x%x to non-privileged channel\n", operation);
return false;
}
// Access counter clearing is a privileged operation. But access
// counters are not supported on SR-IOV heavy, so the presence of the
// operation indicates a missing check for access counters support.
if (operation == NVC56F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR) {
UVM_ERR_PRINT("Pushed access counters operation 0x%x, but access counters are not supported\n", operation);
return false;
}
}
return true;
}
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
if (!uvm_channel_is_proxy(push->channel))
return true;
switch (method_address) {
case NVC076_SET_OBJECT:
case NVC076_CLEAR_FAULTED_A:
case NVC076_CLEAR_FAULTED_B:
case NVC076_FAULT_CANCEL_A:
case NVC076_FAULT_CANCEL_B:
case NVC076_FAULT_CANCEL_C:
return true;
}
UVM_ERR_PRINT("Unsupported SW method 0x%x\n", method_address);
return false;
}
void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault)
{
uvm_spin_loop_t spin;
NvU32 channel_faulted_mask = 0;
NvU32 clear_type_value = 0;
UVM_ASSERT(!user_channel->gpu->parent->has_clear_faulted_channel_method);
if (fault->fault_source.mmu_engine_type == UVM_MMU_ENGINE_TYPE_HOST) {
clear_type_value = NV_CHRAM_CHANNEL_UPDATE_RESET_PBDMA_FAULTED;
channel_faulted_mask = HWCONST(_CHRAM, CHANNEL, PBDMA_FAULTED, TRUE);
}
else if (fault->fault_source.mmu_engine_type == UVM_MMU_ENGINE_TYPE_CE) {
clear_type_value = NV_CHRAM_CHANNEL_UPDATE_RESET_ENG_FAULTED;
channel_faulted_mask = HWCONST(_CHRAM, CHANNEL, ENG_FAULTED, TRUE);
}
else {
UVM_ASSERT_MSG(false, "Unsupported MMU engine type %s\n",
uvm_mmu_engine_type_string(fault->fault_source.mmu_engine_type));
}
// Wait for the channel to have the FAULTED bit set as this can race with
// interrupt notification
UVM_SPIN_WHILE(!(UVM_GPU_READ_ONCE(*user_channel->chram_channel_register) & channel_faulted_mask), &spin);
UVM_GPU_WRITE_ONCE(*user_channel->chram_channel_register, clear_type_value);
wmb();
UVM_GPU_WRITE_ONCE(*user_channel->work_submission_offset, user_channel->work_submission_token);
}
static NvU32 instance_ptr_aperture_type_to_hw_value(uvm_aperture_t aperture)
{
switch (aperture) {
case UVM_APERTURE_SYS:
return HWCONST(C076, CLEAR_FAULTED_A, INST_APERTURE, SYS_MEM_COHERENT);
case UVM_APERTURE_VID:
return HWCONST(C076, CLEAR_FAULTED_A, INST_APERTURE, VID_MEM);
default:
UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture);
}
return 0;
}
static void instance_ptr_address_to_hw_values(NvU64 instance_ptr_address,
NvU32 *instance_ptr_lo,
NvU32 *instance_ptr_hi)
{
// instance_ptr must be 4K aligned
UVM_ASSERT_MSG(IS_ALIGNED(instance_ptr_address, 1 << 12), "instance_ptr 0x%llx\n", instance_ptr_address);
instance_ptr_address >>= 12;
*instance_ptr_lo = instance_ptr_address & HWMASK(C076, CLEAR_FAULTED_A, INST_LOW);
*instance_ptr_hi = instance_ptr_address >> HWSIZE(C076, CLEAR_FAULTED_A, INST_LOW);
}
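// Worked example (illustrative only; the real INST_LOW field width comes from
// the clc076.h hardware headers): assuming a 20-bit INST_LOW field and a
// 4K-aligned instance pointer of 0x123456000:
//
//     instance_ptr_address >> 12            == 0x123456
//     *instance_ptr_lo = 0x123456 & 0xfffff == 0x23456
//     *instance_ptr_hi = 0x123456 >> 20     == 0x1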
static NvU32 mmu_engine_type_to_hw_value(uvm_mmu_engine_type_t mmu_engine_type)
{
switch (mmu_engine_type) {
case UVM_MMU_ENGINE_TYPE_HOST:
return HWCONST(C076, CLEAR_FAULTED_A, TYPE, PBDMA_FAULTED);
case UVM_MMU_ENGINE_TYPE_CE:
return HWCONST(C076, CLEAR_FAULTED_A, TYPE, ENG_FAULTED);
default:
UVM_ASSERT_MSG(false, "Unsupported MMU engine type %s\n",
uvm_mmu_engine_type_string(mmu_engine_type));
}
return 0;
}
void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault)
{
NvU32 clear_type_value;
NvU32 aperture_type_value;
NvU32 instance_ptr_lo, instance_ptr_hi;
uvm_gpu_phys_address_t instance_ptr = user_channel->instance_ptr.addr;
UVM_ASSERT(user_channel->gpu->parent->has_clear_faulted_channel_sw_method);
clear_type_value = mmu_engine_type_to_hw_value(fault->fault_source.mmu_engine_type);
aperture_type_value = instance_ptr_aperture_type_to_hw_value(instance_ptr.aperture);
instance_ptr_address_to_hw_values(instance_ptr.address, &instance_ptr_lo, &instance_ptr_hi);
NV_PUSH_2U(C076, CLEAR_FAULTED_A, HWVALUE(C076, CLEAR_FAULTED_A, INST_LOW, instance_ptr_lo) |
aperture_type_value |
clear_type_value,
CLEAR_FAULTED_B, HWVALUE(C076, CLEAR_FAULTED_B, INST_HI, instance_ptr_hi));
}
// Copied from the Pascal implementation; this version additionally sets
// TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
    // PDE3 is the highest level on Pascal-Ampere, see the comment in
    // uvm_pascal_mmu.c for details.
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
if (membar != UVM_MEMBAR_NONE) {
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
}
// Copied from the Volta implementation; this version additionally sets
// TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 va_lo;
NvU32 va_hi;
NvU64 end;
NvU64 actual_base;
NvU64 actual_size;
NvU64 actual_end;
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
// The invalidation size must be a power-of-two number of pages containing
// the passed interval
end = base + size - 1;
log2_invalidation_size = __fls((unsigned long)(end ^ base)) + 1;
if (log2_invalidation_size == 64) {
// Invalidate everything
gpu->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
return;
}
// The hardware aligns the target address down to the invalidation size.
actual_size = 1ULL << log2_invalidation_size;
actual_base = UVM_ALIGN_DOWN(base, actual_size);
actual_end = actual_base + actual_size - 1;
UVM_ASSERT(actual_end >= end);
// The invalidation size field expects log2(invalidation size in 4K), not
// log2(invalidation size in bytes)
log2_invalidation_size -= 12;
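    // Worked example (illustrative): base = 0x200000 and size = 0x3000 give
    // end = 0x202fff and (end ^ base) = 0x2fff, so __fls() + 1 = 14 and the
    // invalidation covers 2^14 = 16K bytes. actual_base aligns down to
    // 0x200000 and actual_end = 0x203fff, which still covers the requested
    // interval. After the conversion above, log2_invalidation_size is
    // 14 - 12 = 2, i.e. log2 of 4 pages of 4K.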
// Address to invalidate, as a multiple of 4K.
base >>= 12;
va_lo = base & HWMASK(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
va_hi = base >> HWSIZE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
    // PDE3 is the highest level on Pascal-Ampere, see the comment in
    // uvm_pascal_mmu.c for details.
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
if (membar != UVM_MEMBAR_NONE) {
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
NV_PUSH_4U(C56F, MEM_OP_A, HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
}
// Copied from the Pascal implementation; this version additionally sets
// TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 ack_value = 0;
NvU32 invalidate_gpc_value = 0;
NvU32 aperture_value = 0;
NvU32 pdb_lo = 0;
NvU32 pdb_hi = 0;
NvU32 page_table_level = 0;
uvm_membar_t membar;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
        // PDE3 is the highest level on Pascal-Ampere, see the comment in
        // uvm_pascal_mmu.c for details.
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde3, params->page_table_level) - 1;
}
if (params->membar != UvmInvalidateTlbMemBarNone) {
// If a GPU or SYS membar is needed, ack_value needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
else
invalidate_gpc_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE);
if (params->target_va_mode == UvmTargetVaModeTargeted) {
NvU64 va = params->va >> 12;
NvU32 va_lo = va & HWMASK(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NvU32 va_hi = va >> HWSIZE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
invalidate_gpc_value |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
else {
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
invalidate_gpc_value |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
if (params->membar == UvmInvalidateTlbMemBarSys)
membar = UVM_MEMBAR_SYS;
else if (params->membar == UvmInvalidateTlbMemBarLocal)
membar = UVM_MEMBAR_GPU;
else
membar = UVM_MEMBAR_NONE;
uvm_hal_tlb_invalidate_membar(push, membar);
}

View File

@@ -0,0 +1,162 @@
/*******************************************************************************
Copyright (c) 2018-2020 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// For Ampere, UVM page tree 'depth' maps to hardware as follows:
//
// UVM depth HW level VA bits
// 0 PDE3 48:47
// 1 PDE2 46:38
// 2 PDE1 (or 512M PTE) 37:29
// 3 PDE0 (dual 64k/4k PDE, or 2M PTE) 28:21
// 4 PTE_64K / PTE_4K 20:16 / 20:12
#include "uvm_types.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_ampere_fault_buffer.h"
#include "hwref/ampere/ga100/dev_fault.h"
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id)
{
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST31)
return UVM_MMU_ENGINE_TYPE_HOST;
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9)
return UVM_MMU_ENGINE_TYPE_CE;
// We shouldn't be servicing faults from any other engines
UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS && mmu_engine_id < NV_PFAULT_MMU_ENG_ID_BAR1,
"Unexpected engine ID: 0x%x\n", mmu_engine_id);
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
}
static NvU32 page_table_depth_ampere(NvU32 page_size)
{
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
if (page_size == UVM_PAGE_SIZE_2M)
return 3;
else if (page_size == UVM_PAGE_SIZE_512M)
return 2;
else
return 4;
}
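// Example values, consistent with the depth table at the top of this file:
//
//     page_table_depth_ampere(UVM_PAGE_SIZE_2M)   == 3  // 2M PTE at PDE0
//     page_table_depth_ampere(UVM_PAGE_SIZE_512M) == 2  // 512M PTE at PDE1
//     page_table_depth_ampere(UVM_PAGE_SIZE_64K)  == 4  // leaf PTE level
//     page_table_depth_ampere(UVM_PAGE_SIZE_4K)   == 4  // leaf PTE level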
static NvU32 page_sizes_ampere(void)
{
return UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}
static uvm_mmu_mode_hal_t ampere_mmu_mode_hal;
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size)
{
static bool initialized = false;
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
// TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
// 128K big page size for Pascal+ GPUs
if (big_page_size == UVM_PAGE_SIZE_128K)
return NULL;
if (!initialized) {
uvm_mmu_mode_hal_t *turing_mmu_mode_hal = uvm_hal_mmu_mode_turing(big_page_size);
UVM_ASSERT(turing_mmu_mode_hal);
// The assumption made is that arch_hal->mmu_mode_hal() will be
// called under the global lock the first time, so check it here.
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
ampere_mmu_mode_hal = *turing_mmu_mode_hal;
ampere_mmu_mode_hal.page_table_depth = page_table_depth_ampere;
ampere_mmu_mode_hal.page_sizes = page_sizes_ampere;
initialized = true;
}
return &ampere_mmu_mode_hal;
}
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id)
{
switch (client_id) {
case NV_PFAULT_CLIENT_GPC_RAST:
case NV_PFAULT_CLIENT_GPC_GCC:
case NV_PFAULT_CLIENT_GPC_GPCCS:
return UVM_AMPERE_GPC_UTLB_ID_RGG;
case NV_PFAULT_CLIENT_GPC_T1_0:
return UVM_AMPERE_GPC_UTLB_ID_LTP0;
case NV_PFAULT_CLIENT_GPC_T1_1:
case NV_PFAULT_CLIENT_GPC_PE_0:
case NV_PFAULT_CLIENT_GPC_TPCCS_0:
return UVM_AMPERE_GPC_UTLB_ID_LTP1;
case NV_PFAULT_CLIENT_GPC_T1_2:
return UVM_AMPERE_GPC_UTLB_ID_LTP2;
case NV_PFAULT_CLIENT_GPC_T1_3:
case NV_PFAULT_CLIENT_GPC_PE_1:
case NV_PFAULT_CLIENT_GPC_TPCCS_1:
return UVM_AMPERE_GPC_UTLB_ID_LTP3;
case NV_PFAULT_CLIENT_GPC_T1_4:
return UVM_AMPERE_GPC_UTLB_ID_LTP4;
case NV_PFAULT_CLIENT_GPC_T1_5:
case NV_PFAULT_CLIENT_GPC_PE_2:
case NV_PFAULT_CLIENT_GPC_TPCCS_2:
return UVM_AMPERE_GPC_UTLB_ID_LTP5;
case NV_PFAULT_CLIENT_GPC_T1_6:
return UVM_AMPERE_GPC_UTLB_ID_LTP6;
case NV_PFAULT_CLIENT_GPC_T1_7:
case NV_PFAULT_CLIENT_GPC_PE_3:
case NV_PFAULT_CLIENT_GPC_TPCCS_3:
return UVM_AMPERE_GPC_UTLB_ID_LTP7;
case NV_PFAULT_CLIENT_GPC_T1_8:
return UVM_AMPERE_GPC_UTLB_ID_LTP8;
case NV_PFAULT_CLIENT_GPC_T1_9:
case NV_PFAULT_CLIENT_GPC_PE_4:
case NV_PFAULT_CLIENT_GPC_TPCCS_4:
return UVM_AMPERE_GPC_UTLB_ID_LTP9;
case NV_PFAULT_CLIENT_GPC_T1_10:
return UVM_AMPERE_GPC_UTLB_ID_LTP10;
case NV_PFAULT_CLIENT_GPC_T1_11:
case NV_PFAULT_CLIENT_GPC_PE_5:
case NV_PFAULT_CLIENT_GPC_TPCCS_5:
return UVM_AMPERE_GPC_UTLB_ID_LTP11;
case NV_PFAULT_CLIENT_GPC_T1_12:
return UVM_AMPERE_GPC_UTLB_ID_LTP12;
case NV_PFAULT_CLIENT_GPC_T1_13:
case NV_PFAULT_CLIENT_GPC_PE_6:
case NV_PFAULT_CLIENT_GPC_TPCCS_6:
return UVM_AMPERE_GPC_UTLB_ID_LTP13;
case NV_PFAULT_CLIENT_GPC_T1_14:
return UVM_AMPERE_GPC_UTLB_ID_LTP14;
case NV_PFAULT_CLIENT_GPC_T1_15:
case NV_PFAULT_CLIENT_GPC_PE_7:
case NV_PFAULT_CLIENT_GPC_TPCCS_7:
return UVM_AMPERE_GPC_UTLB_ID_LTP15;
default:
UVM_ASSERT_MSG(false, "Invalid client value: 0x%x\n", client_id);
}
return 0;
}

View File

@@ -0,0 +1,256 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_API_H__
#define __UVM_API_H__
#include "uvm_types.h"
#include "uvm_ioctl.h"
#include "uvm_linux.h"
#include "uvm_lock.h"
#include "uvm_thread_context.h"
#include "uvm_kvmalloc.h"
#include "uvm_va_space.h"
#include "nv_uvm_types.h"
// This weird number comes from UVM_PREVENT_MIGRATION_RANGE_GROUPS_PARAMS. That
// ioctl is called frequently so we don't want to allocate a copy every time.
// It's a little over 256 bytes in size.
#define UVM_MAX_IOCTL_PARAM_STACK_SIZE 288
// The UVM_ROUTE_CMD_* macros are only intended for use in the ioctl routines.
// If the BUILD_BUG_ON fires, use __UVM_ROUTE_CMD_ALLOC instead.
#define __UVM_ROUTE_CMD_STACK(cmd, params_type, function_name, do_init_check) \
case cmd: \
{ \
params_type params; \
BUILD_BUG_ON(sizeof(params) > UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(&params, (void __user*)arg, sizeof(params))) \
return -EFAULT; \
\
params.rmStatus = uvm_global_get_status(); \
if (params.rmStatus == NV_OK) { \
if (do_init_check) \
params.rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
if (likely(params.rmStatus == NV_OK)) \
params.rmStatus = function_name(&params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, &params, sizeof(params))) \
return -EFAULT; \
\
return 0; \
}
// We need to concatenate cmd##_PARAMS here to avoid the preprocessor's argument
// prescan. Attempting concatenation in the lower-level macro will fail because
// it will have been expanded to a literal by then.
#define UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(cmd, function_name) \
__UVM_ROUTE_CMD_STACK(cmd, cmd##_PARAMS, function_name, false)
#define UVM_ROUTE_CMD_STACK_INIT_CHECK(cmd, function_name) \
__UVM_ROUTE_CMD_STACK(cmd, cmd##_PARAMS, function_name, true)
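// For example (illustrative), a routing entry such as
//
//     UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_MIGRATE, uvm_api_migrate);
//
// pastes UVM_MIGRATE_PARAMS together at this level, so the params type name
// survives even though the UVM_MIGRATE argument itself is macro-expanded to
// its ioctl number during prescan.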
// If the BUILD_BUG_ON fires, use __UVM_ROUTE_CMD_STACK instead
#define __UVM_ROUTE_CMD_ALLOC(cmd, params_type, function_name, do_init_check) \
case cmd: \
{ \
int ret = 0; \
params_type *params = uvm_kvmalloc(sizeof(*params)); \
if (!params) \
return -ENOMEM; \
BUILD_BUG_ON(sizeof(*params) <= UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
uvm_kvfree(params); \
return -EFAULT; \
} \
\
params->rmStatus = uvm_global_get_status(); \
if (params->rmStatus == NV_OK) { \
if (do_init_check) \
params->rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
if (likely(params->rmStatus == NV_OK)) \
params->rmStatus = function_name(params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, params, sizeof(*params))) \
ret = -EFAULT; \
\
uvm_kvfree(params); \
return ret; \
}
#define UVM_ROUTE_CMD_ALLOC_NO_INIT_CHECK(cmd, function_name) \
__UVM_ROUTE_CMD_ALLOC(cmd, cmd##_PARAMS, function_name, false)
#define UVM_ROUTE_CMD_ALLOC_INIT_CHECK(cmd, function_name) \
__UVM_ROUTE_CMD_ALLOC(cmd, cmd##_PARAMS, function_name, true)
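// Illustrative usage sketch (the actual dispatch lives in the driver's ioctl
// handler, not in this header): small parameter structs are routed on the
// stack and large ones through the heap-allocating variant, roughly as
//
//     switch (cmd) {
//         UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_MIGRATE, uvm_api_migrate);
//         UVM_ROUTE_CMD_ALLOC_INIT_CHECK(UVM_MAP_EXTERNAL_ALLOCATION,
//                                        uvm_api_map_external_allocation);
//     }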
// Wrap an entry point into the UVM module.
//
// An entry function with signature
//
// return_type foo(...);
//
// is required to have a counterpart of the form
//
// return_type foo_entry(...) {
// UVM_ENTRY_RET(foo(...));
// }
//
// An entry function with signature
//
// void foo(...);
//
// is required to have a counterpart of the form
//
// void foo_entry(...) {
// UVM_ENTRY_VOID(foo(...));
// }
//
// Invocations of foo must be replaced by invocations of foo_entry at the entry
// points.
#define UVM_ENTRY_WRAP(line) \
do { \
bool added; \
\
if (in_interrupt()) { \
line; \
} \
else if (uvm_thread_context_wrapper_is_used()) { \
uvm_thread_context_wrapper_t thread_context_wrapper; \
\
added = uvm_thread_context_add(&thread_context_wrapper.context); \
line; \
if (added) \
uvm_thread_context_remove(&thread_context_wrapper.context); \
} \
else { \
uvm_thread_context_t thread_context; \
\
added = uvm_thread_context_add(&thread_context); \
line; \
if (added) \
uvm_thread_context_remove(&thread_context); \
} \
    } while (0)
// Wrapper for non-void functions
#define UVM_ENTRY_RET(func_call) \
do { \
typeof(func_call) ret; \
UVM_ENTRY_WRAP((ret = (func_call))); \
return ret; \
    } while (0)
// Wrapper for void functions
#define UVM_ENTRY_VOID UVM_ENTRY_WRAP
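// Hypothetical instance of the convention above (names are illustrative; the
// real entry points live in the driver's top-level sources):
//
//     static long uvm_unlocked_ioctl_entry(struct file *filp, unsigned int cmd, unsigned long arg)
//     {
//         UVM_ENTRY_RET(uvm_unlocked_ioctl(filp, cmd, arg));
//     }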
// Validate input ranges from the user with specific alignment requirement
static bool uvm_api_range_invalid_aligned(NvU64 base, NvU64 length, NvU64 alignment)
{
return !IS_ALIGNED(base, alignment) ||
!IS_ALIGNED(length, alignment) ||
base == 0 ||
length == 0 ||
base + length < base; // Overflow
}
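// Examples, assuming a 4K alignment: base = 0x10000, length = 0x20000 is a
// valid range. base = 0, length = 0, an unaligned base or length, or a range
// such as base = 0xfffffffffffff000, length = 0x2000 (which wraps past the top
// of the address space) are all rejected.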
// Most APIs require PAGE_SIZE alignment
static bool uvm_api_range_invalid(NvU64 base, NvU64 length)
{
return uvm_api_range_invalid_aligned(base, length, PAGE_SIZE);
}
// Some APIs can only enforce 4K alignment as it's the smallest GPU page size
// even when the smallest host page is larger (e.g. 64K on ppc64le).
static bool uvm_api_range_invalid_4k(NvU64 base, NvU64 length)
{
return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_4K);
}
// Verify alignment on a 64K boundary.
static bool uvm_api_range_invalid_64k(NvU64 base, NvU64 length)
{
return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_64K);
}
// Returns true if the interval [start, start + length -1] is entirely covered
// by vmas.
//
// LOCKING: mm->mmap_lock must be held in at least read mode.
bool uvm_is_valid_vma_range(struct mm_struct *mm, NvU64 start, NvU64 length);
// Check that the interval [base, base + length) is fully covered by UVM
// managed ranges (NV_OK is returned), or (if ATS is enabled and mm != NULL)
// fully covered by valid vmas (NV_WARN_NOTHING_TO_DO is returned), or (if HMM
// is enabled and mm != NULL) fully covered by valid vmas (NV_OK is returned).
// Any other input results in a return status of NV_ERR_INVALID_ADDRESS.
//
// LOCKING: va_space->lock must be held in at least read mode. If mm != NULL,
// mm->mmap_lock must also be held in at least read mode.
NV_STATUS uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
NV_STATUS uvm_api_pageable_mem_access_on_gpu(UVM_PAGEABLE_MEM_ACCESS_ON_GPU_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unregister_gpu(UVM_UNREGISTER_GPU_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_create_range_group(UVM_CREATE_RANGE_GROUP_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_destroy_range_group(UVM_DESTROY_RANGE_GROUP_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_enable_peer_access(UVM_ENABLE_PEER_ACCESS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_disable_peer_access(UVM_DISABLE_PEER_ACCESS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_set_range_group(UVM_SET_RANGE_GROUP_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_create_external_range(UVM_CREATE_EXTERNAL_RANGE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_map_external_allocation(UVM_MAP_EXTERNAL_ALLOCATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_map_external_sparse(UVM_MAP_EXTERNAL_SPARSE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_free(UVM_FREE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_prevent_migration_range_groups(UVM_PREVENT_MIGRATION_RANGE_GROUPS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_allow_migration_range_groups(UVM_ALLOW_MIGRATION_RANGE_GROUPS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_set_preferred_location(const UVM_SET_PREFERRED_LOCATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unset_preferred_location(const UVM_UNSET_PREFERRED_LOCATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_set_accessed_by(const UVM_SET_ACCESSED_BY_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unset_accessed_by(const UVM_UNSET_ACCESSED_BY_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_register_gpu_va_space(UVM_REGISTER_GPU_VASPACE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unregister_gpu_va_space(UVM_UNREGISTER_GPU_VASPACE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_register_channel(UVM_REGISTER_CHANNEL_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unregister_channel(UVM_UNREGISTER_CHANNEL_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_enable_read_duplication(const UVM_ENABLE_READ_DUPLICATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_disable_read_duplication(const UVM_DISABLE_READ_DUPLICATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_enable_system_wide_atomics(UVM_ENABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_disable_system_wide_atomics(UVM_DISABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_enable_counters(UVM_TOOLS_ENABLE_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_disable_counters(UVM_TOOLS_DISABLE_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_read_process_memory(UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_write_process_memory(UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_map_dynamic_parallelism_region(UVM_MAP_DYNAMIC_PARALLELISM_REGION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unmap_external(UVM_UNMAP_EXTERNAL_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_populate_pageable(const UVM_POPULATE_PAGEABLE_PARAMS *params, struct file *filp);
#endif // __UVM_API_H__

View File

@@ -0,0 +1,193 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_va_space.h"
#include "uvm_ats.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
static int uvm_ats_mode = 1;
module_param(uvm_ats_mode, int, S_IRUGO);
MODULE_PARM_DESC(uvm_ats_mode, "Set to 0 to disable ATS (Address Translation Services). "
"Any other value is ignored. Has no effect unless the "
"platform supports ATS.");
void uvm_ats_init(const UvmPlatformInfo *platform_info)
{
g_uvm_global.ats.supported = platform_info->atsSupported;
g_uvm_global.ats.enabled = uvm_ats_mode &&
g_uvm_global.ats.supported &&
UVM_ATS_SUPPORTED() &&
uvm_va_space_mm_enabled_system();
}
void uvm_ats_init_va_space(uvm_va_space_t *va_space)
{
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_init_va_space(va_space);
}
NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ats_ibm_add_gpu() needs to be called even if ATS is disabled
        // since it sets parent_gpu->npu. Not setting parent_gpu->npu will
        // result in incorrect NVLink addresses. See dma_addr_to_gpu_addr().
return uvm_ats_ibm_add_gpu(parent_gpu);
}
return NV_OK;
}
void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ats_ibm_remove_gpu() needs to be called even if ATS is disabled
        // since uvm_ats_ibm_add_gpu() is called even in that case, and
        // uvm_ats_ibm_remove_gpu() needs to undo the work done by
        // uvm_ats_ibm_add_gpu() (the NPU's num_retained_gpus etc.).
uvm_ats_ibm_remove_gpu(parent_gpu);
}
}
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
NV_STATUS status = NV_OK;
UVM_ASSERT(gpu_va_space);
if (!gpu_va_space->ats.enabled)
return status;
uvm_assert_lockable_order(UVM_LOCK_ORDER_MMAP_LOCK);
uvm_assert_lockable_order(UVM_LOCK_ORDER_VA_SPACE);
if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_bind_gpu(gpu_va_space);
return status;
}
void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
UVM_ASSERT(gpu_va_space);
if (!gpu_va_space->ats.enabled)
return;
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unbind_gpu(gpu_va_space);
}
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
NV_STATUS status = NV_OK;
uvm_va_space_t *va_space;
uvm_gpu_id_t gpu_id;
UVM_ASSERT(gpu_va_space);
if (!gpu_va_space->ats.enabled)
return status;
va_space = gpu_va_space->va_space;
UVM_ASSERT(va_space);
uvm_assert_rwsem_locked_write(&va_space->lock);
gpu_id = gpu_va_space->gpu->id;
// Prevent multiple registrations of the same gpu_va_space for ATS access.
if (uvm_processor_mask_test(&va_space->ats.registered_gpu_va_spaces, gpu_id))
return NV_ERR_INVALID_DEVICE;
if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
if (status == NV_OK)
uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
return status;
}
void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_gpu_id_t gpu_id;
uvm_va_space_t *va_space;
UVM_ASSERT(gpu_va_space);
if (!gpu_va_space->ats.enabled)
return;
va_space = gpu_va_space->va_space;
gpu_id = gpu_va_space->gpu->id;
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
uvm_va_space_down_write(va_space);
uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
uvm_va_space_up_write(va_space);
}
void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
// We can only reach here from the mmu_notifier callbacks and these callbacks
// wouldn't have been registered if ATS wasn't enabled.
UVM_ASSERT(g_uvm_global.ats.enabled);
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_invalidate(va_space, start, end);
}

View File

@@ -0,0 +1,152 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_ATS_H__
#define __UVM_ATS_H__
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED())
typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
// indexed by gpu->id. This mask is protected by the VA space lock.
uvm_processor_mask_t registered_gpu_va_spaces;
union
{
uvm_ibm_va_space_t ibm;
};
} uvm_ats_va_space_t;
typedef struct
{
// Each GPU VA space can have ATS enabled or disabled in its hardware
// state. This is controlled by user space when it allocates that GPU VA
// space object from RM. This flag indicates the mode user space
// requested when allocating this GPU VA space.
bool enabled;
NvU32 pasid;
union
{
uvm_ibm_gpu_va_space_t ibm;
};
} uvm_ats_gpu_va_space_t;
// Initializes driver-wide ATS state
//
// LOCKING: None
void uvm_ats_init(const UvmPlatformInfo *platform_info);
// Initializes ATS specific GPU state
//
// LOCKING: None
void uvm_ats_init_va_space(uvm_va_space_t *va_space);
// Enables ATS feature on the GPU.
//
// LOCKING: g_uvm_global.global lock mutex must be held.
NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu);
// Disables ATS feature on the GPU. The caller is responsible for ensuring
// that the GPU won't issue ATS requests anymore prior to calling this function.
//
// LOCKING: g_uvm_global.global lock mutex must be held.
void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu);
// Creates a binding on the GPU for the mm associated with the VA space
// (va_space_mm). Multiple calls to this function are tracked and refcounted for
// the specific {gpu, mm} pair. A successful uvm_ats_add_gpu() must precede a
// call to this function.
//
// LOCKING: mmap_lock must be lockable.
// VA space lock must be lockable.
// gpu_va_space->gpu must be retained.
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
// Decrements the refcount on the {gpu, mm} pair. Removes the binding from the
// mm (va_space_mm) to this GPU when the refcount reaches zero.
//
// LOCKING: None
void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
// Enables ATS access on the GPU for the mm_struct associated with the VA space
// (va_space_mm) and assigns a PASID. A successful uvm_ats_bind_gpu() must
// precede a call to this function. Returns NV_ERR_INVALID_DEVICE if the
// gpu_va_space is already registered for ATS access.
//
// LOCKING: The VA space lock must be held in write mode.
// mm has to be retained prior to calling this function.
// current->mm->mmap_lock must be held in write mode iff
// UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Disables ATS access for the gpu_va_space. Prior to calling this function,
// the caller must guarantee that the GPU will no longer make any ATS
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and will acquire the VA space
// lock, so neither lock must be held.
void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
// interrupts disabled.
void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
#endif // __UVM_ATS_H__

View File

@@ -0,0 +1,232 @@
/*******************************************************************************
Copyright (c) 2018 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_tools.h"
#include "uvm_va_range.h"
#include "uvm_ats_faults.h"
#include "uvm_migrate_pageable.h"
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
NvU64 fault_addr,
uvm_fault_access_type_t access_type)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
NV_STATUS status;
NvU64 start;
NvU64 length;
// Request uvm_migrate_pageable() to touch the corresponding page after
// population.
// Under virtualization ATS provides two translations:
// 1) guest virtual -> guest physical
// 2) guest physical -> host physical
//
// The overall ATS translation will fault if either of those translations is
    // invalid. The population performed by uvm_migrate_pageable() below
    // handles translation #1, but not #2. We don't know if we're running as a
    // guest, but in case we are we can
// force that translation to be valid by touching the guest physical address
// from the CPU. If the translation is not valid then the access will cause
// a hypervisor fault. Note that dma_map_page() can't establish mappings
// used by GPU ATS SVA translations. GPU accesses to host physical addresses
    // obtained as a result of the address translation request use the CPU
// address space instead of the IOMMU address space since the translated
// host physical address isn't necessarily an IOMMU address. The only way to
// establish guest physical to host physical mapping in the CPU address
// space is to touch the page from the CPU.
//
// We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
// VM_WRITE, meaning that the mappings are all granted write access on any
// fault and that the kernel will never revoke them.
// drivers/vfio/pci/vfio_pci_nvlink2.c enforces this. Thus we can assume
// that a read fault is always sufficient to also enable write access on the
// guest translation.
uvm_migrate_args_t uvm_migrate_args =
{
.va_space = va_space,
.mm = mm,
.start = fault_addr,
.length = PAGE_SIZE,
.dst_id = gpu_va_space->gpu->parent->id,
.dst_node_id = -1,
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
.touch = true,
.skip_mapped = true,
.user_space_start = &start,
.user_space_length = &length,
};
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
// TODO: Bug 2103669: Service more than a single fault at a time
//
// We are trying to use migrate_vma API in the kernel (if it exists) to
// populate and map the faulting region on the GPU. We want to do this only
// on the first touch. That is, pages which are not already mapped. So, we
// set skip_mapped to true. For pages already mapped, this will only handle
// PTE upgrades if needed.
status = uvm_migrate_pageable(&uvm_migrate_args);
if (status == NV_WARN_NOTHING_TO_DO)
status = NV_OK;
UVM_ASSERT(status != NV_ERR_MORE_PROCESSING_REQUIRED);
return status;
}
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_buffer_entry_t *current_entry,
uvm_ats_fault_invalidate_t *ats_invalidate)
{
NvU64 gmmu_region_base;
bool in_gmmu_region;
NV_STATUS status = NV_OK;
uvm_fault_access_type_t service_access_type;
UVM_ASSERT(g_uvm_global.ats.enabled);
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
UVM_ASSERT(current_entry->fault_access_type ==
uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
service_access_type = current_entry->fault_access_type;
// ATS lookups are disabled on all addresses within the same
// UVM_GMMU_ATS_GRANULARITY as existing GMMU mappings (see documentation in
// uvm_mmu.h). User mode is supposed to reserve VAs as appropriate to
// prevent any system memory allocations from falling within the NO_ATS
// range of other GMMU mappings, so this shouldn't happen during normal
// operation. However, since this scenario may lead to infinite fault loops,
// we handle it by canceling the fault.
//
// TODO: Bug 2103669: Remove redundant VA range lookups
gmmu_region_base = UVM_ALIGN_DOWN(current_entry->fault_address, UVM_GMMU_ATS_GRANULARITY);
in_gmmu_region = !uvm_va_space_range_empty(current_entry->va_space,
gmmu_region_base,
gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1);
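    // Illustrative example: if UVM_GMMU_ATS_GRANULARITY were 512M (see
    // uvm_mmu.h for the real value), a fault at 0x7f1234567000 would check the
    // VA space for any range overlapping [0x7f1220000000, 0x7f123fffffff] and,
    // if one exists, cancel the fault instead of servicing it through ATS.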
if (in_gmmu_region) {
status = NV_ERR_INVALID_ADDRESS;
}
else {
// TODO: Bug 2103669: Service more than a single fault at a time
status = uvm_ats_service_fault(gpu_va_space, current_entry->fault_address, service_access_type);
}
// Do not flag prefetch faults as fatal unless something fatal happened
if (status == NV_ERR_INVALID_ADDRESS) {
if (current_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH) {
current_entry->is_fatal = true;
current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
// Compute cancel mode for replayable faults
if (current_entry->is_replayable) {
if (service_access_type == UVM_FAULT_ACCESS_TYPE_READ || in_gmmu_region)
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
else
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC;
// If there are pending read accesses on the same page, we have to
// service them before we can cancel the write/atomic faults. So we
// retry with read fault access type.
if (!in_gmmu_region &&
current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ &&
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
status = uvm_ats_service_fault(gpu_va_space,
current_entry->fault_address,
UVM_FAULT_ACCESS_TYPE_READ);
// If read accesses are also invalid, cancel the fault. If a
// different error code is returned, exit
if (status == NV_ERR_INVALID_ADDRESS)
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
else if (status != NV_OK)
return status;
}
}
}
else {
current_entry->is_invalid_prefetch = true;
}
// Do not fail overall fault servicing due to logical errors
status = NV_OK;
}
// The Linux kernel never invalidates TLB entries on mapping permission
// upgrade. This is a problem if the GPU has cached entries with the old
// permission. The GPU will re-fetch the entry if the PTE is invalid and
// page size is not 4K (this is the case on P9). However, if a page gets
// upgraded from R/O to R/W and GPU has the PTEs cached with R/O
// permissions we will enter an infinite loop because we just forward the
// fault to the Linux kernel and it will see that the permissions in the
// page table are correct. Therefore, we flush TLB entries on ATS write
// faults.
if (!current_entry->is_fatal && current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ) {
if (!ats_invalidate->write_faults_in_batch) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
ats_invalidate->write_faults_in_batch = true;
}
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch,
current_entry->fault_address,
PAGE_SIZE,
PAGE_SIZE,
UVM_MEMBAR_NONE);
}
return status;
}
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_tracker_t *out_tracker)
{
NV_STATUS status;
uvm_push_t push;
if (!ats_invalidate->write_faults_in_batch)
return NV_OK;
UVM_ASSERT(gpu_va_space);
UVM_ASSERT(gpu_va_space->ats.enabled);
status = uvm_push_begin(gpu_va_space->gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"Invalidate ATS entries");
if (status == NV_OK) {
uvm_tlb_batch_end(&ats_invalidate->write_faults_tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_push_end(&push);
// Add this push to the GPU's tracker so that fault replays/clears can
// wait on it
status = uvm_tracker_add_push_safe(out_tracker, &push);
}
ats_invalidate->write_faults_in_batch = false;
return status;
}

View File

@@ -0,0 +1,47 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_va_space.h"
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_buffer_entry_t *current_entry,
uvm_ats_fault_invalidate_t *ats_invalidate);
// This function performs pending TLB invalidations for ATS and clears the
// ats_invalidate->write_faults_in_batch flag
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_tracker_t *out_tracker);
static bool uvm_ats_can_service_faults(uvm_gpu_va_space_t *gpu_va_space, struct mm_struct *mm)
{
if (mm)
uvm_assert_mmap_lock_locked(mm);
if (gpu_va_space->ats.enabled)
UVM_ASSERT(g_uvm_global.ats.enabled);
return gpu_va_space->ats.enabled && mm;
}

View File

@@ -0,0 +1,715 @@
/*******************************************************************************
Copyright (c) 2018-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_lock.h"
#include "uvm_kvmalloc.h"
#include "uvm_global.h"
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"
#include "uvm_ats_ibm.h"
#include "uvm_common.h"
#include <linux/pci.h>
#if UVM_IBM_NPU_SUPPORTED()
#include <linux/of.h>
#include <linux/sizes.h>
#include <asm/pci-bridge.h>
#include <asm/io.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#define NPU_ATSD_REG_MAP_SIZE 32
// There are three 8-byte registers in each ATSD mapping:
#define NPU_ATSD_REG_LAUNCH 0
#define NPU_ATSD_REG_AVA 1
#define NPU_ATSD_REG_STAT 2
// Fields within the NPU_ATSD_REG_LAUNCH register:
// "PRS" (process-scoped) bit. 1 means to limit invalidates to the specified
// PASID.
#define NPU_ATSD_REG_LAUNCH_PASID_ENABLE 13
// "PID" field. This specifies the PASID target of the invalidate.
#define NPU_ATSD_REG_LAUNCH_PASID_VAL 38
// "IS" bit. 0 means the specified virtual address range will be invalidated. 1
// means all entries will be invalidated.
#define NPU_ATSD_REG_LAUNCH_INVAL_ALL 12
// "AP" field. This encodes the size of a range-based invalidate.
#define NPU_ATSD_REG_LAUNCH_INVAL_SIZE 17
// "No flush" bit. 0 will trigger a flush (membar) from the GPU following the
// invalidate, 1 will not.
#define NPU_ATSD_REG_LAUNCH_FLUSH_DISABLE 39
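// Illustrative sketch (not the driver's actual encoding; see the ATSD
// invalidate helpers later in this file): on POWER9 the launch value is
// typically assembled with the MSB0-numbered PPC_BIT()/PPC_BITLSHIFT()
// helpers, conceptually along the lines of
//
//     launch = PPC_BIT(NPU_ATSD_REG_LAUNCH_PASID_ENABLE) |
//              ((NvU64)pasid << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_PASID_VAL)) |
//              ... invalidate size / flush fields ...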
// Helper to iterate over the active NPUs in the given VA space (all NPUs with
// GPUs that have GPU VA spaces registered in this VA space).
#define for_each_npu_index_in_va_space(npu_index, va_space) \
for (({uvm_assert_rwlock_locked(&(va_space)->ats.ibm.rwlock); \
(npu_index) = find_first_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS);}); \
(npu_index) < NV_MAX_NPUS; \
(npu_index) = find_next_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS, (npu_index) + 1))
// An invalidate requires operating on one set of registers in each NPU. This
// struct tracks which register set (id) is in use per NPU for a given
// operation.
typedef struct
{
NvU8 ids[NV_MAX_NPUS];
} uvm_atsd_regs_t;
// Get the index of the input npu pointer within UVM's global npus array
static size_t uvm_ibm_npu_index(uvm_ibm_npu_t *npu)
{
size_t npu_index = npu - &g_uvm_global.npus[0];
UVM_ASSERT(npu_index < ARRAY_SIZE(g_uvm_global.npus));
return npu_index;
}
// Find an existing NPU matching pci_domain, or return an empty NPU slot if none
// is found. Returns NULL if no slots are available.
static uvm_ibm_npu_t *uvm_ibm_npu_find(int pci_domain)
{
size_t i;
uvm_ibm_npu_t *npu, *first_free = NULL;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for (i = 0; i < ARRAY_SIZE(g_uvm_global.npus); i++) {
npu = &g_uvm_global.npus[i];
if (npu->num_retained_gpus == 0) {
if (!first_free)
first_free = npu;
}
else if (npu->pci_domain == pci_domain) {
return npu;
}
}
return first_free;
}
static void uvm_ibm_npu_destroy(uvm_ibm_npu_t *npu)
{
size_t i;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(npu->num_retained_gpus == 0);
UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));
for (i = 0; i < npu->atsd_regs.count; i++) {
UVM_ASSERT(npu->atsd_regs.io_addrs[i]);
iounmap(npu->atsd_regs.io_addrs[i]);
}
memset(npu, 0, sizeof(*npu));
}
static NV_STATUS uvm_ibm_npu_init(uvm_ibm_npu_t *npu, struct pci_dev *npu_dev)
{
struct pci_controller *hose;
size_t i, reg_count, reg_size = sizeof(npu->atsd_regs.io_addrs[0]);
int ret;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(npu->num_retained_gpus == 0);
UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));
npu->pci_domain = pci_domain_nr(npu_dev->bus);
if (!UVM_ATS_IBM_SUPPORTED_IN_DRIVER())
return NV_OK;
hose = pci_bus_to_host(npu_dev->bus);
ret = of_property_count_elems_of_size(hose->dn, "ibm,mmio-atsd", reg_size);
if (ret < 0) {
UVM_ERR_PRINT("Failed to query NPU %d ATSD register count: %d\n", npu->pci_domain, ret);
return errno_to_nv_status(ret);
}
// For ATS to be enabled globally, we must have NPU ATSD registers
reg_count = ret;
if (reg_count == 0 || reg_count > UVM_MAX_ATSD_REGS) {
UVM_ERR_PRINT("NPU %d has invalid ATSD register count: %zu\n", npu->pci_domain, reg_count);
return NV_ERR_INVALID_STATE;
}
// Map the ATSD registers
for (i = 0; i < reg_count; i++) {
u64 phys_addr;
__be64 __iomem *io_addr;
ret = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", i, &phys_addr);
UVM_ASSERT(ret == 0);
io_addr = ioremap(phys_addr, NPU_ATSD_REG_MAP_SIZE);
if (!io_addr) {
uvm_ibm_npu_destroy(npu);
return NV_ERR_NO_MEMORY;
}
npu->atsd_regs.io_addrs[npu->atsd_regs.count++] = io_addr;
}
return NV_OK;
}
NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
struct pci_dev *npu_dev = pnv_pci_get_npu_dev(parent_gpu->pci_dev, 0);
uvm_ibm_npu_t *npu;
NV_STATUS status;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (!npu_dev)
return NV_OK;
npu = uvm_ibm_npu_find(pci_domain_nr(npu_dev->bus));
if (!npu) {
// If this happens then we can't support the system configuration until
// NV_MAX_NPUS is updated. Return the same error as when the number of
// GPUs exceeds UVM_MAX_GPUS.
UVM_ERR_PRINT("No more NPU slots available, update NV_MAX_NPUS\n");
return NV_ERR_INSUFFICIENT_RESOURCES;
}
if (npu->num_retained_gpus == 0) {
status = uvm_ibm_npu_init(npu, npu_dev);
if (status != NV_OK)
return status;
}
// This npu field could be read concurrently by a thread in the ATSD
// invalidate path. We don't need to provide ordering with those threads
// because those invalidates won't apply to the GPU being added until a GPU
// VA space on this GPU is registered.
npu->atsd_regs.num_membars = max(npu->atsd_regs.num_membars, parent_gpu->num_hshub_tlb_invalidate_membars);
parent_gpu->npu = npu;
++npu->num_retained_gpus;
return NV_OK;
}
void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
uvm_ibm_npu_t *npu = parent_gpu->npu;
uvm_parent_gpu_t *other_parent_gpu;
NvU32 num_membars_new = 0;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (!npu)
return;
UVM_ASSERT(npu->num_retained_gpus > 0);
if (--npu->num_retained_gpus == 0) {
uvm_ibm_npu_destroy(npu);
}
else {
// Re-calculate the membar count
for_each_parent_gpu(other_parent_gpu) {
// The current GPU being removed should've already been removed from
// the global list.
UVM_ASSERT(other_parent_gpu != parent_gpu);
if (other_parent_gpu->npu == npu)
num_membars_new = max(num_membars_new, other_parent_gpu->num_hshub_tlb_invalidate_membars);
}
UVM_ASSERT(num_membars_new > 0);
npu->atsd_regs.num_membars = num_membars_new;
}
}
#if UVM_ATS_IBM_SUPPORTED()
void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{
uvm_ibm_va_space_t *ibm_va_space;
UVM_ASSERT(va_space);
ibm_va_space = &va_space->ats.ibm;
uvm_rwlock_irqsave_init(&ibm_va_space->rwlock, UVM_LOCK_ORDER_LEAF);
}
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
static void npu_release_dummy(struct npu_context *npu_context, void *va_mm)
{
// See the comment on the call to pnv_npu2_init_context()
}
static NV_STATUS uvm_ats_ibm_register_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
struct npu_context *npu_context;
// pnv_npu2_init_context() registers current->mm with
// mmu_notifier_register(). We need that to match the mm we passed to our
// own mmu_notifier_register() for this VA space.
if (current->mm != va_space->va_space_mm.mm)
return NV_ERR_NOT_SUPPORTED;
uvm_assert_mmap_lock_locked_write(current->mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
// pnv_npu2_init_context() doesn't handle being called multiple times for
// the same GPU under the same mm, which could happen if multiple VA spaces
// are created in this process. To handle that we pass the VA space pointer
// as the callback parameter: the callback values are shared by all devices
// under this mm, so pnv_npu2_init_context() enforces that the values match
// the ones already registered to the mm.
//
// Otherwise we don't use the callback, since we have our own callback
// registered under the va_space_mm that will be called at the same point
// (mmu_notifier release).
npu_context = pnv_npu2_init_context(gpu_va_space->gpu->parent->pci_dev,
(MSR_DR | MSR_PR | MSR_HV),
npu_release_dummy,
va_space);
if (IS_ERR(npu_context)) {
int err = PTR_ERR(npu_context);
// We'll get -EINVAL if the callback value (va_space) differs from the
// one already registered to the npu_context associated with this mm.
// That can only happen when multiple VA spaces attempt registration
// within the same process, which is disallowed and should return
// NV_ERR_NOT_SUPPORTED.
if (err == -EINVAL)
return NV_ERR_NOT_SUPPORTED;
return errno_to_nv_status(err);
}
ibm_gpu_va_space->npu_context = npu_context;
return NV_OK;
}
static void uvm_ats_ibm_unregister_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_gpu_va_space_state_t state;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ibm_va_space_t *ibm_va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
if (!ibm_gpu_va_space->npu_context)
return;
// va_space is guaranteed to not be NULL if ibm_gpu_va_space->npu_context is
// not NULL.
UVM_ASSERT(va_space);
state = uvm_gpu_va_space_state(gpu_va_space);
UVM_ASSERT(state == UVM_GPU_VA_SPACE_STATE_INIT || state == UVM_GPU_VA_SPACE_STATE_DEAD);
ibm_va_space = &va_space->ats.ibm;
// pnv_npu2_destroy_context() may in turn call mmu_notifier_unregister().
// If uvm_va_space_mm_shutdown() is concurrently executing in another
// thread, mmu_notifier_unregister() will wait for
// uvm_va_space_mm_shutdown() to finish. uvm_va_space_mm_shutdown() takes
// mmap_lock and the VA space lock, so we can't be holding those locks on
// this path.
uvm_assert_unlocked_order(UVM_LOCK_ORDER_MMAP_LOCK);
uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);
pnv_npu2_destroy_context(ibm_gpu_va_space->npu_context, gpu_va_space->gpu->parent->pci_dev);
ibm_gpu_va_space->npu_context = NULL;
}
#else
static void uvm_ats_ibm_register_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
uvm_gpu_t *gpu = gpu_va_space->gpu;
size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
uvm_ibm_va_space_t *ibm_va_space;
UVM_ASSERT(va_space);
ibm_va_space = &va_space->ats.ibm;
uvm_assert_rwsem_locked_write(&va_space->lock);
uvm_write_lock_irqsave(&ibm_va_space->rwlock);
// If this is the first GPU VA space to use this NPU in the VA space, mark
// the NPU as active so invalidates are issued to it.
if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
// If this is the first active NPU in the entire VA space, we have to
// tell the kernel to send TLB invalidations to the IOMMU. See kernel
// commit 03b8abedf4f4965e7e9e0d4f92877c42c07ce19f for background.
//
// This is safe to do without holding mm_users high or mmap_lock.
if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
mm_context_add_copro(va_space->va_space_mm.mm);
UVM_ASSERT(!test_bit(npu_index, ibm_va_space->npu_active_mask));
__set_bit(npu_index, ibm_va_space->npu_active_mask);
}
else {
UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));
}
++ibm_va_space->npu_ref_counts[npu_index];
// As soon as this lock is dropped, invalidates on this VA space's mm may
// begin issuing ATSDs to this NPU.
uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);
ibm_gpu_va_space->did_ibm_driver_init = true;
}
static void uvm_ats_ibm_unregister_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_gpu_t *gpu = gpu_va_space->gpu;
size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
bool do_remove = false;
uvm_ibm_va_space_t *ibm_va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
if (!ibm_gpu_va_space->did_ibm_driver_init)
return;
UVM_ASSERT(va_space);
ibm_va_space = &va_space->ats.ibm;
// Note that we aren't holding the VA space lock here, so another thread
// could be in uvm_ats_ibm_register_gpu_va_space() for this same GPU right
// now. The write lock and ref counts below will handle that case.
// Once we return from this function with a bit cleared in the
// npu_active_mask, we have to guarantee that this VA space no longer
// accesses that NPU's ATSD registers. This is needed in case GPU unregister
// needs to unmap those registers. We use the reader/writer lock to
// guarantee this, which means that invalidations must not access the ATSD
// registers outside of the lock.
//
// Future work: if we could synchronize_srcu() on the mmu_notifier SRCU we
// might do that here instead to flush out all invalidates. That would allow
// us to avoid taking a read lock in the invalidate path, though we'd have
// to be careful when clearing the mask bit relative to the synchronize, and
// we'd have to be careful in cases where this thread doesn't hold a
// reference to mm_users.
uvm_write_lock_irqsave(&ibm_va_space->rwlock);
UVM_ASSERT(ibm_va_space->npu_ref_counts[npu_index] > 0);
UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));
--ibm_va_space->npu_ref_counts[npu_index];
if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
__clear_bit(npu_index, ibm_va_space->npu_active_mask);
if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
do_remove = true;
}
uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);
if (do_remove) {
// mm_context_remove_copro() must be called outside of the spinlock
// because it may issue invalidates across CPUs in this mm. The
// coprocessor count is atomically refcounted by that function, so it's
// safe to call here even if another thread jumps in with a register and
// calls mm_context_add_copro() between this thread's unlock and this
// call.
UVM_ASSERT(va_space->va_space_mm.mm);
mm_context_remove_copro(va_space->va_space_mm.mm);
}
}
#endif // UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
static mm_context_id_t va_space_pasid(uvm_va_space_t *va_space)
{
struct mm_struct *mm = va_space->va_space_mm.mm;
UVM_ASSERT(mm);
return mm->context.id;
}
NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
NV_STATUS status = NV_OK;
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
UVM_ASSERT(va_space->va_space_mm.mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
status = uvm_ats_ibm_register_gpu_va_space_kernel(gpu_va_space);
#else
uvm_ats_ibm_register_gpu_va_space_driver(gpu_va_space);
#endif
gpu_va_space->ats.pasid = (NvU32) va_space_pasid(gpu_va_space->va_space);
return status;
}
void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
uvm_ats_ibm_unregister_gpu_va_space_kernel(gpu_va_space);
#else
uvm_ats_ibm_unregister_gpu_va_space_driver(gpu_va_space);
#endif
gpu_va_space->ats.pasid = -1U;
}
#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
// Find any available ATSD register set in this NPU and return that index. This
// will busy wait until a register set is free.
static NvU8 atsd_reg_acquire(uvm_ibm_npu_t *npu)
{
uvm_spin_loop_t spin;
size_t i;
bool first = true;
while (1) {
// Using for_each_clear_bit is racy, since the bits could change at any
// point. That's ok since we'll either just retry or use a real atomic
// to lock the bit. Checking for clear bits first avoids spamming
// atomics in the contended case.
for_each_clear_bit(i, npu->atsd_regs.locks, npu->atsd_regs.count) {
if (!test_and_set_bit_lock(i, npu->atsd_regs.locks))
return (NvU8)i;
}
// Back off and try again, avoiding the overhead of initializing the
// tracking timers unless we need them.
if (first) {
uvm_spin_loop_init(&spin);
first = false;
}
else {
UVM_SPIN_LOOP(&spin);
}
}
}
static void atsd_reg_release(uvm_ibm_npu_t *npu, NvU8 reg)
{
UVM_ASSERT(reg < npu->atsd_regs.count);
UVM_ASSERT(test_bit(reg, npu->atsd_regs.locks));
clear_bit_unlock(reg, npu->atsd_regs.locks);
}
static __be64 atsd_reg_read(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset)
{
__be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
UVM_ASSERT(reg < npu->atsd_regs.count);
return __raw_readq(io_addr);
}
static void atsd_reg_write(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset, NvU64 val)
{
__be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
UVM_ASSERT(reg < npu->atsd_regs.count);
__raw_writeq_be(val, io_addr);
}
// Acquire a set of registers in each NPU which is active in va_space
static void atsd_regs_acquire(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
size_t i;
for_each_npu_index_in_va_space(i, va_space)
regs->ids[i] = atsd_reg_acquire(&g_uvm_global.npus[i]);
}
static void atsd_regs_release(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
size_t i;
for_each_npu_index_in_va_space(i, va_space)
atsd_reg_release(&g_uvm_global.npus[i], regs->ids[i]);
}
// Write the provided value to each NPU active in va_space at the provided
// register offset.
static void atsd_regs_write(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, size_t offset, NvU64 val)
{
size_t i;
for_each_npu_index_in_va_space(i, va_space)
atsd_reg_write(&g_uvm_global.npus[i], regs->ids[i], offset, val);
}
// Wait for all prior operations issued to active NPUs in va_space on the given
// registers to finish.
static void atsd_regs_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
uvm_spin_loop_t spin;
size_t i;
for_each_npu_index_in_va_space(i, va_space) {
UVM_SPIN_WHILE(atsd_reg_read(&g_uvm_global.npus[i], regs->ids[i], NPU_ATSD_REG_STAT), &spin)
;
}
}
// Encode an invalidate targeting the given pasid and the given size for the
// NPU_ATSD_REG_LAUNCH register. The target address is encoded separately.
//
// psize must be one of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A
// psize of MMU_PAGE_COUNT means to invalidate the entire address space.
static NvU64 atsd_get_launch_val(mm_context_id_t pasid, int psize)
{
NvU64 val = 0;
val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_PASID_ENABLE);
val |= pasid << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_PASID_VAL);
if (psize == MMU_PAGE_COUNT) {
val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_INVAL_ALL);
}
else {
// The NPU registers do not support arbitrary sizes
UVM_ASSERT(psize == MMU_PAGE_64K || psize == MMU_PAGE_2M || psize == MMU_PAGE_1G);
val |= (NvU64)mmu_get_ap(psize) << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_INVAL_SIZE);
}
return val;
}
// Return the encoded size to use for an ATSD targeting the given range, in one
// of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A return value of
// MMU_PAGE_COUNT means the entire address space must be invalidated.
//
// start is an in/out parameter. On return start will be set to the aligned
// starting address to use for the ATSD. end is inclusive.
static int atsd_calc_size(NvU64 *start, NvU64 end)
{
// ATSDs have high latency, so we prefer to over-invalidate rather than
// issue multiple precise invalidates. Supported sizes are only 64K, 2M, and
// 1G.
*start = UVM_ALIGN_DOWN(*start, SZ_64K);
end = UVM_ALIGN_DOWN(end, SZ_64K);
if (*start == end)
return MMU_PAGE_64K;
*start = UVM_ALIGN_DOWN(*start, SZ_2M);
end = UVM_ALIGN_DOWN(end, SZ_2M);
if (*start == end)
return MMU_PAGE_2M;
*start = UVM_ALIGN_DOWN(*start, SZ_1G);
end = UVM_ALIGN_DOWN(end, SZ_1G);
if (*start == end)
return MMU_PAGE_1G;
return MMU_PAGE_COUNT;
}
// Issue an ATSD to all NPUs and wait for completion
static void atsd_launch_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 val)
{
atsd_regs_write(va_space, regs, NPU_ATSD_REG_LAUNCH, val);
atsd_regs_wait(va_space, regs);
}
// Issue and wait for the required membars following an invalidate
static void atsd_issue_membars(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
size_t i;
NvU32 num_membars = 0;
// These membars are issued using ATSDs which target a reserved PASID of 0.
// That PASID is kept valid on the GPU so the membar takes effect, but the
// kernel will never use PASID 0 for an actual address space, so the ATSD
// won't actually invalidate any entries.
NvU64 val = atsd_get_launch_val(0, MMU_PAGE_COUNT);
for_each_npu_index_in_va_space(i, va_space) {
uvm_ibm_npu_t *npu = &g_uvm_global.npus[i];
num_membars = max(num_membars, npu->atsd_regs.num_membars);
}
for (i = 0; i < num_membars; i++)
atsd_launch_wait(va_space, regs, val);
}
static void uvm_ats_ibm_invalidate_all(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), MMU_PAGE_COUNT);
atsd_launch_wait(va_space, regs, val);
atsd_issue_membars(va_space, regs);
}
static void uvm_ats_ibm_invalidate_range(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 start, int psize)
{
NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), psize);
// Barriers are expensive, so write all address registers first then do a
// single barrier for all of them.
atsd_regs_write(va_space, regs, NPU_ATSD_REG_AVA, start);
eieio();
atsd_launch_wait(va_space, regs, val);
atsd_issue_membars(va_space, regs);
}
#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
unsigned long irq_flags;
uvm_atsd_regs_t regs;
NvU64 atsd_start = start;
int psize = atsd_calc_size(&atsd_start, end);
uvm_ibm_va_space_t *ibm_va_space = &va_space->ats.ibm;
BUILD_BUG_ON(order_base_2(UVM_MAX_ATSD_REGS) > 8*sizeof(regs.ids[0]));
// We must hold this lock in at least read mode when accessing NPU
// registers. See the comment in uvm_ats_ibm_unregister_gpu_va_space_driver.
uvm_read_lock_irqsave(&ibm_va_space->rwlock, irq_flags);
if (!bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS)) {
atsd_regs_acquire(va_space, &regs);
if (psize == MMU_PAGE_COUNT)
uvm_ats_ibm_invalidate_all(va_space, &regs);
else
uvm_ats_ibm_invalidate_range(va_space, &regs, atsd_start, psize);
atsd_regs_release(va_space, &regs);
}
uvm_read_unlock_irqrestore(&ibm_va_space->rwlock, irq_flags);
#else
UVM_ASSERT_MSG(0, "This function should not be called on this kernel version\n");
#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
}
#endif // UVM_ATS_IBM_SUPPORTED
#endif // UVM_IBM_NPU_SUPPORTED

View File

@@ -0,0 +1,266 @@
/*******************************************************************************
Copyright (c) 2018-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_ATS_IBM_H__
#define __UVM_ATS_IBM_H__
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"
#if defined(NVCPU_PPC64LE) && defined(NV_PNV_PCI_GET_NPU_DEV_PRESENT)
#include <asm/mmu.h>
#if defined(NV_MAX_NPUS)
#define UVM_IBM_NPU_SUPPORTED() 1
#else
#define UVM_IBM_NPU_SUPPORTED() 0
#endif
#else
#define UVM_IBM_NPU_SUPPORTED() 0
#endif
#if defined(NV_ASM_OPAL_API_H_PRESENT)
// For OPAL_NPU_INIT_CONTEXT
#include <asm/opal-api.h>
#endif
// Timeline of kernel changes:
//
// 0) Before 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c
// - No NPU-ATS code existed, nor did the OPAL_NPU_INIT_CONTEXT firmware
// call.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Not defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Not defined
// - OPAL_NPU_INIT_CONTEXT Not defined
// - ATS support type None
//
// 1) NPU ATS code added: 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c, v4.12
// (2017-04-03)
// - This commit added initial support for NPU ATS, including the necessary
// OPAL firmware calls. This support was developmental and required
// several bug fixes before it could be used in production.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Not defined
// - OPAL_NPU_INIT_CONTEXT Defined
// - ATS support type None
//
// 2) NPU ATS code fixed: a1409adac748f0db655e096521bbe6904aadeb98, v4.17
// (2018-04-11)
// - This commit changed the function signature for pnv_npu2_init_context's
// callback parameter. Since all required bug fixes went in prior to this
// change, we can use the callback signature as a flag to indicate
// whether the PPC arch layer in the kernel supports ATS in production.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Defined
// - OPAL_NPU_INIT_CONTEXT Defined
// - ATS support type Kernel
//
// 3) NPU ATS code removed: 7eb3cf761927b2687164e182efa675e6c09cfe44, v5.3
// (2019-06-25)
// - This commit removed NPU-ATS support from the PPC arch layer, so the
// driver needs to handle things instead. pnv_npu2_init_context is no
// longer present, so we use OPAL_NPU_INIT_CONTEXT to differentiate
// between this state and scenario #0.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Not defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Not defined
// - OPAL_NPU_INIT_CONTEXT Defined
// - ATS support type Driver
//
#if defined(NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID)
#define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 1
#define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#elif !defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT) && defined(OPAL_NPU_INIT_CONTEXT) && UVM_CAN_USE_MMU_NOTIFIERS()
#define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
#define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 1
#else
#define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
#define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#endif
#define UVM_ATS_IBM_SUPPORTED() (UVM_ATS_IBM_SUPPORTED_IN_KERNEL() || UVM_ATS_IBM_SUPPORTED_IN_DRIVER())
// Maximum number of parallel ATSD register sets per NPU
#define UVM_MAX_ATSD_REGS 16
typedef struct
{
#if UVM_IBM_NPU_SUPPORTED()
// These are the active NPUs in this VA space, that is, all NPUs with
// GPUs that have GPU VA spaces registered in this VA space.
//
// If a bit is clear in npu_active_mask then the corresponding entry of
// npu_ref_counts is 0. If a bit is set then the corresponding entry of
// npu_ref_counts is greater than 0.
NvU32 npu_ref_counts[NV_MAX_NPUS];
DECLARE_BITMAP(npu_active_mask, NV_MAX_NPUS);
#endif
// Lock protecting npu_ref_counts and npu_active_mask. Invalidations
// take this lock for read. GPU VA space register and unregister take
// this lock for write. Since all invalidations take the lock for read
// for the duration of the invalidate, taking the lock for write also
// flushes all invalidates.
//
// This is a spinlock because the invalidation code paths may be called
// with interrupts disabled, so those paths can't take the VA space
// lock. We could use a normal exclusive spinlock instead, but a reader/
// writer lock is preferred to allow concurrent invalidates in the same
// VA space.
uvm_rwlock_irqsave_t rwlock;
} uvm_ibm_va_space_t;
typedef struct
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
struct npu_context *npu_context;
#endif
// Used on the teardown path to know what to clean up. npu_context acts
// as the equivalent flag for kernel-provided support.
bool did_ibm_driver_init;
} uvm_ibm_gpu_va_space_t;
struct uvm_ibm_npu_struct
{
// Number of retained GPUs under this NPU. The other fields in this struct
// are only valid if this is non-zero.
unsigned int num_retained_gpus;
// PCI domain containing this NPU. This acts as a unique system-wide ID for
// this UVM NPU.
int pci_domain;
// The ATS-related fields are only valid when ATS support is enabled and
// UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1.
struct
{
// Mapped addresses of the ATSD trigger registers. There may be more
// than one set of identical registers per NPU to enable concurrent
// invalidates.
//
// These will not be accessed unless there is a GPU VA space registered
// on a GPU under this NPU. They are protected by bit locks in the locks
// field.
__be64 __iomem *io_addrs[UVM_MAX_ATSD_REGS];
// Actual number of registers in the io_addrs array
size_t count;
// Bitmask for allocation and locking of the registers. Bit index n
// corresponds to io_addrs[n]. A set bit means that index is in use
// (locked).
DECLARE_BITMAP(locks, UVM_MAX_ATSD_REGS);
// Max value of any uvm_parent_gpu_t::num_hshub_tlb_invalidate_membars
// for all retained GPUs under this NPU.
NvU32 num_membars;
} atsd_regs;
};
#if UVM_IBM_NPU_SUPPORTED()
NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu);
void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu);
#else
static NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
return NV_OK;
}
static void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
}
#endif // UVM_IBM_NPU_SUPPORTED
#if UVM_ATS_IBM_SUPPORTED()
// Initializes IBM specific GPU state.
//
// LOCKING: None
void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space);
// Enables ATS access for the gpu_va_space on the mm_struct associated with
// the VA space (va_space_mm).
//
// If UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1, NV_ERR_NOT_SUPPORTED is
// returned if current->mm does not match va_space_mm.mm or if a GPU VA
// space within another VA space has already called this function on the
// same mm.
//
// If UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1 there are no such restrictions.
//
// LOCKING: The VA space lock must be held in write mode.
// current->mm->mmap_lock must be held in write mode iff
// UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Disables ATS access for the gpu_va_space. Prior to calling this function,
// the caller must guarantee that the GPU will no longer make any ATS
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and the VA space lock, so
// neither must be held.
void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
// interrupts disabled.
void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
#else
static void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{
}
static NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
return NV_OK;
}
static void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
}
static void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
}
#endif // UVM_ATS_IBM_SUPPORTED
static NV_STATUS uvm_ats_ibm_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
return NV_OK;
}
static void uvm_ats_ibm_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
}
#endif // __UVM_ATS_IBM_H__

View File

@@ -0,0 +1,680 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_channel.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_tracker.h"
#include "uvm_va_space.h"
#include "uvm_rm_mem.h"
#include "uvm_mem.h"
#define CE_TEST_MEM_SIZE (2 * 1024 * 1024)
#define CE_TEST_MEM_END_SIZE 32
#define CE_TEST_MEM_BEGIN_SIZE 32
#define CE_TEST_MEM_MIDDLE_SIZE (CE_TEST_MEM_SIZE - CE_TEST_MEM_BEGIN_SIZE - CE_TEST_MEM_END_SIZE)
#define CE_TEST_MEM_MIDDLE_OFFSET (CE_TEST_MEM_BEGIN_SIZE)
#define CE_TEST_MEM_END_OFFSET (CE_TEST_MEM_SIZE - CE_TEST_MEM_END_SIZE)
#define CE_TEST_MEM_COUNT 5
static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
{
NvU32 i;
NV_STATUS status;
uvm_rm_mem_t *mem[CE_TEST_MEM_COUNT] = { NULL };
uvm_rm_mem_t *host_mem = NULL;
NvU32 *host_ptr;
NvU64 host_mem_gpu_va, mem_gpu_va;
NvU64 dst_va;
NvU64 src_va;
uvm_push_t push;
bool is_proxy;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, &host_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
memset(host_ptr, 0, CE_TEST_MEM_SIZE);
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, &mem[i]);
TEST_CHECK_GOTO(status == NV_OK, done);
}
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Non-pipelined test");
TEST_CHECK_GOTO(status == NV_OK, done);
is_proxy = uvm_channel_is_proxy(push.channel);
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy);
// All of the following CE transfers are done from a single (L)CE and
// disabling pipelining is enough to order them when needed. Only push_end
// needs a MEMBAR SYS to order everything with the CPU.
// Initialize to a bad value
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1337 + i, CE_TEST_MEM_SIZE);
}
// Set the first buffer to 1
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy);
gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1, CE_TEST_MEM_SIZE);
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
NvU32 dst = i + 1;
if (dst == CE_TEST_MEM_COUNT)
dst_va = host_mem_gpu_va;
else
dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy);
src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
// The first memcpy needs to be non-pipelined as otherwise the previous
// memset/memcpy to the source may not be done yet.
// Alternate the order of copying the beginning and the end
if (i % 2 == 0) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memcopy_v_to_v(&push, dst_va + CE_TEST_MEM_END_OFFSET, src_va + CE_TEST_MEM_END_OFFSET, CE_TEST_MEM_END_SIZE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(&push,
dst_va + CE_TEST_MEM_MIDDLE_OFFSET,
src_va + CE_TEST_MEM_MIDDLE_OFFSET,
CE_TEST_MEM_MIDDLE_SIZE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(&push, dst_va, src_va, CE_TEST_MEM_BEGIN_SIZE);
}
else {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memcopy_v_to_v(&push, dst_va, src_va, CE_TEST_MEM_BEGIN_SIZE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(&push,
dst_va + CE_TEST_MEM_MIDDLE_OFFSET,
src_va + CE_TEST_MEM_MIDDLE_OFFSET,
CE_TEST_MEM_MIDDLE_SIZE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(&push,
dst_va + CE_TEST_MEM_END_OFFSET,
src_va + CE_TEST_MEM_END_OFFSET,
CE_TEST_MEM_END_SIZE);
}
}
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
for (i = 0; i < CE_TEST_MEM_SIZE / sizeof(NvU32); ++i) {
if (host_ptr[i] != 1) {
UVM_TEST_PRINT("host_ptr[%u] = %u instead of 1\n", i, host_ptr[i]);
status = NV_ERR_INVALID_STATE;
goto done;
}
}
done:
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
uvm_rm_mem_free(mem[i]);
}
uvm_rm_mem_free(host_mem);
return status;
}
#define REDUCTIONS 32
static NV_STATUS test_membar(uvm_gpu_t *gpu)
{
NvU32 i;
NV_STATUS status;
uvm_rm_mem_t *host_mem = NULL;
NvU32 *host_ptr;
NvU64 host_mem_gpu_va;
uvm_push_t push;
NvU32 value;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), &host_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
*host_ptr = 0;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Membar test");
TEST_CHECK_GOTO(status == NV_OK, done);
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel));
for (i = 0; i < REDUCTIONS; ++i) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS + 1);
}
// Without a sys membar the channel tracking semaphore can and does complete
// before all the reductions.
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
value = *host_ptr;
if (value != REDUCTIONS) {
UVM_TEST_PRINT("Value = %u instead of %u, GPU %s\n", value, REDUCTIONS, uvm_gpu_name(gpu));
status = NV_ERR_INVALID_STATE;
goto done;
}
done:
uvm_rm_mem_free(host_mem);
return status;
}
static void push_memset(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t element_size, size_t size)
{
switch (element_size) {
case 1:
uvm_push_get_gpu(push)->parent->ce_hal->memset_1(push, dst, (NvU8)value, size);
break;
case 4:
uvm_push_get_gpu(push)->parent->ce_hal->memset_4(push, dst, (NvU32)value, size);
break;
case 8:
uvm_push_get_gpu(push)->parent->ce_hal->memset_8(push, dst, value, size);
break;
default:
UVM_ASSERT(0);
}
}
static NV_STATUS test_unaligned_memset(uvm_gpu_t *gpu,
uvm_gpu_address_t gpu_verif_addr,
NvU8 *cpu_verif_addr,
size_t size,
size_t element_size,
size_t offset)
{
uvm_push_t push;
NV_STATUS status;
size_t i;
NvU64 value64 = (offset + 2) * (1ull << 32) + (offset + 1);
NvU64 test_value, expected_value = 0;
uvm_gpu_address_t dst;
// Memset a single element at an unaligned position and make sure it doesn't
// clobber anything else
TEST_CHECK_RET(gpu_verif_addr.address % element_size == 0);
TEST_CHECK_RET(offset + element_size <= size);
dst = gpu_verif_addr;
dst.address += offset;
memset(cpu_verif_addr, (NvU8)(~value64), size);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push,
"memset_%zu offset %zu",
element_size, offset);
TEST_CHECK_RET(status == NV_OK);
push_memset(&push, dst, value64, element_size, element_size);
status = uvm_push_end_and_wait(&push);
TEST_CHECK_RET(status == NV_OK);
// Make sure all bytes of element are present
test_value = 0;
memcpy(&test_value, cpu_verif_addr + offset, element_size);
switch (element_size) {
case 1:
expected_value = (NvU8)value64;
break;
case 4:
expected_value = (NvU32)value64;
break;
case 8:
expected_value = value64;
break;
default:
UVM_ASSERT(0);
}
if (test_value != expected_value) {
UVM_TEST_PRINT("memset_%zu offset %zu failed, written value is 0x%llx instead of 0x%llx\n",
element_size, offset, test_value, expected_value);
return NV_ERR_INVALID_STATE;
}
// Make sure all other bytes are unchanged
for (i = 0; i < size; i++) {
if (i >= offset && i < offset + element_size)
continue;
if (cpu_verif_addr[i] != (NvU8)(~value64)) {
UVM_TEST_PRINT("memset_%zu offset %zu failed, immutable byte %zu changed value from 0x%x to 0x%x\n",
element_size, offset, i, (NvU8)(~value64),
cpu_verif_addr[i]);
return NV_ERR_INVALID_STATE;
}
}
return NV_OK;
}
static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
uvm_gpu_address_t dst,
uvm_gpu_address_t src,
size_t size,
size_t element_size,
uvm_gpu_address_t gpu_verif_addr,
void *cpu_verif_addr,
int test_iteration)
{
uvm_push_t push;
size_t i;
const char *src_type = src.is_virtual ? "virtual" : "physical";
const char *src_loc = src.aperture == UVM_APERTURE_SYS ? "sysmem" : "vidmem";
const char *dst_type = dst.is_virtual ? "virtual" : "physical";
const char *dst_loc = dst.aperture == UVM_APERTURE_SYS ? "sysmem" : "vidmem";
NvU64 value64 = (test_iteration + 2) * (1ull << 32) + (test_iteration + 1);
NvU64 test_value = 0, expected_value = 0;
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
&push,
"Memset %s %s (0x%llx) and memcopy to %s %s (0x%llx), iter %d",
src_type,
src_loc,
src.address,
dst_type,
dst_loc,
dst.address,
test_iteration));
// Waive if any of the input addresses is physical but the channel does not
// support physical addressing
if (!uvm_channel_is_privileged(push.channel) && (!dst.is_virtual || !src.is_virtual)) {
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
return NV_OK;
}
// The input virtual addresses exist in UVM's internal address space, not
// the proxy address space
if (uvm_channel_is_proxy(push.channel)) {
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
return NV_ERR_INVALID_STATE;
}
// Memset src with the appropriate element size, then memcpy to dst and from
// dst to the verif location (physical sysmem).
push_memset(&push, src, value64, element_size, size);
gpu->parent->ce_hal->memcopy(&push, dst, src, size);
gpu->parent->ce_hal->memcopy(&push, gpu_verif_addr, dst, size);
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
for (i = 0; i < size / element_size; i++) {
switch (element_size) {
case 1:
expected_value = (NvU8)value64;
test_value = ((NvU8 *)cpu_verif_addr)[i];
break;
case 4:
expected_value = (NvU32)value64;
test_value = ((NvU32 *)cpu_verif_addr)[i];
break;
case 8:
expected_value = value64;
test_value = ((NvU64 *)cpu_verif_addr)[i];
break;
default:
UVM_ASSERT(0);
}
if (test_value != expected_value) {
UVM_TEST_PRINT("memset_%zu of %s %s and memcpy into %s %s failed, value[%zu] = 0x%llx instead of 0x%llx\n",
element_size, src_type, src_loc, dst_type, dst_loc,
i, test_value, expected_value);
return NV_ERR_INVALID_STATE;
}
}
return NV_OK;
}
static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
bool is_proxy_va_space;
uvm_gpu_address_t gpu_verif_addr;
void *cpu_verif_addr;
uvm_mem_t *verif_mem = NULL;
uvm_mem_t *sys_uvm_mem = NULL;
uvm_mem_t *gpu_uvm_mem = NULL;
uvm_rm_mem_t *sys_rm_mem = NULL;
uvm_rm_mem_t *gpu_rm_mem = NULL;
uvm_gpu_address_t gpu_addresses[4];
NvU64 gpu_va;
size_t size;
static const size_t element_sizes[] = {1, 4, 8};
const size_t iterations = 4;
size_t i, j, k, s;
uvm_mem_alloc_params_t mem_params = {0};
size = gpu->big_page.internal_size;
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, gpu), done);
gpu_verif_addr = uvm_mem_gpu_address_virtual_kernel(verif_mem, gpu);
cpu_verif_addr = uvm_mem_get_cpu_addr_kernel(verif_mem);
for (i = 0; i < iterations; ++i) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_unaligned_memset(gpu,
gpu_verif_addr,
cpu_verif_addr,
size,
element_sizes[s],
i),
done);
}
}
// Using a page size equal to the allocation size ensures that the UVM
// memories about to be allocated are physically contiguous. And since the
// size is a valid GPU page size, the memories can be virtually mapped on
// the GPU if needed.
mem_params.size = size;
mem_params.page_size = size;
mem_params.mm = current->mm;
// Physical address in sysmem
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
gpu_addresses[0] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
// Physical address in vidmem
mem_params.backing_gpu = gpu;
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
// Virtual address (in UVM's internal address space) backed by vidmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, &gpu_rm_mem), done);
is_proxy_va_space = false;
gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va);
// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, &sys_rm_mem), done);
gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va);
for (i = 0; i < iterations; ++i) {
for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[k],
gpu_addresses[j],
size,
element_sizes[s],
gpu_verif_addr,
cpu_verif_addr,
i),
done);
}
}
}
}
done:
uvm_rm_mem_free(sys_rm_mem);
uvm_rm_mem_free(gpu_rm_mem);
uvm_mem_free(gpu_uvm_mem);
uvm_mem_free(sys_uvm_mem);
uvm_mem_free(verif_mem);
return status;
}
static NV_STATUS test_semaphore_alloc_sem(uvm_gpu_t *gpu, size_t size, uvm_mem_t **mem_out)
{
NvU64 gpu_va;
NV_STATUS status = NV_OK;
uvm_mem_t *mem = NULL;
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(mem, gpu), error);
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
// This semaphore resides in the uvm_mem region, i.e., it has the GPU VA
// MSbit set. The intent is to validate semaphore operations when the
// semaphore's VA is in the high-end of the GPU effective virtual address
// space spectrum, i.e., its VA upper-bit is set.
TEST_CHECK_GOTO(gpu_va & (1ULL << (gpu->address_space_tree.hal->num_va_bits() - 1)), error);
*mem_out = mem;
return NV_OK;
error:
uvm_mem_free(mem);
return status;
}
// test_semaphore_reduction_inc is similar in concept to test_membar(). It uses
// uvm_mem (instead of uvm_rm_mem) as the semaphore, i.e., it assumes that the
// CE HAL has been validated, since uvm_mem needs the CE memset/memcopy to be
// operational as a pre-requisite for GPU PTE writes. The purpose of
// test_semaphore_reduction_inc is to validate the reduction inc operation on
// semaphores with their VA's upper-bit set.
static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_push_t push;
uvm_mem_t *mem;
NvU64 gpu_va;
NvU32 i;
NvU32 *host_ptr = NULL;
NvU32 value;
// Semaphore reduction needs 1 word (4 bytes).
const size_t size = sizeof(NvU32);
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_CHECK_RET(status == NV_OK);
// Initialize the counter of reductions.
host_ptr = uvm_mem_get_cpu_addr_kernel(mem);
TEST_CHECK_GOTO(host_ptr != NULL, done);
*host_ptr = 0;
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_reduction_inc test");
TEST_CHECK_GOTO(status == NV_OK, done);
for (i = 0; i < REDUCTIONS; i++) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, i+1);
}
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
value = *host_ptr;
if (value != REDUCTIONS) {
UVM_TEST_PRINT("Value = %u instead of %u, GPU %s\n", value, REDUCTIONS, uvm_gpu_name(gpu));
status = NV_ERR_INVALID_STATE;
goto done;
}
done:
uvm_mem_free(mem);
return status;
}
static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_push_t push;
uvm_mem_t *mem;
NvU64 gpu_va;
NvU32 value;
NvU32 *host_ptr = NULL;
NvU32 payload = 0xA5A55A5A;
// Semaphore release needs 1 word (4 bytes).
const size_t size = sizeof(NvU32);
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_CHECK_RET(status == NV_OK);
// Initialize the payload.
host_ptr = uvm_mem_get_cpu_addr_kernel(mem);
TEST_CHECK_GOTO(host_ptr != NULL, done);
*host_ptr = 0;
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_release test");
TEST_CHECK_GOTO(status == NV_OK, done);
gpu->parent->ce_hal->semaphore_release(&push, gpu_va, payload);
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
value = *host_ptr;
if (value != payload) {
UVM_TEST_PRINT("Semaphore payload = %u instead of %u, GPU %s\n", value, payload, uvm_gpu_name(gpu));
status = NV_ERR_INVALID_STATE;
goto done;
}
done:
uvm_mem_free(mem);
return status;
}
static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_push_t push;
uvm_mem_t *mem;
NvU64 gpu_va;
NvU32 i;
NvU64 *timestamp;
NvU64 last_timestamp = 0;
// 2 iterations:
// 1: compare retrieved timestamp with 0;
// 2: compare retrieved timestamp with previous timestamp (obtained in 1).
const NvU32 iterations = 2;
// The semaphore is 4 words long (16 bytes).
const size_t size = 16;
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_CHECK_RET(status == NV_OK);
timestamp = uvm_mem_get_cpu_addr_kernel(mem);
TEST_CHECK_GOTO(timestamp != NULL, done);
memset(timestamp, 0, size);
// Shift the timestamp pointer to where the semaphore timestamp info is.
timestamp += 1;
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
for (i = 0; i < iterations; i++) {
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
&push,
"semaphore_timestamp test, iter: %u",
i);
TEST_CHECK_GOTO(status == NV_OK, done);
gpu->parent->ce_hal->semaphore_timestamp(&push, gpu_va);
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
TEST_CHECK_GOTO(*timestamp != 0, done);
TEST_CHECK_GOTO(*timestamp >= last_timestamp, done);
last_timestamp = *timestamp;
}
done:
uvm_mem_free(mem);
return status;
}
static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
{
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
TEST_NV_CHECK_RET(test_non_pipelined(gpu));
TEST_NV_CHECK_RET(test_membar(gpu));
TEST_NV_CHECK_RET(test_memcpy_and_memset(gpu));
TEST_NV_CHECK_RET(test_semaphore_reduction_inc(gpu));
TEST_NV_CHECK_RET(test_semaphore_release(gpu));
if (!skipTimestampTest)
TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
}
return NV_OK;
}
NV_STATUS uvm_test_ce_sanity(UVM_TEST_CE_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_va_space_down_read_rm(va_space);
status = test_ce(va_space, params->skipTimestampTest);
if (status != NV_OK)
goto done;
done:
uvm_va_space_up_read_rm(va_space);
return status;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,487 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_CHANNEL_H__
#define __UVM_CHANNEL_H__
#include "nv_uvm_types.h"
#include "uvm_forward_decl.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_pushbuffer.h"
#include "uvm_tracker.h"
//
// UVM channels
//
// A channel manager is created as part of the GPU addition. This involves
// creating channels for each of the supported types (uvm_channel_type_t) in
// separate channel pools possibly using different CE instances in the HW. Each
// channel has a uvm_gpu_tracking_semaphore_t and a set of uvm_gpfifo_entry_t
// (one per each HW GPFIFO entry) allowing to track completion of pushes on the
// channel.
//
// Beginning a push on a channel implies reserving a GPFIFO entry in that
// channel and hence there can only be as many on-going pushes per channel as
// there are free GPFIFO entries. This ensures that ending a push won't have to
// wait for a GPFIFO entry to free up.
//
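//
// As a rough usage sketch (illustrative and simplified from the CE sanity
// test; error handling trimmed, and "dst_gpu_address"/"size" are
// hypothetical): a caller begins a push on a channel of the desired type,
// records GPU work through the HAL, and ends the push, optionally waiting
// for the channel's tracking semaphore.
//
//     uvm_push_t push;
//     NV_STATUS status = uvm_push_begin(gpu->channel_manager,
//                                       UVM_CHANNEL_TYPE_GPU_INTERNAL,
//                                       &push,
//                                       "example memset");
//     if (status == NV_OK) {
//         gpu->parent->ce_hal->memset_4(&push, dst_gpu_address, 0, size);
//         status = uvm_push_end_and_wait(&push);
//     }
//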
// Channel types
typedef enum
{
// CPU to GPU copies
UVM_CHANNEL_TYPE_CPU_TO_GPU,
// GPU to CPU copies
UVM_CHANNEL_TYPE_GPU_TO_CPU,
// Memsets and copies within the GPU
UVM_CHANNEL_TYPE_GPU_INTERNAL,
// Memops and small memsets/copies for writing PTEs
UVM_CHANNEL_TYPE_MEMOPS,
// GPU to GPU peer copies
UVM_CHANNEL_TYPE_GPU_TO_GPU,
UVM_CHANNEL_TYPE_CE_COUNT,
// ^^^^^^
// Channel types backed by a CE.
UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT,
} uvm_channel_type_t;
typedef enum
{
// A pool that contains CE channels owned by UVM.
UVM_CHANNEL_POOL_TYPE_CE = (1 << 0),
// A proxy pool contains only proxy channels, so it only exists in SR-IOV
// heavy. The pool is only used for UVM_CHANNEL_TYPE_MEMOPS pushes.
//
// A proxy channel is a privileged CE channel owned by the vGPU plugin. A
// proxy channel cannot be manipulated directly by the UVM driver, who
// instead can only submit work to it by invoking an RM API.
//
// There is a single proxy pool and channel per GPU.
UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1),
UVM_CHANNEL_POOL_TYPE_COUNT = 2,
// A mask used to select pools of any type.
UVM_CHANNEL_POOL_TYPE_MASK = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1)
} uvm_channel_pool_type_t;
struct uvm_gpfifo_entry_struct
{
// Offset of the pushbuffer in the pushbuffer allocation used by this entry
NvU32 pushbuffer_offset;
// Size of the pushbuffer used for this entry
NvU32 pushbuffer_size;
// List node used by the pushbuffer tracking
struct list_head pending_list_node;
// Channel tracking semaphore value that indicates completion of this entry
NvU64 tracking_semaphore_value;
// Push info for the pending push that used this GPFIFO entry
uvm_push_info_t *push_info;
};
// A channel pool is a set of channels that use the same engine. For example,
// all channels in a CE pool share the same (logical) Copy Engine.
typedef struct
{
// Owning channel manager
uvm_channel_manager_t *manager;
// Channels in this pool
uvm_channel_t *channels;
// Number of elements in the channel array
NvU32 num_channels;
// Index of the engine associated with the pool (index is an offset from the
// first engine of the same engine type.)
unsigned engine_index;
// Pool type: Refer to the uvm_channel_pool_type_t enum.
uvm_channel_pool_type_t pool_type;
// Lock protecting the state of channels in the pool
uvm_spinlock_t lock;
} uvm_channel_pool_t;
struct uvm_channel_struct
{
// Owning pool
uvm_channel_pool_t *pool;
// The channel name contains the CE index, and (for UVM internal channels)
// the HW runlist and channel IDs.
char name[64];
// Array of gpfifo entries, one per each HW GPFIFO
uvm_gpfifo_entry_t *gpfifo_entries;
// Number of GPFIFO entries in gpfifo_entries
NvU32 num_gpfifo_entries;
// Latest GPFIFO entry submitted to the GPU
// Updated when new pushes are submitted to the GPU in
// uvm_channel_end_push().
NvU32 cpu_put;
// Latest GPFIFO entry completed by the GPU
// Updated by uvm_channel_update_progress() after checking pending GPFIFOs
// for completion.
NvU32 gpu_get;
// Number of currently on-going pushes on this channel
// A new push is only allowed to begin on the channel if there is a free
// GPFIFO entry for it.
NvU32 current_pushes_count;
// Array of uvm_push_info_t for all pending pushes on the channel
uvm_push_info_t *push_infos;
// Array of uvm_push_acquire_info_t for all pending pushes on the channel.
// Each entry corresponds to the push_infos entry with the same index.
uvm_push_acquire_info_t *push_acquire_infos;
// List of uvm_push_info_t entries that are currently available. A push info
// entry is not available if it has been assigned to a push
// (uvm_push_begin), and the GPFIFO entry associated with the push has not
// been marked as completed.
struct list_head available_push_infos;
// GPU tracking semaphore tracking the work in the channel
// Each push on the channel increments the semaphore, see
// uvm_channel_end_push().
uvm_gpu_tracking_semaphore_t tracking_sem;
// RM channel information
union
{
// UVM internal channels
struct
{
// UVM-RM interface handle
uvmGpuChannelHandle handle;
// Channel state populated by RM. Includes the GPFIFO, error
// notifier, work submission information etc.
UvmGpuChannelInfo channel_info;
};
// Proxy channels (SR-IOV heavy only)
struct
{
// UVM-RM interface handle
UvmGpuPagingChannelHandle handle;
// Channel state populated by RM. Includes the error notifier.
UvmGpuPagingChannelInfo channel_info;
} proxy;
};
struct
{
struct proc_dir_entry *dir;
struct proc_dir_entry *info;
struct proc_dir_entry *pushes;
} procfs;
// Information managed by the tools event notification mechanism. Mainly
// used to keep a list of channels with pending events, which is needed
// to collect the timestamps of asynchronous operations.
struct
{
struct list_head channel_list_node;
NvU32 pending_event_count;
} tools;
};
struct uvm_channel_manager_struct
{
// The owning GPU
uvm_gpu_t *gpu;
// The pushbuffer used for all pushes done with this channel manager
uvm_pushbuffer_t *pushbuffer;
// Array of channel pools.
uvm_channel_pool_t *channel_pools;
// Number of elements in the pool array
unsigned num_channel_pools;
// Mask containing the indexes of the usable Copy Engines. Each usable CE
// has a pool associated with it, see channel_manager_ce_pool
DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
struct
{
// Pools to be used by each channel type by default.
//
// Transfers of a given type may use a pool different from that in
// default_for_type[type]. For example, transfers to NvLink GPU
// peers may instead use the more optimal pool stored in the gpu_to_gpu
// array
uvm_channel_pool_t *default_for_type[UVM_CHANNEL_TYPE_COUNT];
// Optimal pools to use when writing from the owning GPU to its NvLink
// peers.
// If there is no optimal pool (the entry is NULL), use default pool
// default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] instead.
uvm_channel_pool_t *gpu_to_gpu[UVM_ID_MAX_GPUS];
} pool_to_use;
struct
{
struct proc_dir_entry *channels_dir;
struct proc_dir_entry *pending_pushes;
} procfs;
struct
{
NvU32 num_gpfifo_entries;
UVM_BUFFER_LOCATION gpfifo_loc;
UVM_BUFFER_LOCATION gpput_loc;
UVM_BUFFER_LOCATION pushbuffer_loc;
} conf;
};
// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
static bool uvm_channel_is_proxy(uvm_channel_t *channel)
{
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
return channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
}
static bool uvm_channel_is_ce(uvm_channel_t *channel)
{
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel);
}
// Proxy channels are used to push page tree related methods, so their channel
// type is UVM_CHANNEL_TYPE_MEMOPS.
static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
{
return UVM_CHANNEL_TYPE_MEMOPS;
}
// Privileged channels support all the Host and engine methods, while
// non-privileged channels don't support privileged methods.
//
// A major limitation of non-privileged CE channels is lack of physical
// addressing support.
bool uvm_channel_is_privileged(uvm_channel_t *channel);
// Destroy the channel manager
void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager);
// Get the current status of the channel
// Returns NV_OK if the channel is in a good state and NV_ERR_RC_ERROR
// otherwise. Notably this never sets the global fatal error.
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel);
// Check for channel errors
// Checks for channel errors by calling uvm_channel_get_status(). If an error
// occurred, sets the global fatal error and prints errors.
NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel);
// Check errors on all channels in the channel manager
// Also includes uvm_global_get_status
NV_STATUS uvm_channel_manager_check_errors(uvm_channel_manager_t *channel_manager);
// Retrieve the GPFIFO entry that caused a channel error
// The channel has to be in error state prior to calling this function.
uvm_gpfifo_entry_t *uvm_channel_get_fatal_entry(uvm_channel_t *channel);
// Update progress of a specific channel
// Returns the number of still pending GPFIFO entries for that channel.
// Notably, some of the pending GPFIFO entries might already be completed, but
// the update early-outs after completing a fixed number of them to spread the
// cost of the updates across calls.
NvU32 uvm_channel_update_progress(uvm_channel_t *channel);
// Update progress of all channels
// Returns the number of still pending GPFIFO entries for all channels.
// Notably, some of the pending GPFIFO entries might already be completed, but
// the update early-outs after completing a fixed number of them to spread the
// cost of the updates across calls.
NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager);
// Wait for all channels to idle
// It waits for anything that is running, but doesn't prevent new work from
// beginning.
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
// associated with access_channel.
//
// The channels can belong to different GPUs, the same GPU, or even be
// identical, in which case uvm_channel_tracking_semaphore_get_gpu_va can be
// used instead.
NvU64 uvm_channel_tracking_semaphore_get_gpu_va_in_channel(uvm_channel_t *semaphore_channel,
uvm_channel_t *access_channel);
// See above.
static NvU64 uvm_channel_tracking_semaphore_get_gpu_va(uvm_channel_t *channel)
{
return uvm_channel_tracking_semaphore_get_gpu_va_in_channel(channel, channel);
}
// Check whether the channel completed a value
bool uvm_channel_is_value_completed(uvm_channel_t *channel, NvU64 value);
// Update and get the latest completed value by the channel
NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel);
// Select and reserve a channel with the specified type for a push
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager,
uvm_channel_type_t type,
uvm_channel_t **channel_out);
// Select and reserve a channel for a transfer from channel_manager->gpu to
// dst_gpu.
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *channel_manager,
uvm_gpu_t *dst_gpu,
uvm_channel_t **channel_out);
// Reserve a specific channel for a push
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel);
// Set optimal CE for P2P transfers between manager->gpu and peer
void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce);
// Begin a push on a previously reserved channel
// Should be used by uvm_push_*() only.
NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push);
// End a push
// Should be used by uvm_push_end() only.
void uvm_channel_end_push(uvm_push_t *push);
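// Illustrative sketch only (not part of the driver API contract): how the
// reserve/begin/end declarations above fit together, assuming a valid
// uvm_channel_manager_t *manager. Client code goes through the uvm_push_*()
// helpers instead of calling these directly.
//
//     uvm_channel_t *channel;
//     uvm_push_t push;
//     NV_STATUS status = uvm_channel_reserve_type(manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &channel);
//     if (status == NV_OK) {
//         status = uvm_channel_begin_push(channel, &push);
//         // ... push HW methods ...
//         uvm_channel_end_push(&push);
//     }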
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);
void uvm_channel_print_pending_pushes(uvm_channel_t *channel);
static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
{
return channel->pool->manager->gpu;
}
// Index of a channel within the owning pool
static unsigned uvm_channel_index_in_pool(const uvm_channel_t *channel)
{
return channel - channel->pool->channels;
}
NvU32 uvm_channel_update_progress_all(uvm_channel_t *channel);
// Return an arbitrary channel of the given type(s)
uvm_channel_t *uvm_channel_any_of_type(uvm_channel_manager_t *manager, NvU32 pool_type_mask);
// Return an arbitrary channel of any type
static uvm_channel_t *uvm_channel_any(uvm_channel_manager_t *manager)
{
return uvm_channel_any_of_type(manager, UVM_CHANNEL_POOL_TYPE_MASK);
}
// Helper to iterate over all the channels in a pool.
#define uvm_for_each_channel_in_pool(channel, pool) \
for (({UVM_ASSERT(pool->channels); \
channel = pool->channels;}); \
channel != pool->channels + pool->num_channels; \
channel++)
uvm_channel_pool_t *uvm_channel_pool_first(uvm_channel_manager_t *manager, NvU32 pool_type_mask);
uvm_channel_pool_t *uvm_channel_pool_next(uvm_channel_manager_t *manager,
uvm_channel_pool_t *curr_pool,
NvU32 pool_type_mask);
// Helper to iterate over all the channel pools of the given type(s) in a GPU.
// The pool mask must not be zero.
#define uvm_for_each_pool_of_type(pool, manager, pool_type_mask) \
for (pool = uvm_channel_pool_first(manager, pool_type_mask); \
pool != NULL; \
pool = uvm_channel_pool_next(manager, pool, pool_type_mask))
#define uvm_for_each_pool(pool, manager) uvm_for_each_pool_of_type(pool, manager, UVM_CHANNEL_POOL_TYPE_MASK)
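// Illustrative sketch of the iterators above (assuming a valid
// uvm_channel_manager_t *manager), mirroring the pattern used by the channel
// tests:
//
//     uvm_channel_pool_t *pool;
//     uvm_for_each_pool(pool, manager) {
//         uvm_channel_t *channel;
//         uvm_for_each_channel_in_pool(channel, pool) {
//             // ... operate on channel ...
//         }
//     }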
#endif // __UVM_CHANNEL_H__

View File

@@ -0,0 +1,844 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_channel.h"
#include "uvm_hal.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_test_rng.h"
#include "uvm_va_space.h"
#include "uvm_tracker.h"
#include "uvm_thread_context.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_kvmalloc.h"
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU 1024
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU 64
// Schedule pushes one after another on all GPUs and channel types; each push
// copies and increments a counter into an adjacent memory location in a
// buffer. Then verify on the CPU that all the values are correct.
static NV_STATUS test_ordering(uvm_va_space_t *va_space)
{
NV_STATUS status;
uvm_gpu_t *gpu;
bool exclude_proxy_channel_type;
NvU32 i, j;
uvm_rm_mem_t *mem = NULL;
NvU32 *host_mem;
uvm_push_t push;
NvU64 gpu_va;
uvm_tracker_t tracker = UVM_TRACKER_INIT();
NvU32 value = 0;
const NvU32 iters_per_channel_type_per_gpu = g_uvm_global.num_simulated_devices > 0 ?
TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU :
TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU;
const NvU32 values_count = iters_per_channel_type_per_gpu;
const size_t buffer_size = sizeof(NvU32) * values_count;
gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL);
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, &mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_mem = (NvU32*)uvm_rm_mem_get_cpu_va(mem);
memset(host_mem, 0, buffer_size);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Initial memset");
TEST_CHECK_GOTO(status == NV_OK, done);
gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
// Semaphore release as part of uvm_push_end() will do the membar
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memset_v_4(&push, gpu_va, 0, buffer_size);
uvm_push_end(&push);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
exclude_proxy_channel_type = uvm_gpu_uses_proxy_channel_pool(gpu);
for (i = 0; i < iters_per_channel_type_per_gpu; ++i) {
for (j = 0; j < UVM_CHANNEL_TYPE_CE_COUNT; ++j) {
uvm_channel_type_t channel_type = j;
// Proxy channels don't support the virtual memcopies that are about
// to be pushed, so don't test the proxy channel type on any of the
// GPUs.
if (exclude_proxy_channel_type && (channel_type == uvm_channel_proxy_channel_type()))
continue;
for_each_va_space_gpu(gpu, va_space) {
NvU64 gpu_va_base;
NvU64 gpu_va_src;
NvU64 gpu_va_dst;
status = uvm_push_begin_acquire(gpu->channel_manager,
channel_type,
&tracker,
&push,
"memcpy and inc to %u",
value + 1);
TEST_CHECK_GOTO(status == NV_OK, done);
gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
gpu_va_src = gpu_va_base + (value % values_count) * sizeof(NvU32);
gpu_va_dst = gpu_va_base + ((value + 1) % values_count) * sizeof(NvU32);
// The semaphore reduction will do a membar before the reduction
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memcopy_v_to_v(&push, gpu_va_dst, gpu_va_src, sizeof(NvU32));
// The following reduction is done from the same GPU, but the
// previous memcpy is to uncached sysmem and that bypasses L2
// and hence requires a SYSMEMBAR to be ordered.
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va_dst, ++value);
uvm_push_end(&push);
uvm_tracker_clear(&tracker);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
}
}
}
status = uvm_tracker_wait(&tracker);
TEST_CHECK_GOTO(status == NV_OK, done);
// At this moment, this should hold:
// mem[value % values_count] == value
// mem[(value + 1) % values_count] == value + 1 - values_count
// And in general, for i=[0, values_count):
// mem[(value + 1 + i) % values_count] == value + 1 - values_count + i
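// For example (illustrative), with values_count == 4 and a final value of 10:
//   mem[2] == 10, mem[3] == 7, mem[0] == 8, mem[1] == 9
// i.e. the buffer holds {8, 9, 10, 7}.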
// Verify that
for (i = 0; i < values_count; ++i) {
NvU32 index = (value + 1 + i) % values_count;
NvU32 expected = (value + 1 + i) - values_count;
if (host_mem[index] != expected) {
UVM_TEST_PRINT("Bad value at host_mem[%u] = %u instead of %u\n", index, host_mem[index], expected);
status = NV_ERR_INVALID_STATE;
goto done;
}
}
done:
uvm_tracker_wait(&tracker);
uvm_rm_mem_free(mem);
return status;
}
static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
{
uvm_push_t push;
uvm_channel_pool_t *pool;
uvm_gpfifo_entry_t *fatal_entry;
uvm_push_info_t *push_info;
int fatal_line;
uvm_tracker_entry_t tracker_entry;
NV_STATUS status;
uvm_tracker_t tracker = UVM_TRACKER_INIT();
uvm_channel_manager_t *manager = gpu->channel_manager;
// Submit a bunch of successful pushes on each channel first so that the
// fatal one is behind a bunch of work (notably more than
// uvm_channel_update_progress() completes by default).
uvm_for_each_pool(pool, manager) {
uvm_channel_t *channel;
uvm_for_each_channel_in_pool(channel, pool) {
NvU32 i;
for (i = 0; i < 512; ++i) {
status = uvm_push_begin_on_channel(channel, &push, "Non-faulting push");
TEST_CHECK_RET(status == NV_OK);
uvm_push_end(&push);
}
}
}
// Check RC on a proxy channel (SR-IOV heavy) or internal channel (any other
// mode). It is not allowed to use a virtual address in a memset pushed to
// a proxy channel, so we use a physical address instead.
if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
uvm_gpu_address_t dst_address;
// Save the line number the push that's supposed to fail was started on
fatal_line = __LINE__ + 1;
TEST_NV_CHECK_RET(uvm_push_begin(manager, uvm_channel_proxy_channel_type(), &push, "Fatal push 0x%X", 0xBAD));
// Memset targeting a physical address beyond the vidmem size. The
// passed physical address is not the vidmem size reported by RM
// because the reported size can be smaller than the actual physical
// size, such that accessing a GPA at the reported size may be allowed
// by VMMU.
//
// GA100 GPUs have far less than UVM_GPU_MAX_PHYS_MEM of vidmem, so using
// that value as a physical address should result in an error.
dst_address = uvm_gpu_address_physical(UVM_APERTURE_VID, UVM_GPU_MAX_PHYS_MEM - 8);
gpu->parent->ce_hal->memset_8(&push, dst_address, 0, 8);
}
else {
fatal_line = __LINE__ + 1;
TEST_NV_CHECK_RET(uvm_push_begin(manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Fatal push 0x%X", 0xBAD));
// Memset that should fault on 0xFFFFFFFF
gpu->parent->ce_hal->memset_v_4(&push, 0xFFFFFFFF, 0, 4);
}
uvm_push_end(&push);
uvm_push_get_tracker_entry(&push, &tracker_entry);
uvm_tracker_overwrite_with_push(&tracker, &push);
status = uvm_channel_manager_wait(manager);
TEST_CHECK_RET(status == NV_ERR_RC_ERROR);
TEST_CHECK_RET(uvm_channel_get_status(push.channel) == NV_ERR_RC_ERROR);
fatal_entry = uvm_channel_get_fatal_entry(push.channel);
TEST_CHECK_RET(fatal_entry != NULL);
push_info = fatal_entry->push_info;
TEST_CHECK_RET(push_info != NULL);
TEST_CHECK_RET(push_info->line == fatal_line);
TEST_CHECK_RET(strcmp(push_info->function, __FUNCTION__) == 0);
TEST_CHECK_RET(strcmp(push_info->filename, kbasename(__FILE__)) == 0);
if (uvm_push_info_is_tracking_descriptions())
TEST_CHECK_RET(strcmp(push_info->description, "Fatal push 0xBAD") == 0);
TEST_CHECK_RET(uvm_global_get_status() == NV_ERR_RC_ERROR);
// Check that waiting for an entry after a global fatal error makes the
// entry completed.
TEST_CHECK_RET(!uvm_tracker_is_entry_completed(&tracker_entry));
TEST_CHECK_RET(uvm_tracker_wait_for_entry(&tracker_entry) == NV_ERR_RC_ERROR);
TEST_CHECK_RET(uvm_tracker_is_entry_completed(&tracker_entry));
// Check that waiting for a tracker after a global fatal error clears all
// the entries from the tracker.
TEST_CHECK_RET(!uvm_tracker_is_empty(&tracker));
TEST_CHECK_RET(uvm_tracker_wait(&tracker) == NV_ERR_RC_ERROR);
TEST_CHECK_RET(uvm_tracker_is_empty(&tracker));
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_RC_ERROR);
return NV_OK;
}
static NV_STATUS test_rc(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_va_space_gpu(gpu, va_space) {
NV_STATUS test_status, create_status;
// The GPU channel manager is destroyed and then re-created after
// testing RC, so this test requires exclusive access to the GPU.
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
g_uvm_global.disable_fatal_error_assert = true;
test_status = uvm_test_rc_for_gpu(gpu);
g_uvm_global.disable_fatal_error_assert = false;
uvm_channel_manager_destroy(gpu->channel_manager);
create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);
TEST_NV_CHECK_RET(test_status);
TEST_NV_CHECK_RET(create_status);
}
return NV_OK;
}
typedef struct
{
uvm_push_t push;
uvm_tracker_t tracker;
uvm_gpu_semaphore_t semaphore;
NvU32 queued_counter_value;
NvU32 queued_counter_repeat;
uvm_rm_mem_t *counter_mem;
uvm_rm_mem_t *counter_snapshots_mem;
uvm_rm_mem_t *other_stream_counter_snapshots_mem;
NvU32 *counter_snapshots;
NvU32 *other_stream_counter_snapshots;
NvU32 *other_stream_counter_expected;
} uvm_test_stream_t;
#define MAX_COUNTER_REPEAT_COUNT 10 * 1024
// For each iteration, snapshot the first and last counter values
#define TEST_SNAPSHOT_SIZE(it) (2 * it * sizeof(NvU32))
static void snapshot_counter(uvm_push_t *push,
uvm_rm_mem_t *counter_mem,
uvm_rm_mem_t *snapshot_mem,
NvU32 index,
NvU32 counters_count)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU64 counter_gpu_va;
NvU64 snapshot_gpu_va;
bool is_proxy_channel;
NvU32 last_counter_offset = (counters_count - 1) * sizeof(NvU32);
if (counters_count == 0)
return;
is_proxy_channel = uvm_channel_is_proxy(push->channel);
counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
snapshot_gpu_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, is_proxy_channel) + index * 2 * sizeof(NvU32);
// Copy the last and first counter to a snapshot for later verification.
// Membar will be done by uvm_push_end()
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(push,
snapshot_gpu_va + sizeof(NvU32),
counter_gpu_va + last_counter_offset,
sizeof(NvU32));
// Membar will be done by uvm_push_end()
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(push, snapshot_gpu_va, counter_gpu_va, sizeof(NvU32));
}
static void set_counter(uvm_push_t *push, uvm_rm_mem_t *counter_mem, NvU32 value, NvU32 count)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU64 counter_gpu_va;
bool is_proxy_channel;
is_proxy_channel = uvm_channel_is_proxy(push->channel);
counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
gpu->parent->ce_hal->memset_v_4(push, counter_gpu_va, value, count * sizeof(NvU32));
}
static uvm_channel_type_t random_ce_channel_type(uvm_test_rng_t *rng)
{
return (uvm_channel_type_t)uvm_test_rng_range_32(rng, 0, UVM_CHANNEL_TYPE_CE_COUNT - 1);
}
static uvm_channel_type_t random_ce_channel_type_except(uvm_test_rng_t *rng, uvm_channel_type_t exception)
{
uvm_channel_type_t channel_type;
UVM_ASSERT(exception < UVM_CHANNEL_TYPE_CE_COUNT);
channel_type = (uvm_channel_type_t)uvm_test_rng_range_32(rng, 0, UVM_CHANNEL_TYPE_CE_COUNT - 2);
if (channel_type >= exception)
channel_type++;
UVM_ASSERT(channel_type < UVM_CHANNEL_TYPE_CE_COUNT);
return channel_type;
}
static uvm_channel_type_t gpu_random_internal_ce_channel_type(uvm_gpu_t *gpu, uvm_test_rng_t *rng)
{
if (uvm_gpu_uses_proxy_channel_pool(gpu))
return random_ce_channel_type_except(rng, uvm_channel_proxy_channel_type());
return random_ce_channel_type(rng);
}
static uvm_gpu_t *random_va_space_gpu(uvm_test_rng_t *rng, uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
NvU32 gpu_count = uvm_processor_mask_get_gpu_count(&va_space->registered_gpus);
NvU32 gpu_index = uvm_test_rng_range_32(rng, 0, gpu_count - 1);
UVM_ASSERT(gpu_count > 0);
for_each_va_space_gpu(gpu, va_space) {
if (gpu_index-- == 0)
return gpu;
}
UVM_ASSERT(0);
return NULL;
}
static void test_memset_rm_mem(uvm_push_t *push, uvm_rm_mem_t *rm_mem, NvU32 value)
{
uvm_gpu_t *gpu;
NvU64 gpu_va;
UVM_ASSERT(rm_mem->size % 4 == 0);
gpu = uvm_push_get_gpu(push);
gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel));
gpu->parent->ce_hal->memset_v_4(push, gpu_va, value, rm_mem->size);
}
// This test schedules a randomly sized memset on a random channel and GPU in a
// "stream" that has operations ordered by acquiring the tracker of the previous
// operation. It also snapshots the memset done by the previous operation in the
// stream to verify it later on the CPU. Each iteration also optionally acquires
// a different stream and snapshots its memset.
// The test ioctl is expected to be called at the same time from multiple
// threads and contains some schedule() calls to help get as many threads
// through the init phase before other threads continue. It also has a random
// schedule() call in the main loop scheduling GPU work.
static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
NvU32 num_streams,
NvU32 iterations_per_stream,
NvU32 seed,
NvU32 verbose)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu;
NvU32 i, j;
uvm_test_stream_t *streams;
uvm_test_rng_t rng;
uvm_test_rng_init(&rng, seed);
gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL);
streams = uvm_kvmalloc_zero(sizeof(*streams) * num_streams);
TEST_CHECK_RET(streams != NULL);
// Initialize all the trackers first so that clean up on error can always
// wait for them.
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
uvm_tracker_init(&stream->tracker);
}
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
status = uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &stream->semaphore);
if (status != NV_OK)
goto done;
stream->queued_counter_value = 0;
status = uvm_rm_mem_alloc_and_map_all(gpu,
UVM_RM_MEM_TYPE_SYS,
MAX_COUNTER_REPEAT_COUNT * sizeof(NvU32),
&stream->counter_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
status = uvm_rm_mem_alloc_and_map_all(gpu,
UVM_RM_MEM_TYPE_SYS,
TEST_SNAPSHOT_SIZE(iterations_per_stream),
&stream->counter_snapshots_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
stream->counter_snapshots = (NvU32*)uvm_rm_mem_get_cpu_va(stream->counter_snapshots_mem);
status = uvm_rm_mem_alloc_and_map_all(gpu,
UVM_RM_MEM_TYPE_SYS,
TEST_SNAPSHOT_SIZE(iterations_per_stream),
&stream->other_stream_counter_snapshots_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
stream->other_stream_counter_snapshots = (NvU32*)uvm_rm_mem_get_cpu_va(stream->other_stream_counter_snapshots_mem);
stream->other_stream_counter_expected = uvm_kvmalloc_zero(sizeof(NvU32) * iterations_per_stream);
if (stream->other_stream_counter_expected == NULL) {
status = NV_ERR_NO_MEMORY;
goto done;
}
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, &stream->push, "stream %u init", i);
TEST_CHECK_GOTO(status == NV_OK, done);
test_memset_rm_mem(&stream->push, stream->counter_mem, 0);
test_memset_rm_mem(&stream->push, stream->counter_snapshots_mem, 0);
test_memset_rm_mem(&stream->push, stream->other_stream_counter_snapshots_mem, 0);
status = uvm_push_end_and_wait(&stream->push);
TEST_CHECK_GOTO(status == NV_OK, done);
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
goto done;
}
// Let other threads run
schedule();
}
if (verbose > 0) {
UVM_TEST_PRINT("Init done, seed %u, GPUs:\n", seed);
for_each_va_space_gpu(gpu, va_space) {
UVM_TEST_PRINT(" GPU %s\n", uvm_gpu_name(gpu));
}
}
for (i = 0; i < iterations_per_stream; ++i) {
for (j = 0; j < num_streams; ++j) {
uvm_test_stream_t *stream = &streams[j];
uvm_channel_type_t channel_type;
gpu = random_va_space_gpu(&rng, va_space);
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
goto done;
}
// Select a random channel type. In SR-IOV heavy the selection has
// to exclude the type associated with proxy channels, because they
// do not support the virtual memcopies/memsets pushed by
// snapshot_counter and set_counter
channel_type = gpu_random_internal_ce_channel_type(gpu, &rng);
status = uvm_push_begin_acquire(gpu->channel_manager,
channel_type,
&stream->tracker,
&stream->push,
"stream %u payload %u gid %u channel_type %u",
j,
stream->queued_counter_value,
uvm_id_value(gpu->id),
channel_type);
TEST_CHECK_GOTO(status == NV_OK, done);
snapshot_counter(&stream->push,
stream->counter_mem,
stream->counter_snapshots_mem,
i,
stream->queued_counter_repeat);
// Set a random number [2, MAX_COUNTER_REPEAT_COUNT] of counters
stream->queued_counter_repeat = uvm_test_rng_range_32(&rng, 2, MAX_COUNTER_REPEAT_COUNT);
set_counter(&stream->push,
stream->counter_mem,
++stream->queued_counter_value,
stream->queued_counter_repeat);
if (uvm_test_rng_range_32(&rng, 0, 1) == 0) {
NvU32 random_stream_index = uvm_test_rng_range_32(&rng, 0, num_streams - 1);
uvm_test_stream_t *random_stream = &streams[random_stream_index];
uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
snapshot_counter(&stream->push,
random_stream->counter_mem,
stream->other_stream_counter_snapshots_mem,
i,
random_stream->queued_counter_repeat);
}
uvm_push_end(&stream->push);
uvm_tracker_clear(&stream->tracker);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&stream->tracker, &stream->push), done);
}
// Randomly schedule other threads
if (uvm_test_rng_range_32(&rng, 0, 9) == 0)
schedule();
}
if (verbose > 0)
UVM_TEST_PRINT("All work scheduled\n");
// Let other threads run
schedule();
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
status = uvm_tracker_wait(&stream->tracker);
if (status != NV_OK) {
UVM_TEST_PRINT("Failed to wait for the tracker for stream %u: %s\n", i, nvstatusToString(status));
goto done;
}
for (j = 0; j < iterations_per_stream; ++j) {
NvU32 snapshot_last = stream->counter_snapshots[j * 2];
NvU32 snapshot_first = stream->counter_snapshots[j * 2 + 1];
if (snapshot_last != j || snapshot_first != j) {
UVM_TEST_PRINT("Stream %u counter snapshot[%u] = %u,%u instead of %u,%u\n",
i,
j,
snapshot_last,
snapshot_first,
j,
j);
status = NV_ERR_INVALID_STATE;
goto done;
}
}
for (j = 0; j < iterations_per_stream; ++j) {
NvU32 snapshot_last = stream->other_stream_counter_snapshots[j * 2];
NvU32 snapshot_first = stream->other_stream_counter_snapshots[j * 2 + 1];
NvU32 expected = stream->other_stream_counter_expected[j];
if (snapshot_last < expected || snapshot_first < expected) {
UVM_TEST_PRINT("Stream %u other_counter snapshot[%u] = %u,%u which is < of %u,%u\n",
i,
j,
snapshot_last,
snapshot_first,
expected,
expected);
status = NV_ERR_INVALID_STATE;
goto done;
}
}
}
if (verbose > 0)
UVM_TEST_PRINT("Verification done\n");
schedule();
done:
// Wait for all the trackers first before freeing up memory, as streams
// reference each other's buffers.
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
uvm_tracker_wait(&stream->tracker);
}
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
uvm_gpu_semaphore_free(&stream->semaphore);
uvm_rm_mem_free(stream->other_stream_counter_snapshots_mem);
uvm_rm_mem_free(stream->counter_snapshots_mem);
uvm_rm_mem_free(stream->counter_mem);
uvm_tracker_deinit(&stream->tracker);
uvm_kvfree(stream->other_stream_counter_expected);
}
uvm_kvfree(streams);
if (verbose > 0)
UVM_TEST_PRINT("Cleanup done\n");
return status;
}
NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_read_rm(va_space);
status = test_ordering(va_space);
if (status != NV_OK)
goto done;
if (g_uvm_global.num_simulated_devices == 0) {
status = test_rc(va_space);
if (status != NV_OK)
goto done;
}
done:
uvm_va_space_up_read_rm(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
NV_STATUS status;
if (params->iterations == 0 || params->num_streams == 0)
return NV_ERR_INVALID_PARAMETER;
// TODO: Bug 1764963: Rework the test to not rely on the global lock as that
// serializes all the threads calling this at the same time.
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_read_rm(va_space);
status = stress_test_all_gpus_in_va(va_space,
params->num_streams,
params->iterations,
params->seed,
params->verbose);
if (status != NV_OK)
goto done;
done:
uvm_va_space_up_read_rm(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS uvm_test_channel_stress_update_channels(uvm_va_space_t *va_space,
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
NV_STATUS status = NV_OK;
uvm_test_rng_t rng;
NvU32 i;
uvm_test_rng_init(&rng, params->seed);
uvm_va_space_down_read(va_space);
for (i = 0; i < params->iterations; ++i) {
uvm_gpu_t *gpu = random_va_space_gpu(&rng, va_space);
uvm_channel_manager_update_progress(gpu->channel_manager);
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
goto done;
}
}
done:
uvm_va_space_up_read(va_space);
return status;
}
static NV_STATUS uvm_test_channel_noop_push(uvm_va_space_t *va_space,
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
NV_STATUS status = NV_OK;
uvm_push_t push;
uvm_test_rng_t rng;
uvm_gpu_t *gpu;
NvU32 i;
uvm_test_rng_init(&rng, params->seed);
uvm_va_space_down_read(va_space);
for (i = 0; i < params->iterations; ++i) {
uvm_channel_type_t channel_type = random_ce_channel_type(&rng);
gpu = random_va_space_gpu(&rng, va_space);
status = uvm_push_begin(gpu->channel_manager, channel_type, &push, "noop push");
if (status != NV_OK)
goto done;
// Push an actual noop method so that the push doesn't get optimized
// away if we ever detect empty pushes.
gpu->parent->host_hal->noop(&push, UVM_METHOD_SIZE);
uvm_push_end(&push);
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
goto done;
}
}
if (params->verbose > 0)
UVM_TEST_PRINT("Noop pushes: completed %u pushes seed: %u\n", i, params->seed);
for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
NV_STATUS wait_status = uvm_channel_manager_wait(gpu->channel_manager);
if (status == NV_OK)
status = wait_status;
}
done:
uvm_va_space_up_read(va_space);
return status;
}
NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
switch (params->mode) {
case UVM_TEST_CHANNEL_STRESS_MODE_STREAM:
return uvm_test_channel_stress_stream(va_space, params);
case UVM_TEST_CHANNEL_STRESS_MODE_UPDATE_CHANNELS:
return uvm_test_channel_stress_update_channels(va_space, params);
case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
return uvm_test_channel_noop_push(va_space, params);
default:
return NV_ERR_INVALID_PARAMETER;
}
}

View File

@@ -0,0 +1,322 @@
/*******************************************************************************
Copyright (c) 2013-2021 NVIDIA Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
// TODO: Bug 1710855: Tweak this number through benchmarks
#define UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS (10*1000ULL)
#define UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC 30ULL
// Default to debug prints being enabled for debug and develop builds and
// disabled for release builds.
static int uvm_debug_prints = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
// Make the module param writable so that prints can be enabled or disabled at
// any time by modifying the module parameter.
module_param(uvm_debug_prints, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_debug_prints, "Enable uvm debug prints.");
bool uvm_debug_prints_enabled()
{
return uvm_debug_prints != 0;
}
// This parameter allows a user-mode program to call the kernel tests defined
// in this module. It must only be enabled for testing, since enabling it
// otherwise breaks security. By default, and for safety, it is set to false.
int uvm_enable_builtin_tests __read_mostly = 0;
module_param(uvm_enable_builtin_tests, int, S_IRUGO);
MODULE_PARM_DESC(uvm_enable_builtin_tests,
"Enable the UVM built-in tests. (This is a security risk)");
//
// Convert kernel errno codes to corresponding NV_STATUS
//
NV_STATUS errno_to_nv_status(int errnoCode)
{
if (errnoCode < 0)
errnoCode = -errnoCode;
switch (errnoCode)
{
case 0:
return NV_OK;
case E2BIG:
case EINVAL:
return NV_ERR_INVALID_ARGUMENT;
case EACCES:
return NV_ERR_INVALID_ACCESS_TYPE;
case EADDRINUSE:
case EADDRNOTAVAIL:
return NV_ERR_UVM_ADDRESS_IN_USE;
case EFAULT:
return NV_ERR_INVALID_ADDRESS;
case EOVERFLOW:
return NV_ERR_OUT_OF_RANGE;
case EINTR:
case EBUSY:
case EAGAIN:
return NV_ERR_BUSY_RETRY;
case ENXIO:
case ENODEV:
return NV_ERR_MODULE_LOAD_FAILED;
case ENOMEM:
return NV_ERR_NO_MEMORY;
case EPERM:
return NV_ERR_INSUFFICIENT_PERMISSIONS;
case ESRCH:
return NV_ERR_PID_NOT_FOUND;
case ETIMEDOUT:
return NV_ERR_TIMEOUT;
case EEXIST:
return NV_ERR_IN_USE;
case ENOSYS:
case EOPNOTSUPP:
return NV_ERR_NOT_SUPPORTED;
case ENOENT:
return NV_ERR_NO_VALID_PATH;
case EIO:
return NV_ERR_RC_ERROR;
case ENODATA:
return NV_ERR_OBJECT_NOT_FOUND;
default:
return NV_ERR_GENERIC;
};
}
// Returns POSITIVE errno
int nv_status_to_errno(NV_STATUS status)
{
switch (status) {
case NV_OK:
return 0;
case NV_ERR_BUSY_RETRY:
return EAGAIN;
case NV_ERR_INSUFFICIENT_PERMISSIONS:
return EPERM;
case NV_ERR_GPU_UUID_NOT_FOUND:
return ENODEV;
case NV_ERR_INSUFFICIENT_RESOURCES:
case NV_ERR_NO_MEMORY:
return ENOMEM;
case NV_ERR_INVALID_ACCESS_TYPE:
return EACCES;
case NV_ERR_INVALID_ADDRESS:
return EFAULT;
case NV_ERR_INVALID_ARGUMENT:
case NV_ERR_INVALID_DEVICE:
case NV_ERR_INVALID_PARAMETER:
case NV_ERR_INVALID_REQUEST:
case NV_ERR_INVALID_STATE:
return EINVAL;
case NV_ERR_NOT_SUPPORTED:
return ENOSYS;
case NV_ERR_OBJECT_NOT_FOUND:
return ENODATA;
case NV_ERR_MODULE_LOAD_FAILED:
return ENXIO;
case NV_ERR_OVERLAPPING_UVM_COMMIT:
case NV_ERR_UVM_ADDRESS_IN_USE:
return EADDRINUSE;
case NV_ERR_PID_NOT_FOUND:
return ESRCH;
case NV_ERR_TIMEOUT:
case NV_ERR_TIMEOUT_RETRY:
return ETIMEDOUT;
case NV_ERR_IN_USE:
return EEXIST;
case NV_ERR_NO_VALID_PATH:
return ENOENT;
case NV_ERR_RC_ERROR:
case NV_ERR_ECC_ERROR:
return EIO;
case NV_ERR_OUT_OF_RANGE:
return EOVERFLOW;
default:
UVM_ASSERT_MSG(0, "No errno conversion set up for NV_STATUS %s\n", nvstatusToString(status));
return EINVAL;
}
}
//
// This routine retrieves the process ID of current, but makes no attempt to
// refcount or lock the pid in place.
//
unsigned uvm_get_stale_process_id(void)
{
return (unsigned)task_tgid_vnr(current);
}
unsigned uvm_get_stale_thread_id(void)
{
return (unsigned)task_pid_vnr(current);
}
//
// A simple security rule for allowing access to UVM user space memory: if you
// are the same user as the owner of the memory, or if you are root, then you
// are granted access. The idea is to allow debuggers and profilers to work, but
// without opening up any security holes.
//
NvBool uvm_user_id_security_check(uid_t euidTarget)
{
return (NV_CURRENT_EUID() == euidTarget) ||
(UVM_ROOT_UID == euidTarget);
}
void on_uvm_test_fail(void)
{
(void)NULL;
}
void on_uvm_assert(void)
{
(void)NULL;
#ifdef __COVERITY__
__coverity_panic__()
#endif
}
NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin)
{
NvU64 curr = NV_GETTIME();
// This schedule() is required for functionality, not just system
// performance. It allows RM to run and unblock the UVM driver:
//
// - UVM must service faults in order for RM to idle/preempt a context
// - RM must service interrupts which stall UVM (SW methods, stalling CE
// interrupts, etc) in order for UVM to service faults
//
// Even though UVM's bottom half is preemptable, we have encountered cases
// in which a user thread running in RM won't preempt the UVM driver's
// thread unless the UVM driver thread gives up its timeslice. This is also
// theoretically possible if the RM thread has a low nice priority.
//
// TODO: Bug 1710855: Look into proper prioritization of these threads as a longer-term
// solution.
if (curr - spin->start_time_ns >= UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS && NV_MAY_SLEEP()) {
schedule();
curr = NV_GETTIME();
}
cpu_relax();
// TODO: Bug 1710855: Also check fatal_signal_pending() here if the caller can handle it.
if (curr - spin->print_time_ns >= 1000*1000*1000*UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC) {
spin->print_time_ns = curr;
return NV_ERR_TIMEOUT_RETRY;
}
return NV_OK;
}
// This formats a GPU UUID in a UVM-friendly way. That is, nearly the same as
// what nvidia-smi reports. It will always prefix the UUID with UVM-GPU so
// that we know that we have a real, binary formatted UUID that will work in
// the UVM APIs.
//
// It comes out like this:
//
// UVM-GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
//
// This routine will always null-terminate the string for you. This is true
// even if the buffer was too small!
//
// Return value is the number of non-null characters written.
//
// Note that if you were to let the NV2080_CTRL_CMD_GPU_GET_GID_INFO command
// return its default format, which is ASCII, not binary, then you would get
// this back:
//
// GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
//
// ...which is actually a character string, and won't work for UVM API calls.
// So it's very important to be able to see the difference.
//
static char uvm_digit_to_hex(unsigned value)
{
if (value >= 10)
return value - 10 + 'a';
else
return value + '0';
}
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pUuidStruct)
{
char *str = buffer+8;
unsigned i;
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
// Check the size before writing the prefix so an undersized buffer is never
// overrun.
if (bufferLength < (8 /*prefix*/ + 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
return *buffer = 0;
memcpy(buffer, "UVM-GPU-", 8);
for (i = 0; i < 16; i++) {
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
if (dashMask & (1 << (i+1)))
*str++ = '-';
}
*str = 0;
return (int)(str-buffer);
}

View File

@@ -0,0 +1,357 @@
/*******************************************************************************
Copyright (c) 2013-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _UVM_COMMON_H
#define _UVM_COMMON_H
#ifdef DEBUG
#define UVM_IS_DEBUG() 1
#else
#define UVM_IS_DEBUG() 0
#endif
// NVIDIA_UVM_DEVELOP implies DEBUG, but not vice-versa
// TODO Bug 1773100: Figure out the right distinction between develop and debug
// builds.
#ifdef NVIDIA_UVM_DEVELOP
#define UVM_IS_DEVELOP() 1
#else
#define UVM_IS_DEVELOP() 0
#endif
#include "uvm_types.h"
#include "uvm_linux.h"
enum {
NVIDIA_UVM_PRIMARY_MINOR_NUMBER = 0,
NVIDIA_UVM_TOOLS_MINOR_NUMBER = 1,
// to ensure backward compatibility and correct counting, please insert any
// new minor devices just above the following field:
NVIDIA_UVM_NUM_MINOR_DEVICES
};
#define UVM_GPU_UUID_TEXT_BUFFER_LENGTH (8+16*2+4+1)
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pGpuUuid);
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
func(prefix "%s:%u %s[pid:%d]" fmt, \
kbasename(__FILE__), \
__LINE__, \
__FUNCTION__, \
current->pid, \
##__VA_ARGS__)
#define UVM_PRINT_FUNC(func, fmt, ...) \
UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)
// Check whether UVM_{ERR,DBG,INFO}_PRINT* should be enabled
bool uvm_debug_prints_enabled(void);
// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
// uvm_debug_prints_enabled() returns true.
#define UVM_PRINT_FUNC_PREFIX_CHECK(func, prefix, fmt, ...) \
do { \
if (uvm_debug_prints_enabled()) { \
UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ##__VA_ARGS__); \
} \
} while (0)
#define UVM_ASSERT_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_DBG_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_DBG_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_INFO_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
//
// Please see the documentation of format_uuid_to_buffer, for details on what
// this routine prints for you.
//
#define UVM_DBG_PRINT_UUID(msg, uuidPtr) \
do { \
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
UVM_DBG_PRINT("%s: %s\n", msg, uuidBuffer); \
} while (0)
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)
#define UVM_ERR_PRINT_UUID(msg, uuidPtr, ...) \
do { \
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", uuidBuffer, ##__VA_ARGS__); \
} while (0)
#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
#define UVM_PANIC_MSG(fmt, ...) UVM_PRINT_FUNC(panic, ": " fmt, ##__VA_ARGS__)
#define UVM_PANIC_ON_MSG(cond, fmt, ...) \
do { \
if (unlikely(cond)) \
UVM_PANIC_MSG(fmt, ##__VA_ARGS__); \
} while (0)
#define UVM_PANIC_ON(cond) UVM_PANIC_ON_MSG(cond, "failed cond %s\n", #cond)
// expr may include function calls. Use sizeof to prevent it from being
// evaluated while also preventing unused variable warnings. sizeof() can't be
// used on a bitfield however, so use ! to force the expression to evaluate as
// an int.
#define UVM_IGNORE_EXPR(expr) ((void)sizeof(!(expr)))
#define UVM_IGNORE_EXPR2(expr1, expr2) \
do { \
UVM_IGNORE_EXPR(expr1); \
UVM_IGNORE_EXPR(expr2); \
} while (0)
// NO-OP function to break on_uvm_test_fail - that is just to set a breakpoint
void on_uvm_test_fail(void);
// NO-OP function to break on_uvm_assert - that is just to set a breakpoint
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);
// UVM_ASSERT_RELEASE and UVM_ASSERT_MSG_RELEASE are always enabled, even on
// release builds.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n")
// Prevent function calls in expr and the print argument list from being
// evaluated.
#define UVM_ASSERT_MSG_IGNORE(expr, fmt, ...) \
do { \
UVM_IGNORE_EXPR(expr); \
UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
} while (0)
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG UVM_ASSERT_MSG_RELEASE
#define UVM_ASSERT UVM_ASSERT_RELEASE
#else
#define UVM_ASSERT_MSG(expr, fmt, ...) UVM_ASSERT_MSG_IGNORE(expr, fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) UVM_ASSERT_MSG_IGNORE(expr, "\n")
#endif
// Provide a short form of UUIDs, typically for use in debug printing:
#define ABBREV_UUID(uuid) (unsigned)(uuid)
static inline NvBool uvm_uuid_is_cpu(const NvProcessorUuid *uuid)
{
return memcmp(uuid, &NV_PROCESSOR_UUID_CPU_DEFAULT, sizeof(*uuid)) == 0;
}
#define UVM_ALIGN_DOWN(x, a) ({ \
typeof(x) _a = a; \
UVM_ASSERT(is_power_of_2(_a)); \
(x) & ~(_a - 1); \
})
#define UVM_ALIGN_UP(x, a) ({ \
typeof(x) _a = a; \
UVM_ASSERT(is_power_of_2(_a)); \
((x) + _a - 1) & ~(_a - 1); \
})
#define UVM_PAGE_ALIGN_UP(value) UVM_ALIGN_UP(value, PAGE_SIZE)
#define UVM_PAGE_ALIGN_DOWN(value) UVM_ALIGN_DOWN(value, PAGE_SIZE)
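// Worked examples (illustrative): UVM_ALIGN_UP(0x1234, 0x1000) == 0x2000,
// UVM_ALIGN_DOWN(0x1234, 0x1000) == 0x1000, and with a 4K PAGE_SIZE
// UVM_PAGE_ALIGN_UP(1) == 4096.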
// These macros provide a convenient way to string-ify enum values.
#define UVM_ENUM_STRING_CASE(value) case value: return #value
#define UVM_ENUM_STRING_DEFAULT() default: return "UNKNOWN"
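// Illustrative sketch of how these macros are typically used (the enum type
// and its values here are hypothetical):
//
//     static const char *example_type_to_string(example_type_t type)
//     {
//         switch (type) {
//             UVM_ENUM_STRING_CASE(EXAMPLE_TYPE_FOO);
//             UVM_ENUM_STRING_CASE(EXAMPLE_TYPE_BAR);
//             UVM_ENUM_STRING_DEFAULT();
//         }
//     }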
// Divide by a dynamic value known at runtime to be a power of 2. ilog2 is
// optimized as a single instruction in many processors, whereas integer
// division is always slow.
static inline NvU32 uvm_div_pow2_32(NvU32 numerator, NvU32 denominator_pow2)
{
UVM_ASSERT(is_power_of_2(denominator_pow2));
UVM_ASSERT(denominator_pow2);
return numerator >> ilog2(denominator_pow2);
}
static inline NvU64 uvm_div_pow2_64(NvU64 numerator, NvU64 denominator_pow2)
{
UVM_ASSERT(is_power_of_2(denominator_pow2));
UVM_ASSERT(denominator_pow2);
return numerator >> ilog2(denominator_pow2);
}
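// Worked example (illustrative): uvm_div_pow2_32(4096, 256) ==
// 4096 >> ilog2(256) == 4096 >> 8 == 16.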
#define SUM_FROM_0_TO_N(n) (((n) * ((n) + 1)) / 2)
// Start and end are inclusive
static inline NvBool uvm_ranges_overlap(NvU64 a_start, NvU64 a_end, NvU64 b_start, NvU64 b_end)
{
// De Morgan's of: !(a_end < b_start || b_end < a_start)
return a_end >= b_start && b_end >= a_start;
}
static int debug_mode(void)
{
#ifdef DEBUG
return 1;
#else
return 0;
#endif
}
static inline void kmem_cache_destroy_safe(struct kmem_cache **ppCache)
{
if (ppCache)
{
if (*ppCache)
kmem_cache_destroy(*ppCache);
*ppCache = NULL;
}
}
static const uid_t UVM_ROOT_UID = 0;
typedef struct
{
NvU64 start_time_ns;
NvU64 print_time_ns;
} uvm_spin_loop_t;
static inline void uvm_spin_loop_init(uvm_spin_loop_t *spin)
{
NvU64 curr = NV_GETTIME();
spin->start_time_ns = curr;
spin->print_time_ns = curr;
}
// Periodically yields the CPU when not called from interrupt context. Returns
// NV_ERR_TIMEOUT_RETRY if the caller should print a warning that we've been
// waiting too long, and NV_OK otherwise.
NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin);
static NvU64 uvm_spin_loop_elapsed(const uvm_spin_loop_t *spin)
{
NvU64 curr = NV_GETTIME();
return curr - spin->start_time_ns;
}
#define UVM_SPIN_LOOP(__spin) ({ \
NV_STATUS __status = uvm_spin_loop(__spin); \
if (__status == NV_ERR_TIMEOUT_RETRY) { \
UVM_DBG_PRINT("Warning: stuck waiting for %llus\n", \
uvm_spin_loop_elapsed(__spin) / (1000*1000*1000)); \
\
if (uvm_debug_prints_enabled()) \
dump_stack(); \
} \
__status; \
})
// Execute the loop code while cond is true. Invokes UVM_SPIN_LOOP() at the
// end of each iteration.
#define UVM_SPIN_WHILE(cond, spin) \
if (cond) \
for (uvm_spin_loop_init(spin); (cond); UVM_SPIN_LOOP(spin))
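// Illustrative usage sketch (some_condition() is a placeholder for whatever
// completion check the caller polls on):
//
//     uvm_spin_loop_t spin;
//     UVM_SPIN_WHILE(!some_condition(), &spin) {
//         // Body runs while waiting; UVM_SPIN_LOOP() executes at the end of
//         // every iteration and periodically yields the CPU.
//     }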
//
// Documentation for the internal routines listed below may be found in the
// implementation file(s).
//
NV_STATUS errno_to_nv_status(int errnoCode);
int nv_status_to_errno(NV_STATUS status);
unsigned uvm_get_stale_process_id(void);
unsigned uvm_get_stale_thread_id(void);
NvBool uvm_user_id_security_check(uid_t euidTarget);
extern int uvm_enable_builtin_tests;
static inline void uvm_init_character_device(struct cdev *cdev, const struct file_operations *fops)
{
cdev_init(cdev, fops);
cdev->owner = THIS_MODULE;
}
typedef struct
{
int rm_control_fd;
NvHandle user_client;
NvHandle user_object;
} uvm_rm_user_object_t;
// Macro used to compare two values of a type that supports the less-than
// operator. It returns -1 if a < b, 1 if a > b, and 0 if a == b.
#define UVM_CMP_DEFAULT(a,b) \
({ \
typeof(a) _a = a; \
typeof(b) _b = b; \
int __ret; \
BUILD_BUG_ON(sizeof(a) != sizeof(b)); \
if (_a < _b) \
__ret = -1; \
else if (_b < _a) \
__ret = 1; \
else \
__ret = 0; \
\
__ret; \
})
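// Worked examples (illustrative): UVM_CMP_DEFAULT(3ULL, 5ULL) == -1,
// UVM_CMP_DEFAULT(5ULL, 3ULL) == 1, UVM_CMP_DEFAULT(4ULL, 4ULL) == 0.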
// Returns whether the input file was opened against the UVM character device
// file. A NULL input returns false.
bool uvm_file_is_nvidia_uvm(struct file *filp);
// Reads the first word in the supplied struct page.
static inline void uvm_touch_page(struct page *page)
{
char *mapping;
UVM_ASSERT(page);
mapping = (char *) kmap(page);
(void)UVM_READ_ONCE(*mapping);
kunmap(page);
}
#endif /* _UVM_COMMON_H */

View File

@@ -0,0 +1,53 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// This file provides simple wrappers that are always built with optimizations
// turned on to WAR issues with functions that don't build correctly otherwise.
#include "uvm_linux.h"
int nv_atomic_xchg(atomic_t *val, int new)
{
return atomic_xchg(val, new);
}
int nv_atomic_cmpxchg(atomic_t *val, int old, int new)
{
return atomic_cmpxchg(val, old, new);
}
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new)
{
return atomic_long_cmpxchg(val, old, new);
}
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n)
{
return copy_from_user(to, from, n);
}
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n)
{
return copy_to_user(to, from, n);
}

View File

@@ -0,0 +1,38 @@
/*******************************************************************************
Copyright (c) 2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_EXTERN_DECL_H__
#define __UVM_EXTERN_DECL_H__
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
extern int uvm_enable_debug_procfs;
extern unsigned uvm_perf_map_remote_on_native_atomics_fault;
extern uvm_global_t g_uvm_global;
extern bool uvm_global_is_suspended(void);
#endif //__UVM_EXTERN_DECL_H__

View File

@@ -0,0 +1,69 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_gpu_replayable_faults.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params, struct file *filp)
{
NV_STATUS status = NV_OK;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_gpu_t *gpu;
uvm_global_processor_mask_t retained_gpus;
NvU64 i;
uvm_global_processor_mask_zero(&retained_gpus);
uvm_va_space_down_read(va_space);
for_each_va_space_gpu(gpu, va_space) {
if (gpu->parent->replayable_faults_supported)
uvm_global_processor_mask_set(&retained_gpus, gpu->global_id);
}
uvm_global_mask_retain(&retained_gpus);
uvm_va_space_up_read(va_space);
if (uvm_global_processor_mask_empty(&retained_gpus))
return NV_ERR_INVALID_DEVICE;
for (i = 0; i < params->iterations; i++) {
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
break;
}
for_each_global_gpu_in_mask(gpu, &retained_gpus)
TEST_CHECK_GOTO(uvm_gpu_fault_buffer_flush(gpu) == NV_OK, out);
}
out:
uvm_global_mask_release(&retained_gpus);
return status;
}

View File

@@ -0,0 +1,98 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_FORWARD_DECL_H__
#define __UVM_FORWARD_DECL_H__
typedef struct uvm_global_struct uvm_global_t;
typedef struct uvm_gpu_struct uvm_gpu_t;
typedef struct uvm_parent_gpu_struct uvm_parent_gpu_t;
typedef struct uvm_rm_mem_struct uvm_rm_mem_t;
typedef struct uvm_mem_struct uvm_mem_t;
typedef struct uvm_host_hal_struct uvm_host_hal_t;
typedef struct uvm_ce_hal_struct uvm_ce_hal_t;
typedef struct uvm_arch_hal_struct uvm_arch_hal_t;
typedef struct uvm_fault_buffer_hal_struct uvm_fault_buffer_hal_t;
typedef struct uvm_access_counter_buffer_hal_struct uvm_access_counter_buffer_hal_t;
typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;
typedef struct uvm_gpu_semaphore_pool_page_struct uvm_gpu_semaphore_pool_page_t;
typedef struct uvm_gpu_peer_struct uvm_gpu_peer_t;
typedef struct uvm_mmu_mode_hal_struct uvm_mmu_mode_hal_t;
typedef struct uvm_channel_manager_struct uvm_channel_manager_t;
typedef struct uvm_channel_struct uvm_channel_t;
typedef struct uvm_user_channel_struct uvm_user_channel_t;
typedef struct uvm_push_struct uvm_push_t;
typedef struct uvm_push_info_struct uvm_push_info_t;
typedef struct uvm_push_acquire_info_struct uvm_push_acquire_info_t;
typedef struct uvm_pushbuffer_struct uvm_pushbuffer_t;
typedef struct uvm_gpfifo_entry_struct uvm_gpfifo_entry_t;
typedef struct uvm_va_policy_struct uvm_va_policy_t;
typedef struct uvm_va_range_struct uvm_va_range_t;
typedef struct uvm_va_block_struct uvm_va_block_t;
typedef struct uvm_va_block_test_struct uvm_va_block_test_t;
typedef struct uvm_va_block_wrapper_struct uvm_va_block_wrapper_t;
typedef struct uvm_va_space_struct uvm_va_space_t;
typedef struct uvm_va_space_mm_struct uvm_va_space_mm_t;
typedef struct uvm_make_resident_context_struct uvm_make_resident_context_t;
typedef struct uvm_gpu_va_space_struct uvm_gpu_va_space_t;
typedef struct uvm_thread_context_lock_struct uvm_thread_context_lock_t;
typedef struct uvm_thread_context_struct uvm_thread_context_t;
typedef struct uvm_thread_context_wrapper_struct uvm_thread_context_wrapper_t;
typedef struct uvm_perf_module_struct uvm_perf_module_t;
typedef struct uvm_page_table_range_vec_struct uvm_page_table_range_vec_t;
typedef struct uvm_page_table_range_struct uvm_page_table_range_t;
typedef struct uvm_page_tree_struct uvm_page_tree_t;
typedef struct uvm_fault_buffer_entry_struct uvm_fault_buffer_entry_t;
typedef struct uvm_pte_batch_struct uvm_pte_batch_t;
typedef struct uvm_tlb_batch_struct uvm_tlb_batch_t;
typedef struct uvm_fault_service_batch_context_struct uvm_fault_service_batch_context_t;
typedef struct uvm_service_block_context_struct uvm_service_block_context_t;
typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;
typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;
typedef struct uvm_ibm_npu_struct uvm_ibm_npu_t;
#endif //__UVM_FORWARD_DECL_H__

View File

@@ -0,0 +1,352 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_test.h"
#include "uvm_test_ioctl.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_va_space.h"
#include "uvm_mmu.h"
#include "nv_uvm_types.h"
#include "nv_uvm_interface.h"
#include "uvm_common.h"
#define get_rm_ptes(offset, size, ext_map_info) \
uvm_rm_locked_call( \
nvUvmInterfaceGetExternalAllocPtes(gpu_va_space, \
duped_memory, \
offset, \
size, \
ext_map_info))
static uvm_aperture_t get_aperture(uvm_va_space_t *va_space,
uvm_gpu_t *memory_owning_gpu,
uvm_gpu_t *memory_mapping_gpu,
UvmGpuMemoryInfo *memory_info,
bool sli_supported)
{
if (memory_info->sysmem) {
return UVM_APERTURE_SYS;
}
else {
if (memory_mapping_gpu != memory_owning_gpu && !sli_supported)
return uvm_gpu_peer_aperture(memory_mapping_gpu, memory_owning_gpu);
return UVM_APERTURE_VID;
}
}
static bool is_cacheable(UvmGpuExternalMappingInfo *ext_mapping_info, uvm_aperture_t aperture)
{
if (ext_mapping_info->cachingType == UvmRmGpuCachingTypeForceCached)
return true;
else if (ext_mapping_info->cachingType == UvmRmGpuCachingTypeForceUncached)
return false;
else if (aperture == UVM_APERTURE_VID)
return true;
return false;
}
static NvU32 get_protection(UvmGpuExternalMappingInfo *ext_mapping_info)
{
if (ext_mapping_info->mappingType == UvmRmGpuMappingTypeReadWriteAtomic ||
ext_mapping_info->mappingType == UvmRmGpuMappingTypeDefault)
return UVM_PROT_READ_WRITE_ATOMIC;
else if (ext_mapping_info->mappingType == UvmRmGpuMappingTypeReadWrite)
return UVM_PROT_READ_WRITE;
else
return UVM_PROT_READ_ONLY;
}
static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
uvm_gpu_t *memory_mapping_gpu,
NvU64 mapping_offset,
NvU64 mapping_size,
UvmGpuExternalMappingInfo *ext_mapping_info,
UvmGpuMemoryInfo *memory_info,
bool sli_supported)
{
NvU32 index = 0, total_pte_count = 0, skip = 0, page_size = 0;
uvm_aperture_t aperture = 0;
NvU32 prot;
NvU64 phys_offset, pte;
uvm_mmu_mode_hal_t *hal;
NvU64 pte_flags = UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED;
uvm_gpu_t *memory_owning_gpu = NULL;
TEST_CHECK_RET(memory_info->contig);
hal = uvm_gpu_va_space_get(va_space, memory_mapping_gpu)->page_tables.hal;
page_size = memory_info->pageSize;
// Verify that make_pte supports this page size
TEST_CHECK_RET(page_size & hal->page_sizes());
total_pte_count = mapping_size ? (mapping_size / page_size) : (memory_info->size / page_size);
TEST_CHECK_RET(total_pte_count);
TEST_CHECK_RET(ext_mapping_info->numWrittenPtes <= total_pte_count);
TEST_CHECK_RET(ext_mapping_info->numRemainingPtes == (total_pte_count - ext_mapping_info->numWrittenPtes));
skip = ext_mapping_info->pteSize / sizeof(NvU64);
TEST_CHECK_RET(skip);
memory_owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &memory_info->uuid);
if (memory_owning_gpu == NULL)
return NV_ERR_INVALID_DEVICE;
// TODO: Bug 1903234: Once RM supports indirect peer mappings, we'll need to
// update this test since the aperture will be SYS. Depending on how
// RM implements things, we might not be able to compare the physical
// addresses either.
aperture = get_aperture(va_space, memory_owning_gpu, memory_mapping_gpu, memory_info, sli_supported);
if (is_cacheable(ext_mapping_info, aperture))
pte_flags |= UVM_MMU_PTE_FLAGS_CACHED;
prot = get_protection(ext_mapping_info);
phys_offset = mapping_offset;
// Add the physical offset for nvswitch connected peer mappings
if (uvm_aperture_is_peer(aperture) && uvm_gpus_are_nvswitch_connected(memory_mapping_gpu, memory_owning_gpu))
phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
for (index = 0; index < ext_mapping_info->numWrittenPtes; index++) {
pte = hal->make_pte(aperture,
memory_info->physAddr + phys_offset,
prot,
pte_flags);
TEST_CHECK_RET(pte == ext_mapping_info->pteBuffer[index * skip]);
phys_offset += page_size;
}
return NV_OK;
}
static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
{
NV_STATUS status = NV_OK;
NV_STATUS free_status;
uvm_gpu_t *memory_mapping_gpu;
NvHandle duped_memory;
UvmGpuExternalMappingInfo ext_mapping_info;
UvmGpuMemoryInfo memory_info;
NvU64 pte_buffer[16] = {0};
NvU32 size = 0;
uvmGpuAddressSpaceHandle gpu_va_space;
uvmGpuDeviceHandle rm_device;
NvHandle client, memory;
client = params->hClient;
memory = params->hMemory;
// Note: This check is safe as the single-GPU test does not run on SLI-enabled devices.
memory_mapping_gpu = uvm_va_space_get_gpu_by_uuid_with_gpu_va_space(va_space, &params->gpu_uuid);
if (!memory_mapping_gpu)
return NV_ERR_INVALID_DEVICE;
gpu_va_space = memory_mapping_gpu->rm_address_space;
rm_device = uvm_gpu_device_handle(memory_mapping_gpu);
status = uvm_rm_locked_call(nvUvmInterfaceDupMemory(rm_device, client, memory, &duped_memory, &memory_info));
if (status != NV_OK)
return status;
TEST_CHECK_GOTO(uvm_processor_uuid_eq(&memory_info.uuid, &params->gpu_uuid), done);
TEST_CHECK_GOTO((memory_info.size == params->size), done);
size = params->size;
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
ext_mapping_info.pteBuffer = pte_buffer;
ext_mapping_info.pteBufferSize = 1;
TEST_CHECK_GOTO((get_rm_ptes(size + 1, 0, &ext_mapping_info) == NV_ERR_INVALID_BASE), done);
TEST_CHECK_GOTO((get_rm_ptes(0, size + 1, &ext_mapping_info) == NV_ERR_INVALID_LIMIT), done);
TEST_CHECK_GOTO((get_rm_ptes(1, 0, &ext_mapping_info) == NV_ERR_INVALID_ARGUMENT), done);
TEST_CHECK_GOTO((get_rm_ptes(0, size - 1, &ext_mapping_info) == NV_ERR_INVALID_ARGUMENT), done);
TEST_CHECK_GOTO((get_rm_ptes(0, 0, &ext_mapping_info) == NV_ERR_BUFFER_TOO_SMALL), done);
ext_mapping_info.pteBufferSize = sizeof(pte_buffer);
TEST_CHECK_GOTO(get_rm_ptes(0, 0, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
0,
0,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
TEST_CHECK_GOTO(get_rm_ptes(memory_info.pageSize, 0, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
memory_info.pageSize,
0,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
TEST_CHECK_GOTO(get_rm_ptes(0, size - memory_info.pageSize, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
0,
size - memory_info.pageSize,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
ext_mapping_info.mappingType = UvmRmGpuMappingTypeReadWrite;
ext_mapping_info.cachingType = UvmRmGpuCachingTypeForceCached;
TEST_CHECK_GOTO(get_rm_ptes(memory_info.pageSize, size - memory_info.pageSize, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
memory_info.pageSize,
size - memory_info.pageSize,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
ext_mapping_info.mappingType = UvmRmGpuMappingTypeReadOnly;
ext_mapping_info.cachingType = UvmRmGpuCachingTypeForceUncached;
TEST_CHECK_GOTO(get_rm_ptes(size - memory_info.pageSize, memory_info.pageSize, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
size - memory_info.pageSize,
memory_info.pageSize,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
done:
free_status = uvm_rm_locked_call(nvUvmInterfaceFreeDupedHandle(rm_device, duped_memory));
if (status == NV_OK)
status = free_status;
return status;
}
static NV_STATUS test_get_rm_ptes_multi_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
{
NV_STATUS status = NV_OK;
NV_STATUS free_status;
uvm_gpu_t *memory_mapping_gpu;
NvHandle duped_memory;
UvmGpuExternalMappingInfo ext_mapping_info;
UvmGpuMemoryInfo memory_info;
uvmGpuDeviceHandle rm_device;
NvU64 pte_buffer[16] = {0};
uvmGpuAddressSpaceHandle gpu_va_space;
memory_mapping_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!memory_mapping_gpu)
return NV_ERR_INVALID_DEVICE;
gpu_va_space = memory_mapping_gpu->rm_address_space;
rm_device = uvm_gpu_device_handle(memory_mapping_gpu);
status = uvm_rm_locked_call(nvUvmInterfaceDupMemory(rm_device,
params->hClient,
params->hMemory,
&duped_memory,
&memory_info));
if (status != NV_OK)
return status;
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
memset(pte_buffer, 0, sizeof(pte_buffer));
ext_mapping_info.pteBuffer = pte_buffer;
ext_mapping_info.pteBufferSize = sizeof(pte_buffer);
switch (params->test_mode) {
case UVM_TEST_GET_RM_PTES_MULTI_GPU_SUPPORTED:
case UVM_TEST_GET_RM_PTES_MULTI_GPU_SLI_SUPPORTED:
TEST_CHECK_GOTO(get_rm_ptes(0, 0, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
0,
0,
&ext_mapping_info,
&memory_info,
(params->test_mode ==
UVM_TEST_GET_RM_PTES_MULTI_GPU_SLI_SUPPORTED)) == NV_OK, done);
break;
case UVM_TEST_GET_RM_PTES_MULTI_GPU_NOT_SUPPORTED:
TEST_CHECK_GOTO(get_rm_ptes(0, 0, &ext_mapping_info) == NV_ERR_NOT_SUPPORTED, done);
break;
default:
status = NV_ERR_INVALID_PARAMETER;
}
done:
free_status = uvm_rm_locked_call(nvUvmInterfaceFreeDupedHandle(rm_device, duped_memory));
if (status == NV_OK)
status = free_status;
return status;
}
NV_STATUS uvm_test_get_rm_ptes(UVM_TEST_GET_RM_PTES_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_va_space_down_read_rm(va_space);
switch (params->test_mode) {
case UVM_TEST_GET_RM_PTES_SINGLE_GPU:
status = test_get_rm_ptes_single_gpu(va_space, params);
break;
case UVM_TEST_GET_RM_PTES_MULTI_GPU_SUPPORTED:
case UVM_TEST_GET_RM_PTES_MULTI_GPU_SLI_SUPPORTED:
case UVM_TEST_GET_RM_PTES_MULTI_GPU_NOT_SUPPORTED:
status = test_get_rm_ptes_multi_gpu(va_space, params);
break;
default:
status = NV_ERR_INVALID_PARAMETER;
}
uvm_va_space_up_read_rm(va_space);
return status;
}

View File

@@ -0,0 +1,473 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_ats.h"
#include "uvm_global.h"
#include "uvm_gpu_replayable_faults.h"
#include "uvm_mem.h"
#include "uvm_perf_events.h"
#include "uvm_procfs.h"
#include "uvm_thread_context.h"
#include "uvm_va_range.h"
#include "uvm_kvmalloc.h"
#include "uvm_mmu.h"
#include "uvm_perf_heuristics.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_migrate.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_va_space_mm.h"
#include "nv_uvm_interface.h"
uvm_global_t g_uvm_global;
static struct UvmOpsUvmEvents g_exported_uvm_ops;
static bool g_ops_registered = false;
static NV_STATUS uvm_register_callbacks(void)
{
NV_STATUS status = NV_OK;
g_exported_uvm_ops.suspend = uvm_suspend_entry;
g_exported_uvm_ops.resume = uvm_resume_entry;
g_exported_uvm_ops.startDevice = NULL;
g_exported_uvm_ops.stopDevice = NULL;
g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;
// Register the UVM callbacks with the main GPU driver:
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
if (status != NV_OK)
return status;
g_ops_registered = true;
return NV_OK;
}
// Calling this function more than once is harmless:
static void uvm_unregister_callbacks(void)
{
if (g_ops_registered) {
uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmOps());
g_ops_registered = false;
}
}
NV_STATUS uvm_global_init(void)
{
NV_STATUS status;
UvmPlatformInfo platform_info;
// Initialization of thread contexts happened already, during registration
// (addition) of the thread context associated with the UVM module entry
// point that is calling this function.
UVM_ASSERT(uvm_thread_context_global_initialized());
uvm_mutex_init(&g_uvm_global.global_lock, UVM_LOCK_ORDER_GLOBAL);
uvm_init_rwsem(&g_uvm_global.pm.lock, UVM_LOCK_ORDER_GLOBAL_PM);
uvm_spin_lock_irqsave_init(&g_uvm_global.gpu_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_mutex_init(&g_uvm_global.va_spaces.lock, UVM_LOCK_ORDER_VA_SPACES_LIST);
INIT_LIST_HEAD(&g_uvm_global.va_spaces.list);
status = uvm_kvmalloc_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_kvmalloc_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.global_q, "UVM global queue"));
if (status != NV_OK) {
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
if (status != NV_OK) {
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_procfs_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_rm_locked_call(nvUvmInterfaceSessionCreate(&g_uvm_global.rm_session_handle, &platform_info));
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceSessionCreate() failed: %s\n", nvstatusToString(status));
return status;
}
uvm_ats_init(&platform_info);
g_uvm_global.num_simulated_devices = 0;
status = uvm_gpu_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_gpu_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_pmm_sysmem_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_pmm_sysmem_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_mmu_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_mmu_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_mem_global_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_mem_gloal_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_va_policy_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_va_policy_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_va_range_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_va_range_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_range_group_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_range_group_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_migrate_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_migrate_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_perf_events_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_perf_events_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_perf_heuristics_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_perf_heuristics_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_service_block_context_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_service_block_context_init failed: %s\n", nvstatusToString(status));
goto error;
}
// This sets up the ISR (interrupt service routine) by hooking into RM's top-half ISR callback. As soon as this
// call completes, GPU interrupts will start arriving, so it's important to be prepared to receive interrupts before
// this point:
status = uvm_register_callbacks();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
goto error;
}
return NV_OK;
error:
uvm_global_exit();
return status;
}
void uvm_global_exit(void)
{
uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);
// Guarantee completion of any release callbacks scheduled after the flush
// in uvm_resume().
nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
uvm_unregister_callbacks();
uvm_service_block_context_exit();
uvm_perf_heuristics_exit();
uvm_perf_events_exit();
uvm_migrate_exit();
uvm_range_group_exit();
uvm_va_range_exit();
uvm_va_policy_exit();
uvm_mem_global_exit();
uvm_pmm_sysmem_exit();
uvm_gpu_exit();
if (g_uvm_global.rm_session_handle != 0)
uvm_rm_locked_call_void(nvUvmInterfaceSessionDestroy(g_uvm_global.rm_session_handle));
uvm_procfs_exit();
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
nv_kthread_q_stop(&g_uvm_global.global_q);
uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
UVM_ASSERT(list_empty(&g_uvm_global.va_spaces.list));
uvm_thread_context_global_exit();
uvm_kvmalloc_exit();
}
// Signal to the top-half ISR whether calls from the RM's top-half ISR are to
// be completed without processing.
static void uvm_gpu_set_isr_suspended(uvm_gpu_t *gpu, bool is_suspended)
{
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
gpu->parent->isr.is_suspended = is_suspended;
uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
}
static NV_STATUS uvm_suspend(void)
{
uvm_va_space_t *va_space = NULL;
uvm_global_gpu_id_t gpu_id;
uvm_gpu_t *gpu;
// Upon entry into this function, the following is true:
// * GPU interrupts are enabled
// * Any number of fault or access counter notifications could
// be pending
// * No new fault notifications will appear, but new access
// counter notifications could
// * Any of the bottom halves could be running
// * New bottom halves of all types could be scheduled as GPU
// interrupts are handled
// Due to this, the sequence of suspend operations for each GPU is the
// following:
// * Flush the fault buffer to prevent fault interrupts when
// the top-half ISR is suspended
// * Suspend access counter processing
// * Suspend the top-half ISR
// * Flush relevant kthread queues (bottom half, etc.)
// Some locks acquired by this function, such as pm.lock, are released
// by uvm_resume(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
// Take the global power management lock in write mode to lock out
// most user-facing entry points.
uvm_down_write(&g_uvm_global.pm.lock);
nv_kthread_q_flush(&g_uvm_global.global_q);
// Though global_lock isn't held here, pm.lock indirectly prevents the
// addition and removal of GPUs, since these operations can currently
// only occur in response to ioctl() calls.
for_each_global_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
gpu = uvm_gpu_get(gpu_id);
// Since fault buffer state may be lost across sleep cycles, UVM must
// ensure any outstanding replayable faults are dismissed. The RM
// guarantees that all user channels have been preempted before
// uvm_suspend() is called, which implies that no user channels can be
// stalled on faults when this point is reached.
if (gpu->parent->replayable_faults_supported)
uvm_gpu_fault_buffer_flush(gpu);
// TODO: Bug 2535118: flush the non-replayable fault buffer
// Stop access counter interrupt processing for the duration of this
// sleep cycle to defend against potential interrupt storms in
// the suspend path: if rate limiting is applied to access counter
// interrupts in the bottom half in the future, the bottom half flush
// below will no longer be able to guarantee that all outstanding
// notifications have been handled.
uvm_gpu_access_counters_set_ignore(gpu, true);
uvm_gpu_set_isr_suspended(gpu, true);
nv_kthread_q_flush(&gpu->parent->isr.bottom_half_q);
if (gpu->parent->isr.non_replayable_faults.handling)
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
}
// Acquire each VA space's lock in write mode to lock out VMA open and
// release callbacks. These entry points do not have feasible early exit
// options, and so aren't suitable for synchronization with pm.lock.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
list_for_each_entry(va_space, &g_uvm_global.va_spaces.list, list_node)
uvm_va_space_down_write(va_space);
uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);
uvm_thread_context_lock_enable_tracking();
g_uvm_global.pm.is_suspended = true;
return NV_OK;
}
NV_STATUS uvm_suspend_entry(void)
{
UVM_ENTRY_RET(uvm_suspend());
}
static NV_STATUS uvm_resume(void)
{
uvm_va_space_t *va_space = NULL;
uvm_global_gpu_id_t gpu_id;
uvm_gpu_t *gpu;
g_uvm_global.pm.is_suspended = false;
// Some locks released by this function, such as pm.lock, were acquired
// by uvm_suspend(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
// Release each VA space's lock.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
list_for_each_entry(va_space, &g_uvm_global.va_spaces.list, list_node)
uvm_va_space_up_write(va_space);
uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);
// pm.lock is held in lieu of global_lock to prevent GPU addition/removal
for_each_global_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
gpu = uvm_gpu_get(gpu_id);
// Bring the fault buffer software state back in sync with the
// hardware state.
uvm_gpu_fault_buffer_resume(gpu->parent);
uvm_gpu_set_isr_suspended(gpu, false);
// Reenable access counter interrupt processing unless notifications
// have been set to be suppressed.
uvm_gpu_access_counters_set_ignore(gpu, false);
}
uvm_up_write(&g_uvm_global.pm.lock);
uvm_thread_context_lock_enable_tracking();
// Force completion of any release callbacks successfully queued for
// deferred completion while suspended. The deferred release
// queue is not guaranteed to remain empty following this flush since
// some threads that failed to acquire pm.lock in uvm_release() may
// not have scheduled their handlers yet.
nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
return NV_OK;
}
NV_STATUS uvm_resume_entry(void)
{
UVM_ENTRY_RET(uvm_resume());
}
bool uvm_global_is_suspended(void)
{
return g_uvm_global.pm.is_suspended;
}
void uvm_global_set_fatal_error_impl(NV_STATUS error)
{
NV_STATUS previous_error;
UVM_ASSERT(error != NV_OK);
previous_error = nv_atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
if (previous_error == NV_OK) {
UVM_ERR_PRINT("Encountered a global fatal error: %s\n", nvstatusToString(error));
}
else {
UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
nvstatusToString(error), nvstatusToString(previous_error));
}
}
NV_STATUS uvm_global_reset_fatal_error(void)
{
if (!uvm_enable_builtin_tests) {
UVM_ASSERT_MSG(0, "Resetting global fatal error without tests being enabled\n");
return NV_ERR_INVALID_STATE;
}
return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
}
void uvm_global_mask_retain(const uvm_global_processor_mask_t *mask)
{
uvm_gpu_t *gpu;
for_each_global_gpu_in_mask(gpu, mask)
uvm_gpu_retain(gpu);
}
void uvm_global_mask_release(const uvm_global_processor_mask_t *mask)
{
uvm_global_gpu_id_t gpu_id;
if (uvm_global_processor_mask_empty(mask))
return;
uvm_mutex_lock(&g_uvm_global.global_lock);
// Do not use for_each_global_gpu_in_mask() here as it reads GPU state which
// might already be getting destroyed
for_each_global_gpu_id_in_mask(gpu_id, mask)
uvm_gpu_release_locked(uvm_gpu_get(gpu_id));
uvm_mutex_unlock(&g_uvm_global.global_lock);
}
NV_STATUS uvm_global_mask_check_ecc_error(uvm_global_processor_mask_t *gpus)
{
uvm_gpu_t *gpu;
for_each_global_gpu_in_mask(gpu, gpus) {
NV_STATUS status = uvm_gpu_check_ecc_error(gpu);
if (status != NV_OK)
return status;
}
return NV_OK;
}

View File

@@ -0,0 +1,416 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GLOBAL_H__
#define __UVM_GLOBAL_H__
#include "nv_uvm_types.h"
#include "uvm_extern_decl.h"
#include "uvm_linux.h"
#include "uvm_common.h"
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"
// Global state of the uvm driver
struct uvm_global_struct
{
// Mask of retained GPUs.
// Note that GPUs are added to this mask as the last step of add_gpu() and
// removed from it as the first step of remove_gpu() implying that a GPU
// that's being initialized or deinitialized will not be in it.
uvm_global_processor_mask_t retained_gpus;
// Array of the parent GPUs registered with UVM. Note that GPUs will have
// ids offset by 1 to accommodate the UVM_GLOBAL_ID_CPU so, e.g.,
// parent_gpus[0] will have GPU id = 1. A GPU entry is unused iff it does
// not exist (is a NULL pointer) in this table.
uvm_parent_gpu_t *parent_gpus[UVM_MAX_GPUS];
// A global RM session (RM client)
// Created on module load and destroyed on module unload
uvmGpuSessionHandle rm_session_handle;
// peer-to-peer table
// peer info is added and removed from this table when usermode
// driver calls UvmEnablePeerAccess and UvmDisablePeerAccess
// respectively.
uvm_gpu_peer_t peers[UVM_MAX_UNIQUE_GPU_PAIRS];
// peer-to-peer copy mode
// Pascal+ GPUs support virtual addresses in p2p copies.
// Ampere+ GPUs add support for physical addresses in p2p copies.
uvm_gpu_peer_copy_mode_t peer_copy_mode;
// Stores an NV_STATUS. Once it becomes != NV_OK, the driver should refuse to
// do anything other than try to clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
// GPUs.
atomic_t fatal_error;
// A flag to disable the assert on fatal error
// To be used by tests and only consulted if tests are enabled.
bool disable_fatal_error_assert;
// Lock protecting the global state
uvm_mutex_t global_lock;
struct
{
// Lock synchronizing user threads with power management activity
uvm_rw_semaphore_t lock;
// Power management state flag; tested by UVM_GPU_WRITE_ONCE()
// and UVM_GPU_READ_ONCE() to detect accesses to GPUs when
// UVM is suspended.
bool is_suspended;
} pm;
// This lock synchronizes addition and removal of GPUs from UVM's global
// table. It must be held whenever g_uvm_global.parent_gpus[] is written. In
// order to read from this table, you must hold either the gpu_table_lock,
// or the global_lock.
//
// This is a leaf lock.
uvm_spinlock_irqsave_t gpu_table_lock;
// Number of simulated/emulated devices that have registered with UVM
unsigned num_simulated_devices;
// A single queue for deferred work that is non-GPU-specific.
nv_kthread_q_t global_q;
// A single queue for deferred f_ops->release() handling. Items scheduled to
// run on it may block for the duration of system sleep cycles, stalling
// the queue and preventing any other items from running.
nv_kthread_q_t deferred_release_q;
struct
{
// Indicates whether the system HW supports ATS. This field is set once
// during global initialization (uvm_global_init), and can be read
// afterwards without acquiring any locks.
bool supported;
// On top of HW platform support, ATS support can be overridden using
// the module parameter uvm_ats_mode. This field is set once during
// global initialization (uvm_global_init), and can be read afterwards
// without acquiring any locks.
bool enabled;
} ats;
#if UVM_IBM_NPU_SUPPORTED()
// On IBM systems this array tracks the active NPUs (the NPUs which are
// attached to retained GPUs).
uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif
// List of all active VA spaces
struct
{
uvm_mutex_t lock;
struct list_head list;
} va_spaces;
// Notify a registered process about the driver state after it's unloaded.
// The intent is to systematically report any error during the driver
// teardown. unload_state is used for testing only.
struct
{
// ptr points to a 8-byte buffer within page.
NvU64 *ptr;
struct page *page;
} unload_state;
};
// Initialize global uvm state
NV_STATUS uvm_global_init(void);
// Deinitialize global state (called from module exit)
void uvm_global_exit(void);
// Prepare for entry into a system sleep state
NV_STATUS uvm_suspend_entry(void);
// Recover after exit from a system sleep state
NV_STATUS uvm_resume_entry(void);
// Add parent GPU to the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(!g_uvm_global.parent_gpus[gpu_index]);
g_uvm_global.parent_gpus[gpu_index] = parent_gpu;
}
// Remove parent GPU from the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index]);
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
g_uvm_global.parent_gpus[gpu_index] = NULL;
}
// Get a gpu by its global id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
static uvm_gpu_t *uvm_gpu_get(uvm_global_gpu_id_t global_gpu_id)
{
uvm_parent_gpu_t *parent_gpu;
parent_gpu = g_uvm_global.parent_gpus[uvm_id_gpu_index_from_global_gpu_id(global_gpu_id)];
if (!parent_gpu)
return NULL;
return parent_gpu->gpus[uvm_global_id_sub_processor_index(global_gpu_id)];
}
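// Illustrative usage sketch (not part of the original sources): looking up
// GPUs by global id while holding the global lock, mirroring
// uvm_global_mask_release() in uvm_global.c:
//
//     uvm_mutex_lock(&g_uvm_global.global_lock);
//     for_each_global_gpu_id_in_mask(gpu_id, mask)
//         uvm_gpu_release_locked(uvm_gpu_get(gpu_id));
//     uvm_mutex_unlock(&g_uvm_global.global_lock);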
// Get a gpu by its processor id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
static uvm_gpu_t *uvm_gpu_get_by_processor_id(uvm_processor_id_t id)
{
uvm_global_gpu_id_t global_id = uvm_global_gpu_id_from_gpu_id(id);
uvm_gpu_t *gpu = uvm_gpu_get(global_id);
if (gpu)
UVM_ASSERT(!gpu->parent->smc.enabled);
return gpu;
}
static uvmGpuSessionHandle uvm_gpu_session_handle(uvm_gpu_t *gpu)
{
if (gpu->parent->smc.enabled)
return gpu->smc.rm_session_handle;
return g_uvm_global.rm_session_handle;
}
// Use these READ_ONCE()/WRITE_ONCE() wrappers when accessing GPU resources
// in BAR0/BAR1 to detect cases in which GPUs are accessed when UVM is
// suspended.
#define UVM_GPU_WRITE_ONCE(x, val) do { \
UVM_ASSERT(!uvm_global_is_suspended()); \
UVM_WRITE_ONCE(x, val); \
} while (0)
#define UVM_GPU_READ_ONCE(x) ({ \
UVM_ASSERT(!uvm_global_is_suspended()); \
UVM_READ_ONCE(x); \
})
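// Illustrative usage sketch (not part of the original sources; the register
// pointer names are hypothetical): wrapping BAR0/BAR1 accesses so that a touch
// of GPU resources while UVM is suspended trips the assert above.
//
//     NvU32 put = UVM_GPU_READ_ONCE(*fault_buffer_put_reg);  // flags suspended reads
//     UVM_GPU_WRITE_ONCE(*fault_buffer_get_reg, new_get);    // flags suspended writes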
static bool global_is_fatal_error_assert_disabled(void)
{
// Only allow the assert to be disabled if tests are enabled
if (!uvm_enable_builtin_tests)
return false;
return g_uvm_global.disable_fatal_error_assert;
}
// Set a global fatal error
// Once that happens, the driver should refuse to do anything other than try
// to clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
// GPUs.
// Use a macro so that the assert below provides precise file and line info and
// a backtrace.
#define uvm_global_set_fatal_error(error) \
do { \
if (!global_is_fatal_error_assert_disabled()) \
UVM_ASSERT_MSG(0, "Fatal error: %s\n", nvstatusToString(error)); \
uvm_global_set_fatal_error_impl(error); \
} while (0)
void uvm_global_set_fatal_error_impl(NV_STATUS error);
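// Illustrative usage sketch (not part of the original sources): a caller that
// hits an unrecoverable condition records it and bails out, e.g.
//
//     status = uvm_gpu_check_ecc_error(gpu);
//     if (status != NV_OK) {
//         uvm_global_set_fatal_error(status);
//         return status;
//     }
//
// Later entry points can then consult uvm_global_get_status() and refuse to do
// anything beyond cleanup.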
// Get the global status
static NV_STATUS uvm_global_get_status(void)
{
return atomic_read(&g_uvm_global.fatal_error);
}
// Reset global fatal error
// This is to be used by tests triggering the global error on purpose only.
// Returns the value of the global error field that existed just before this
// reset call was made.
NV_STATUS uvm_global_reset_fatal_error(void);
static uvm_gpu_t *uvm_global_processor_mask_find_first_gpu(const uvm_global_processor_mask_t *global_gpus)
{
uvm_gpu_t *gpu;
uvm_global_gpu_id_t gpu_id = uvm_global_processor_mask_find_first_gpu_id(global_gpus);
if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
return NULL;
gpu = uvm_gpu_get(gpu_id);
// If there is a valid GPU id in the mask, assert that the corresponding
// uvm_gpu_t is present. Otherwise it would stop a
// for_each_global_gpu_in_mask() loop prematurely. Today, this could only
// happen in remove_gpu() because the GPU being removed is deleted from the
// global table very early.
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
return gpu;
}
static uvm_gpu_t *__uvm_global_processor_mask_find_next_gpu(const uvm_global_processor_mask_t *global_gpus, uvm_gpu_t *gpu)
{
uvm_global_gpu_id_t gpu_id;
UVM_ASSERT(gpu);
gpu_id = uvm_global_processor_mask_find_next_id(global_gpus, uvm_global_gpu_id_next(gpu->global_id));
if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
return NULL;
gpu = uvm_gpu_get(gpu_id);
// See comment in uvm_global_processor_mask_find_first_gpu().
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
return gpu;
}
// Helper to iterate over all GPUs in the input mask
#define for_each_global_gpu_in_mask(gpu, global_mask) \
for (gpu = uvm_global_processor_mask_find_first_gpu(global_mask); \
gpu != NULL; \
gpu = __uvm_global_processor_mask_find_next_gpu(global_mask, gpu))
// Helper to iterate over all GPUs retained by the UVM driver (across all va spaces)
#define for_each_global_gpu(gpu) \
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
gpu != NULL; \
gpu = __uvm_global_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
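// Illustrative usage sketch (not part of the original sources): visiting every
// GPU in a retained mask, mirroring uvm_global_mask_check_ecc_error() in
// uvm_global.c:
//
//     uvm_gpu_t *gpu;
//     for_each_global_gpu_in_mask(gpu, &retained_gpus) {
//         NV_STATUS status = uvm_gpu_check_ecc_error(gpu);
//         if (status != NV_OK)
//             return status;
//     }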
// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
NvU32 i;
if (parent_gpu) {
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
i = gpu_index + 1;
}
else {
i = 0;
}
parent_gpu = NULL;
while (i < UVM_MAX_GPUS) {
if (g_uvm_global.parent_gpus[i]) {
parent_gpu = g_uvm_global.parent_gpus[i];
break;
}
i++;
}
return parent_gpu;
}
// LOCKING: Must hold the global_lock
static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent_gpu, uvm_gpu_t *cur_gpu)
{
uvm_gpu_t *gpu = NULL;
uvm_global_gpu_id_t global_gpu_id;
NvU32 sub_processor_index;
NvU32 cur_sub_processor_index;
UVM_ASSERT(parent_gpu);
global_gpu_id = uvm_global_gpu_id_from_gpu_id(parent_gpu->id);
cur_sub_processor_index = cur_gpu ? uvm_global_id_sub_processor_index(cur_gpu->global_id) : -1;
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
gpu = uvm_gpu_get(uvm_global_id_from_value(uvm_global_id_value(global_gpu_id) + sub_processor_index));
UVM_ASSERT(gpu != NULL);
}
return gpu;
}
// LOCKING: Must hold either the global_lock or the gpu_table_lock
#define for_each_parent_gpu(parent_gpu) \
for ((parent_gpu) = uvm_global_find_next_parent_gpu(NULL); \
(parent_gpu) != NULL; \
(parent_gpu) = uvm_global_find_next_parent_gpu((parent_gpu)))
// LOCKING: Must hold the global_lock
#define for_each_gpu_in_parent(parent_gpu, gpu) \
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), NULL);}); \
(gpu) != NULL; \
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), (gpu)))
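// Illustrative usage sketch (not part of the original sources): walking every
// registered parent GPU and, under the global lock, each of its valid
// sub-processor GPUs:
//
//     uvm_parent_gpu_t *parent_gpu;
//     uvm_gpu_t *gpu;
//     uvm_mutex_lock(&g_uvm_global.global_lock);
//     for_each_parent_gpu(parent_gpu) {
//         for_each_gpu_in_parent(parent_gpu, gpu)
//             UVM_DBG_PRINT("GPU id %u\n", uvm_global_id_value(gpu->global_id));
//     }
//     uvm_mutex_unlock(&g_uvm_global.global_lock);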
// Helper which calls uvm_gpu_retain on each GPU in mask
void uvm_global_mask_retain(const uvm_global_processor_mask_t *mask);
// Helper which calls uvm_gpu_release_locked on each GPU in mask.
//
// LOCKING: this function takes and releases the global lock if the input mask
// is not empty
void uvm_global_mask_release(const uvm_global_processor_mask_t *mask);
// Check for ECC errors for all GPUs in a mask
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_mask_check_ecc_error(uvm_global_processor_mask_t *gpus);
// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);
// Release fault service contexts if any exist.
void uvm_service_block_context_exit(void);
#endif // __UVM_GLOBAL_H__

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,88 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_ACCESS_COUNTERS_H__
#define __UVM_GPU_ACCESS_COUNTERS_H__
#include "uvm_common.h"
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu);
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
// Ignore or unignore access counters notifications. Ignoring means that the
// bottom half is a no-op which just leaves notifications in the HW buffer
// without being serviced and without inspecting any SW state.
//
// To avoid interrupt storms, access counter interrupts will be disabled while
// ignored. Access counter bottom halves may still be scheduled in the top half
// when other interrupts arrive and the top half sees that there are also
// pending access counter notifications.
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore);
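// Illustrative usage sketch (not part of the original sources): the suspend
// path in uvm_global.c ignores notifications before parking the top-half ISR,
// and the resume path unignores them afterwards:
//
//     uvm_gpu_access_counters_set_ignore(gpu, true);   // suspend: leave notifications buffered
//     ...
//     uvm_gpu_access_counters_set_ignore(gpu, false);  // resume: re-evaluate and service them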
// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);
// Global perf initialization/cleanup functions
NV_STATUS uvm_perf_access_counters_init(void);
void uvm_perf_access_counters_exit(void);
// VA space initialization/cleanup functions. See comments in
// uvm_perf_heuristics.h
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
// Check whether access counters should be enabled when the given GPU is
// registered on any VA space.
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
// Functions used to enable/disable access counters on a GPU in the given VA
// space.
//
// A per-GPU reference counter tracks the number of VA spaces in which access
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_gpu_access_counters_disable
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
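// Illustrative usage sketch (not part of the original sources; error handling
// elided): enabling access counters when a GPU is registered in a VA space and
// disabling them on unregistration. The per-GPU reference count described
// above makes the calls safe across multiple VA spaces.
//
//     status = uvm_gpu_access_counters_enable(gpu, va_space);
//     if (status != NV_OK)
//         return status;
//     ...
//     uvm_gpu_access_counters_disable(gpu, va_space);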
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
#endif // __UVM_GPU_ACCESS_COUNTERS_H__

View File

@@ -0,0 +1,774 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_global.h"
#include "uvm_gpu_isr.h"
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_gpu_non_replayable_faults.h"
#include "uvm_thread_context.h"
// Level-based vs pulse-based interrupts
// =====================================
// Turing switches to pulse-based interrupts for replayable/non-replayable
// faults and access counter notifications. Prior GPUs use level-based
// interrupts.
//
// Level-based interrupts are rearmed automatically as long as the interrupt
// condition is set. Pulse-based interrupts, on the other hand, are
// re-triggered by clearing their interrupt line and forcing the interrupt
// condition to be re-evaluated. However, RM re-triggers all top-level
// interrupts when exiting its top half. Thus, both level-based and pulse-based
// interrupts need to be disabled at interrupt handling boundaries, in order to
// avoid interrupt storms.
//
// Moreover, in order to make sure that pulse-based interrupts are not missed,
// we need to clear the interrupt bit and force an interrupt condition
// re-evaluation after interrupts are re-enabled. In the case of replayable
// faults and access counter notifications the interrupt condition is
// re-evaluated by writing to GET. Non-replayable faults work the same way, but
// they are currently owned by RM, so UVM doesn't have to do anything.
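// A condensed sketch of the resulting handling sequence (not part of the
// original sources):
//
//     1) Top half: disable the interrupt at the handling boundary
//     2) Bottom half: service the pending notifications
//     3) Re-enable the interrupt
//     4) Write GET so the pulse-based condition is re-evaluated and any
//        notification that arrived in the meantime re-triggers the interrupt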
// For use by the nv_kthread_q that is servicing the replayable fault bottom
// half, only.
static void replayable_faults_isr_bottom_half_entry(void *args);
// For use by the nv_kthread_q that is servicing the non-replayable fault
// bottom half, only.
static void non_replayable_faults_isr_bottom_half_entry(void *args);
// For use by the nv_kthread_q that is servicing the access counter bottom
// half, only.
static void access_counters_isr_bottom_half_entry(void *args);
// Increments the reference count tracking whether replayable page fault
// interrupts should be disabled. The caller is guaranteed that replayable page
// faults are disabled upon return. Interrupts might already be disabled prior
// to making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_gpu_replayable_faults_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu);
// Decrements the reference count tracking whether replayable page fault
// interrupts should be disabled. Only once the count reaches 0 are the HW
// interrupts actually enabled, so this call does not guarantee that the
// interrupts have been re-enabled upon return.
//
// uvm_gpu_replayable_faults_intr_disable() must have been called prior to
// calling this function.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu);
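// Illustrative usage sketch (not part of the original sources): the top half
// disables replayable fault interrupts under the interrupts_lock before
// scheduling the bottom half (see schedule_replayable_faults_handler() below);
// a matching enable, also under the interrupts_lock, is expected once the
// bottom half has drained the fault buffer:
//
//     uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
//     uvm_gpu_replayable_faults_intr_disable(parent_gpu);   // ref count goes up
//     ...schedule the bottom half...
//     uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
//
//     uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
//     uvm_gpu_replayable_faults_intr_enable(parent_gpu);    // ref count goes down
//     uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);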
static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
{
// handling gets set to false for all handlers during removal, so quit if
// the GPU is in the process of being removed.
if (parent_gpu->isr.replayable_faults.handling) {
// Use raw call instead of UVM helper. Ownership will be recorded in the
// bottom half. See comment replayable_faults_isr_bottom_half().
if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) == 0) {
if (uvm_gpu_replayable_faults_pending(parent_gpu)) {
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled here to avoid an interrupt
// storm
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
// Schedule a bottom half, but do *not* release the GPU ISR
// lock. The bottom half releases the GPU ISR lock as part of
// its cleanup.
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.replayable_faults.bottom_half_q_item);
return 1;
}
else {
up(&parent_gpu->isr.replayable_faults.service_lock.sem);
}
}
}
return 0;
}
static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
{
// handling gets set to false for all handlers during removal, so quit if
// the GPU is in the process of being removed.
if (parent_gpu->isr.non_replayable_faults.handling) {
// Non-replayable faults are stored in a synchronized circular queue
// shared by RM/UVM, so we can query the number of pending
// faults. These faults are not replayed, and since RM advances
// GET to PUT when copying the fault packets to the queue, no further
// interrupts will be triggered by the GPU and faults may stay
// unserviced. Therefore, if there is a fault in the queue, we schedule
// a bottom half unconditionally.
if (uvm_gpu_non_replayable_faults_pending(parent_gpu)) {
bool scheduled;
nv_kref_get(&parent_gpu->gpu_kref);
scheduled = nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.non_replayable_faults.bottom_half_q_item) != 0;
// If the q_item did not get scheduled because it was already
// queued, that instance will handle the pending faults. Just
// drop the GPU kref.
if (!scheduled)
uvm_parent_gpu_kref_put(parent_gpu);
return 1;
}
}
return 0;
}
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
if (!parent_gpu->isr.access_counters.handling_ref_count)
return 0;
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem))
return 0;
if (!uvm_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
return 0;
}
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled to avoid an interrupt storm
uvm_gpu_access_counters_intr_disable(parent_gpu);
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters.bottom_half_q_item);
return 1;
}
// This is called from RM's top-half ISR (see the nvidia_isr() function), and UVM is given a
// chance to handle the interrupt before most of the RM processing. UVM communicates what it
// did back to RM via the return code:
//
// NV_OK:
// UVM handled an interrupt.
//
// NV_WARN_MORE_PROCESSING_REQUIRED:
// UVM did not schedule a bottom half, because it was unable to get the locks it
// needed, but there is still UVM work to be done. RM will return "not handled" to the
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
// processing continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining (GET != PUT in
// the fault buffer).
//
// NV_ERR_NO_INTR_PENDING:
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
// available for the future. RM's interrupt handling tends to evolve as new chips and
// new interrupts get created.
static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
{
uvm_parent_gpu_t *parent_gpu;
unsigned num_handlers_scheduled = 0;
NV_STATUS status;
if (!in_interrupt() && in_atomic()) {
// Early-out if we're not in interrupt context, but memory allocations
// require GFP_ATOMIC. This happens with CONFIG_DEBUG_SHIRQ enabled,
// where the interrupt handler is called as part of its removal to make
// sure it's prepared for being called even when it's being freed.
// This breaks the assumption that the UVM driver is called in atomic
// context only in the interrupt context, which the thread context
// management relies on.
return NV_OK;
}
if (!gpu_uuid) {
// This can happen early in the main GPU driver initialization, because
// that involves testing interrupts before the GPU is fully set up.
return NV_ERR_NO_INTR_PENDING;
}
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
parent_gpu = uvm_parent_gpu_get_by_uuid_locked(gpu_uuid);
if (parent_gpu == NULL) {
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return NV_ERR_NO_INTR_PENDING;
}
// We take a reference during the top half, and an additional reference for
// each scheduled bottom half. References are dropped at the end of the bottom
// halves.
nv_kref_get(&parent_gpu->gpu_kref);
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
++parent_gpu->isr.interrupt_count;
if (parent_gpu->isr.is_suspended) {
status = NV_ERR_NO_INTR_PENDING;
}
else {
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
if (num_handlers_scheduled == 0)
status = NV_WARN_MORE_PROCESSING_REQUIRED;
else
status = NV_OK;
}
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_kref_put(parent_gpu);
return status;
}
NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid)
{
UVM_ENTRY_RET(uvm_isr_top_half(gpu_uuid));
}
static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int node)
{
#if UVM_THREAD_AFFINITY_SUPPORTED()
if (node != -1 && !cpumask_empty(uvm_cpumask_of_node(node))) {
NV_STATUS status;
status = errno_to_nv_status(nv_kthread_q_init_on_node(queue, name, node));
if (status != NV_OK)
return status;
return errno_to_nv_status(set_cpus_allowed_ptr(queue->q_kthread, uvm_cpumask_of_node(node)));
}
#endif
return errno_to_nv_status(nv_kthread_q_init(queue, name));
}
NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
char kthread_name[TASK_COMM_LEN + 1];
if (parent_gpu->replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU fault buffer: %s, GPU: %s\n",
nvstatusToString(status),
parent_gpu->name);
return status;
}
nv_kthread_q_item_init(&parent_gpu->isr.replayable_faults.bottom_half_q_item,
replayable_faults_isr_bottom_half_entry,
parent_gpu);
parent_gpu->isr.replayable_faults.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.replayable_faults.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.replayable_faults.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.replayable_faults.handling = true;
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u BH", uvm_id_value(parent_gpu->id));
status = init_queue_on_node(&parent_gpu->isr.bottom_half_q, kthread_name, parent_gpu->closest_cpu_numa_node);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed in nv_kthread_q_init for bottom_half_q: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);
return status;
}
if (parent_gpu->non_replayable_faults_supported) {
nv_kthread_q_item_init(&parent_gpu->isr.non_replayable_faults.bottom_half_q_item,
non_replayable_faults_isr_bottom_half_entry,
parent_gpu);
parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count) *
num_possible_cpus());
if (!parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.non_replayable_faults.handling = true;
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u KC", uvm_id_value(parent_gpu->id));
status = init_queue_on_node(&parent_gpu->isr.kill_channel_q,
kthread_name,
parent_gpu->closest_cpu_numa_node);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed in nv_kthread_q_init for kill_channel_q: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);
return status;
}
}
if (parent_gpu->access_counters_supported) {
status = uvm_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
parent_gpu->name);
return status;
}
nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
access_counters_isr_bottom_half_entry,
parent_gpu);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters.handling_ref_count = 0;
parent_gpu->isr.access_counters.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
}
}
return NV_OK;
}
void uvm_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
{
nv_kthread_q_flush(&parent_gpu->isr.bottom_half_q);
nv_kthread_q_flush(&parent_gpu->isr.kill_channel_q);
}
void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
// Now that the GPU is safely out of the global table, lock the GPU and mark
// it as no longer handling interrupts so the top half knows not to schedule
// any more bottom halves.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
parent_gpu->isr.replayable_faults.was_handling = parent_gpu->isr.replayable_faults.handling;
parent_gpu->isr.non_replayable_faults.was_handling = parent_gpu->isr.non_replayable_faults.handling;
parent_gpu->isr.replayable_faults.handling = false;
parent_gpu->isr.non_replayable_faults.handling = false;
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
// Flush all bottom half ISR work items and stop the nv_kthread_q that is
// servicing this GPU's bottom halves. Note that this requires that the
// bottom half never take the global lock, since we're holding it here.
//
// Note that it's safe to call nv_kthread_q_stop() even if
// nv_kthread_q_init() failed in uvm_gpu_init_isr().
nv_kthread_q_stop(&parent_gpu->isr.bottom_half_q);
nv_kthread_q_stop(&parent_gpu->isr.kill_channel_q);
}
void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
{
// Return ownership to RM:
if (parent_gpu->isr.replayable_faults.was_handling) {
// No user threads could have anything left on
// replayable_faults.disable_intr_ref_count since they must retain the
// GPU across uvm_gpu_replayable_faults_isr_lock/
// uvm_gpu_replayable_faults_isr_unlock. This means the
// uvm_gpu_replayable_faults_intr_disable above could only have raced
// with bottom halves.
//
// If we cleared replayable_faults.handling before the bottom half got
// to its uvm_gpu_replayable_faults_isr_unlock, when it eventually
// reached uvm_gpu_replayable_faults_isr_unlock it would have skipped
// the interrupt re-enable, leaving us with extra ref counts here.
//
// In any case we're guaranteed that replayable faults interrupts are
// disabled and can't get re-enabled, so we can safely ignore the ref
// count value and just clean things up.
UVM_ASSERT_MSG(parent_gpu->isr.replayable_faults.disable_intr_ref_count > 0,
"%s replayable_faults.disable_intr_ref_count: %llu\n",
parent_gpu->name,
parent_gpu->isr.replayable_faults.disable_intr_ref_count);
uvm_gpu_fault_buffer_deinit(parent_gpu);
}
if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_gpu_deinit_access_counters(parent_gpu);
}
uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
}
static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
{
uvm_global_gpu_id_t global_gpu_id = uvm_global_gpu_id_from_gpu_id(parent_gpu->id);
uvm_gpu_t *gpu;
// When SMC is enabled, there's no longer a 1:1 relationship between the
// parent and the partitions. But because all relevant interrupt paths
// are shared, as is the fault reporting logic, it's sufficient here
// to proceed with any valid uvm_gpu_t, even if the corresponding partition
// didn't cause all, or even any of the interrupts.
// The bottom half handlers will later find the appropriate partitions by
// attributing the notifications to VA spaces as necessary.
if (parent_gpu->smc.enabled) {
NvU32 sub_processor_index;
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
sub_processor_index = find_first_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
gpu = uvm_gpu_get(uvm_global_id_from_value(uvm_global_id_value(global_gpu_id) + sub_processor_index));
UVM_ASSERT(gpu != NULL);
}
else {
gpu = NULL;
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
}
else {
gpu = uvm_gpu_get(global_gpu_id);
UVM_ASSERT(gpu != NULL);
}
return gpu;
}
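// Illustrative sketch (not part of the driver): the "any valid partition will
// do" selection above reduces to scanning a bitmap for the first set bit, as
// find_first_bit() does over parent_gpu->valid_gpus. A minimal stand-alone
// version, with hypothetical names:
#include <stdio.h>

#define MAX_SUB_PROCESSORS 8

static int find_first_valid(unsigned valid_mask)
{
    int i;

    for (i = 0; i < MAX_SUB_PROCESSORS; i++) {
        if (valid_mask & (1u << i))
            return i;   // any valid partition is good enough here
    }
    return -1;          // no partition registered
}

int main(void)
{
    printf("%d\n", find_first_valid(0x0));  // prints -1
    printf("%d\n", find_first_valid(0x6));  // prints 1
    return 0;
}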
static void replayable_faults_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_gpu_t *gpu;
unsigned int cpu;
gpu = find_first_valid_gpu(parent_gpu);
if (gpu == NULL)
goto put_kref;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
// Record the lock ownership
// The service_lock semaphore is taken in the top half using a raw
// semaphore call (down_trylock()). Here, the lock "ownership" is recorded,
// using a direct call to uvm_record_lock(). This pair of raw calls results
// in an ownership "transfer" between the top and bottom halves.
// Due to this ownership transfer, other usages of the service_lock can
// use the UVM (un)lock helpers to handle lock ownership and record keeping.
uvm_record_lock(&parent_gpu->isr.replayable_faults.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
// Multiple bottom halves for replayable faults can be running
// concurrently, but only one can be running this function for a given GPU
// since we enter with the replayable_faults.service_lock held.
cpu = get_cpu();
++parent_gpu->isr.replayable_faults.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.replayable_faults.stats.cpus_used_mask);
++parent_gpu->isr.replayable_faults.stats.cpu_exec_count[cpu];
put_cpu();
uvm_gpu_service_replayable_faults(gpu);
uvm_gpu_replayable_faults_isr_unlock(parent_gpu);
put_kref:
uvm_parent_gpu_kref_put(parent_gpu);
}
static void replayable_faults_isr_bottom_half_entry(void *args)
{
UVM_ENTRY_VOID(replayable_faults_isr_bottom_half(args));
}
static void non_replayable_faults_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_gpu_t *gpu;
unsigned int cpu;
gpu = find_first_valid_gpu(parent_gpu);
if (gpu == NULL)
goto put_kref;
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
uvm_gpu_non_replayable_faults_isr_lock(parent_gpu);
// Multiple bottom halves for non-replayable faults can be running
// concurrently, but only one can enter this section for a given GPU
// since we acquired the non_replayable_faults.service_lock
cpu = get_cpu();
++parent_gpu->isr.non_replayable_faults.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.non_replayable_faults.stats.cpus_used_mask);
++parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count[cpu];
put_cpu();
uvm_gpu_service_non_replayable_fault_buffer(gpu);
uvm_gpu_non_replayable_faults_isr_unlock(parent_gpu);
put_kref:
uvm_parent_gpu_kref_put(parent_gpu);
}
static void non_replayable_faults_isr_bottom_half_entry(void *args)
{
UVM_ENTRY_VOID(non_replayable_faults_isr_bottom_half(args));
}
static void access_counters_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_gpu_t *gpu;
unsigned int cpu;
gpu = find_first_valid_gpu(parent_gpu);
if (gpu == NULL)
goto put_kref;
UVM_ASSERT(parent_gpu->access_counters_supported);
uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
// Multiple bottom halves for counter notifications can be running
// concurrently, but only one can be running this function for a given GPU
// since we enter with the access_counters_isr_lock held.
cpu = get_cpu();
++parent_gpu->isr.access_counters.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
put_cpu();
uvm_gpu_service_access_counters(gpu);
uvm_gpu_access_counters_isr_unlock(parent_gpu);
put_kref:
uvm_parent_gpu_kref_put(parent_gpu);
}
static void access_counters_isr_bottom_half_entry(void *args)
{
UVM_ENTRY_VOID(access_counters_isr_bottom_half(args));
}
void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
// Bump the disable ref count. This guarantees that the bottom half or
// another thread trying to take the replayable_faults.service_lock won't
// inadvertently re-enable interrupts during this locking sequence.
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
// Now that we know replayable fault interrupts can't get enabled, take the
// lock.
uvm_down(&parent_gpu->isr.replayable_faults.service_lock);
}
void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
// The following sequence is delicate:
//
// 1) Enable replayable page fault interrupts
// 2) Rearm pulse based interrupts
// 3) Unlock GPU isr.replayable_faults.service_lock (mutex)
// 4) Unlock isr.interrupts_lock (spin lock)
//
// ...because the moment that page fault interrupts are reenabled, a top
// half might start receiving them. A top-half cannot run on the core
// executing this code as interrupts are disabled as long as the
// interrupts_lock is held. If it runs on a different core, it's going to
// spin waiting for the interrupts_lock to be released by this core before
// attempting to acquire the service_lock mutex. Hence there is no risk of
// the top-half missing interrupts after they are reenabled, but before the
// service_lock mutex is released.
if (parent_gpu->isr.replayable_faults.handling) {
// Turn page fault interrupts back on, unless remove_gpu() has already removed
// this GPU from the GPU table. remove_gpu() indicates that situation by
// setting parent_gpu->isr.replayable_faults.handling to false.
//
// This path can only be taken from the bottom half. User threads
// calling this function must have previously retained the GPU, so they
// can't race with remove_gpu.
//
// TODO: Bug 1766600: Assert that we're in a bottom half thread, once
// that's tracked by the lock assertion code.
//
// Note that if we're in the bottom half and the GPU was removed before
// we checked replayable_faults.handling, we won't drop our interrupt
// disable ref count from the corresponding top-half call to
// uvm_gpu_replayable_faults_intr_disable. That's ok because remove_gpu
// ignores the refcount after waiting for the bottom half to finish.
uvm_gpu_replayable_faults_intr_enable(parent_gpu);
// Rearm pulse interrupts. This guarantees that the state of the pending
// interrupt is current and the top level rearm performed by RM is only
// going to trigger it if necessary. This avoids both of the possible
// bad cases:
// 1) GET != PUT but interrupt state is not pending
// This could lead to the interrupt being lost.
// 2) GET == PUT but interrupt state is pending
// This could lead to an interrupt storm as the top-half would see
// no work to be done, but the interrupt would get constantly
// retriggered by RM's top level rearm.
// clear_replayable_faults is a no-op for architectures that don't
// support pulse-based interrupts.
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
}
// This unlock call has to be an out-of-order unlock because interrupts_lock
// is still held. Otherwise, it would be reported as a lock order violation.
uvm_up_out_of_order(&parent_gpu->isr.replayable_faults.service_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
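// Illustrative sketch (not part of the driver): a user-space model of the
// unlock ordering described above, with two pthread mutexes standing in for
// the interrupts_lock spin lock and the service_lock semaphore. The point is
// only the ordering: interrupts are re-enabled and pulse interrupts rearmed
// while interrupts_lock is still held, service_lock is released out of order,
// and interrupts_lock is released last. All names are hypothetical.
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t interrupts_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t service_lock    = PTHREAD_MUTEX_INITIALIZER;

static void isr_unlock_model(void)
{
    pthread_mutex_lock(&interrupts_lock);
    puts("1) re-enable replayable fault interrupts");
    puts("2) rearm pulse-based interrupts (no-op where unsupported)");
    pthread_mutex_unlock(&service_lock);     // 3) out-of-order release
    pthread_mutex_unlock(&interrupts_lock);  // 4) released last
}

int main(void)
{
    pthread_mutex_lock(&service_lock);  // taken earlier by the isr_lock path
    isr_unlock_model();
    return 0;
}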
void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_down(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
{
// See comments in uvm_gpu_replayable_faults_isr_lock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_gpu_access_counters_intr_disable(parent_gpu);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
uvm_down(&parent_gpu->isr.access_counters.service_lock);
}
void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
// See comments in uvm_gpu_replayable_faults_isr_unlock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_gpu_access_counters_intr_enable(parent_gpu);
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
parent_gpu->access_counter_buffer_info.cached_get);
}
// This unlock call has to be an out-of-order unlock because interrupts_lock
// is still held. Otherwise, it would be reported as a lock order violation.
uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
if (parent_gpu->isr.replayable_faults.handling && parent_gpu->isr.replayable_faults.disable_intr_ref_count == 0)
parent_gpu->fault_buffer_hal->disable_replayable_faults(parent_gpu);
++parent_gpu->isr.replayable_faults.disable_intr_ref_count;
}
static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(parent_gpu->isr.replayable_faults.disable_intr_ref_count > 0);
--parent_gpu->isr.replayable_faults.disable_intr_ref_count;
if (parent_gpu->isr.replayable_faults.handling && parent_gpu->isr.replayable_faults.disable_intr_ref_count == 0)
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
}
void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
// The read of handling_ref_count could race with a write from
// gpu_access_counters_enable/disable, since here we may not hold the
// ISR lock. But those functions are invoked with the interrupt disabled
// (disable_intr_ref_count > 0), so the check always returns false when the
// race occurs
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
}
++parent_gpu->isr.access_counters.disable_intr_ref_count;
}
void uvm_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
--parent_gpu->isr.access_counters.disable_intr_ref_count;
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
}
}
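// Illustrative sketch (not part of the driver): a self-contained model of the
// ref-counted disable/enable pattern implemented by the functions above. The
// hardware is only touched on the 0 -> 1 transition of the disable count
// (disable) and on the 1 -> 0 transition (re-enable); nested callers just move
// the counter. The "handling" checks in the real code are omitted for brevity
// and all names are hypothetical.
#include <assert.h>
#include <stdio.h>

static unsigned disable_intr_ref_count;

static void hw_disable(void) { puts("HW: notifications disabled"); }
static void hw_enable(void)  { puts("HW: notifications enabled");  }

static void intr_disable(void)
{
    if (disable_intr_ref_count++ == 0)
        hw_disable();
}

static void intr_enable(void)
{
    assert(disable_intr_ref_count > 0);
    if (--disable_intr_ref_count == 0)
        hw_enable();
}

int main(void)
{
    intr_disable();  // 0 -> 1: touches HW
    intr_disable();  // nested: counter only
    intr_enable();   // counter only
    intr_enable();   // 1 -> 0: touches HW
    return 0;
}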
View File
@@ -0,0 +1,196 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_ISR_H__
#define __UVM_GPU_ISR_H__
#include "nv-kthread-q.h"
#include "uvm_common.h"
#include "uvm_lock.h"
#include "uvm_forward_decl.h"
// ISR handling state for a specific interrupt type
typedef struct
{
// Protects against changes to the GPU data structures used by the handling
// routines of this interrupt type.
uvm_semaphore_t service_lock;
// Bottom-half to be executed for this interrupt. There is one bottom-half
// per interrupt type.
nv_kthread_q_item_t bottom_half_q_item;
union
{
// Used for replayable and non-replayable faults.
struct
{
// This is set to true during add_gpu(), if the GPU supports the
// interrupt. It is set back to false during remove_gpu().
// interrupts_lock must be held in order to write this variable.
bool handling;
// Variable set in uvm_gpu_disable_isr() during remove_gpu() to
// indicate if this type of interrupt was being handled by the
// driver.
bool was_handling;
};
// Used for access counters.
//
// If the GPU does not support access counters, the ref count is always
// zero. Otherwise, the refcount is incremented when the GPU is
// registered in a VA space for the first time, and decremented when
// unregistered or the VA space is destroyed.
//
// Locking: protected by the GPU access counters ISR lock. Naked
// accesses are allowed during GPU addition and removal.
NvU64 handling_ref_count;
};
struct
{
// Number of the bottom-half invocations for this interrupt on a GPU over
// its lifetime
NvU64 bottom_half_count;
// A bitmask of the CPUs on which the bottom half has executed. The
// corresponding bit gets set once the bottom half executes on that
// CPU.
// This mask is useful when testing that the bottom half is getting
// executed on the correct set of CPUs.
struct cpumask cpus_used_mask;
// An array (one per possible CPU), which holds the number of times the
// bottom half has executed on that CPU.
NvU64 *cpu_exec_count;
} stats;
// This is the number of times the function that disables this type of
// interrupt has been called without a corresponding call to the function
// that enables it. If this is > 0, interrupts are disabled. This field is
// protected by interrupts_lock. This field is only valid for interrupts
// directly owned by UVM:
// - replayable_faults
// - access_counters
NvU64 disable_intr_ref_count;
} uvm_intr_handler_t;
// State for all ISR handling in UVM
typedef struct
{
// This is set by uvm_suspend() and uvm_resume() to indicate whether
// top-half ISR processing is suspended for power management. Calls from
// the RM's top-half are to be completed without processing when this
// flag is set to true.
bool is_suspended;
// There is exactly one nv_kthread_q per GPU. It is used for the ISR bottom
// halves. So N CPUs will be servicing M GPUs, in general. There is one
// bottom-half per interrupt type.
nv_kthread_q_t bottom_half_q;
// Protects the state of interrupts (enabled/disabled) and whether the GPU is
// currently handling them. Taken in both interrupt and process context.
uvm_spinlock_irqsave_t interrupts_lock;
uvm_intr_handler_t replayable_faults;
uvm_intr_handler_t non_replayable_faults;
uvm_intr_handler_t access_counters;
// Kernel thread used to kill channels on fatal non-replayable faults.
// This is needed because we cannot call into RM from the bottom-half to
// avoid deadlocks.
nv_kthread_q_t kill_channel_q;
// Number of top-half ISRs called for this GPU over its lifetime
NvU64 interrupt_count;
} uvm_isr_info_t;
// Entry point for interrupt handling. This is called from RM's top half
NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
// Prevent new bottom halves from being scheduled. This is called during parent
// GPU removal.
void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
// Destroy ISR handling state and return interrupt ownership to RM. This is
// called during parent GPU removal
void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
// supported by the GPU) because the top half attempts to take this lock, and we
// would cause an interrupt storm if we didn't disable them first.
//
// At least one GPU under the parent must have been previously retained.
void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_gpu_replayable_faults_isr_lock(), which should only be called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_gpu_replayable_faults_isr_lock, no GPUs under
// the parent need to have been previously retained.
void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// See uvm_gpu_replayable_faults_isr_lock/unlock
void uvm_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_gpu_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_gpu_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_ISR_H__
View File
@@ -0,0 +1,683 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nv_uvm_interface.h"
#include "uvm_common.h"
#include "uvm_api.h"
#include "uvm_gpu_non_replayable_faults.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_lock.h"
#include "uvm_tools.h"
#include "uvm_user_channel.h"
#include "uvm_va_space_mm.h"
#include "uvm_va_block.h"
#include "uvm_va_range.h"
#include "uvm_kvmalloc.h"
#include "uvm_ats_faults.h"
// In the context of a CUDA application using Unified Memory, it is sometimes
// assumed that there is a single type of fault, originated by a memory
// load/store in an SM (Graphics Engine), which itself can be traced back to a
// memory access in a CUDA kernel written by a developer. In reality, faults can
// also be triggered by other parts of the GPU, i.e. by other engines, as the
// result of developer-facing APIs, or operations initiated by a user-mode
// driver. The Graphics Engine faults are called replayable faults, while the
// rest are called non-replayable. The differences between the two types of
// faults go well beyond the engine originating the fault.
//
// A non-replayable fault originates in an engine other than Graphics. UVM
// services non-replayable faults from the Copy and PBDMA (Host/ESCHED) Engines.
// Non-replayable faults originated in other engines are considered fatal, and
// do not reach the UVM driver. While UVM can distinguish between faults
// originated in the Copy Engine and faults originated in the PBDMA Engine, in
// practice they are all processed in the same way. Replayable fault support in
// Graphics was introduced in Pascal, and non-replayable fault support in CE and
// PBDMA Engines was introduced in Volta; all non-replayable faults were fatal
// before Volta.
//
// An example of a Copy Engine non-replayable fault is a memory copy between two
// virtual addresses on a GPU, in which either the source or destination
// pointers are not currently mapped to a physical address in the page tables of
// the GPU. An example of a PBDMA non-replayable fault is a semaphore acquire in
// which the semaphore virtual address passed as argument is currently not
// mapped to any physical address.
//
// Non-replayable faults originated in the CE and PBDMA Engines result in HW
// preempting the channel associated with the fault, a mechanism called "fault
// and switch". More precisely, the switching out affects not only the channel
// that caused the fault, but all the channels in the same Time Slice Group
// (TSG). SW intervention is required so all the channels in the TSG can be
// scheduled again, but channels in other TSGs can be scheduled and resume their
// normal execution. In the case of the non-replayable faults serviced by UVM,
// the driver clears a channel's faulted bit upon successful servicing, but it
// is only when the servicing has completed for all the channels in the TSG that
// they are all allowed to be switched in. Non-replayable faults originated in
// engines other than CE and PBDMA are fatal because these other units lack
// hardware support for the "fault and switch" and restart mechanisms just
// described.
// On the other hand, replayable faults block preemption of the channel until
// software (UVM) services the fault. This is sometimes known as "fault and
// stall". Note that replayable faults prevent the execution of other channels,
// which are stalled until the fault is serviced.
//
// The "non-replayable" naming alludes to the fact that, historically, these
// faults indicated a fatal condition so there was no recovery ("replay")
// process, and SW could not ignore or drop the fault. As discussed before, this
// is no longer the case and while at times the hardware documentation uses the
// "fault and replay" expression for CE and PBDMA faults, we reserve that
// expression for Graphics faults and favor the term "fault and reschedule"
// instead. Replaying a fault does not necessarily imply that UVM has serviced
// it. For example, the UVM driver may choose to ignore the replayable faults
// associated with a GPU for some period of time if it detects that there is
// thrashing going on, and the GPU needs to be throttled. The fault entries
// corresponding to the ignored faults are never saved by UVM, but new entries
// (and new interrupts) will be generated by hardware each time after UVM issues
// a replay.
//
// While replayable faults are always the responsibility of UVM, the servicing
// of non-replayable faults is split between RM and UVM. In the case of
// replayable faults, UVM has sole SW ownership of the hardware buffer
// containing the faults, and it is responsible for updating the GET pointer to
// signal the hardware that a number of faults have been read. UVM also reads
// the PUT pointer value written by hardware. But in the case of non-replayable
// faults, UVM reads the fault entries out of a regular CPU buffer, shared with
// RM, called "shadow buffer". RM is responsible for accessing the actual
// non-replayable hardware buffer, reading the PUT pointer, updating the GET
// pointer, and moving CE and PBDMA faults from the hardware buffer to the
// shadow buffer. Because the Resource Manager owns the HW buffer, UVM needs to
// call RM when servicing a non-replayable fault, first to figure out if there
// is a pending fault, and then to read entries from the shadow buffer.
//
// Once UVM has parsed a non-replayable fault entry corresponding to managed
// memory, and identified the VA block associated with it, the servicing logic
// for that block is identical to that of a replayable fault, see
// uvm_va_block_service_locked. Another similarity between the two types of
// faults is that they use the same entry format, uvm_fault_buffer_entry_t.
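// Illustrative sketch (not part of the driver): a user-space model of the
// GET/PUT convention referenced above. The producer (standing in for the
// hardware or RM) advances PUT as it writes fault entries; the consumer sees
// work pending while GET != PUT, copies entries into a local "shadow" array
// and advances GET to signal that the entries were consumed. All names are
// hypothetical and the RM/UVM ownership split is not modeled.
#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE 8

static unsigned ring[RING_SIZE];  // stand-in for the HW fault buffer
static unsigned get, put;         // ring indices

static bool faults_pending(void)
{
    return get != put;
}

static void produce_fault(unsigned fault_addr)
{
    ring[put % RING_SIZE] = fault_addr;
    put++;
}

static unsigned fetch_into_shadow(unsigned *shadow, unsigned max)
{
    unsigned n = 0;

    while (faults_pending() && n < max) {
        shadow[n++] = ring[get % RING_SIZE];
        get++;  // tells the producer this entry has been consumed
    }
    return n;
}

int main(void)
{
    unsigned shadow[RING_SIZE];
    unsigned i, n;

    produce_fault(0x1000);
    produce_fault(0x2000);
    n = fetch_into_shadow(shadow, RING_SIZE);
    for (i = 0; i < n; i++)
        printf("servicing fault at 0x%x\n", shadow[i]);
    printf("pending after servicing: %d\n", faults_pending());
    return 0;
}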
// There is no error handling in this function. The caller is in charge of
// calling uvm_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
non_replayable_faults->shadow_buffer_copy = NULL;
non_replayable_faults->fault_cache = NULL;
non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
non_replayable_faults->shadow_buffer_copy =
uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
if (!non_replayable_faults->shadow_buffer_copy)
return NV_ERR_NO_MEMORY;
non_replayable_faults->fault_cache = uvm_kvmalloc_zero(non_replayable_faults->max_faults *
sizeof(*non_replayable_faults->fault_cache));
if (!non_replayable_faults->fault_cache)
return NV_ERR_NO_MEMORY;
uvm_tracker_init(&non_replayable_faults->clear_faulted_tracker);
uvm_tracker_init(&non_replayable_faults->fault_service_tracker);
return NV_OK;
}
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
if (non_replayable_faults->fault_cache) {
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
uvm_tracker_deinit(&non_replayable_faults->clear_faulted_tracker);
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->fault_service_tracker));
uvm_tracker_deinit(&non_replayable_faults->fault_service_tracker);
}
uvm_kvfree(non_replayable_faults->shadow_buffer_copy);
uvm_kvfree(non_replayable_faults->fault_cache);
non_replayable_faults->shadow_buffer_copy = NULL;
non_replayable_faults->fault_cache = NULL;
}
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvBool has_pending_faults;
UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
&has_pending_faults);
UVM_ASSERT(status == NV_OK);
return has_pending_faults == NV_TRUE;
}
static NvU32 fetch_non_replayable_fault_buffer_entries(uvm_gpu_t *gpu)
{
NV_STATUS status;
NvU32 i = 0;
NvU32 cached_faults = 0;
uvm_fault_buffer_entry_t *fault_cache;
NvU32 entry_size = gpu->parent->fault_buffer_hal->entry_size(gpu->parent);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
fault_cache = non_replayable_faults->fault_cache;
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.non_replayable_faults.service_lock));
UVM_ASSERT(gpu->parent->non_replayable_faults_supported);
status = nvUvmInterfaceGetNonReplayableFaults(&gpu->parent->fault_buffer_info.rm_info,
non_replayable_faults->shadow_buffer_copy,
&cached_faults);
UVM_ASSERT(status == NV_OK);
// Parse all faults
for (i = 0; i < cached_faults; ++i) {
uvm_fault_buffer_entry_t *fault_entry = &non_replayable_faults->fault_cache[i];
gpu->parent->fault_buffer_hal->parse_non_replayable_entry(gpu->parent, current_hw_entry, fault_entry);
// The GPU aligns the fault addresses to 4k, but all of our tracking is
// done in PAGE_SIZE chunks which might be larger.
fault_entry->fault_address = UVM_PAGE_ALIGN_DOWN(fault_entry->fault_address);
// Make sure that all fields in the entry are properly initialized
fault_entry->va_space = NULL;
fault_entry->is_fatal = (fault_entry->fault_type >= UVM_FAULT_TYPE_FATAL);
fault_entry->filtered = false;
fault_entry->num_instances = 1;
fault_entry->access_type_mask = uvm_fault_access_type_mask_bit(fault_entry->fault_access_type);
INIT_LIST_HEAD(&fault_entry->merged_instances_list);
fault_entry->non_replayable.buffer_index = i;
if (fault_entry->is_fatal) {
// Record the fatal fault event later as we need the va_space locked
fault_entry->fatal_reason = UvmEventFatalReasonInvalidFaultType;
}
else {
fault_entry->fatal_reason = UvmEventFatalReasonInvalid;
}
current_hw_entry += entry_size;
}
return cached_faults;
}
// In SRIOV, the UVM (guest) driver does not have access to the privileged
// registers used to clear the faulted bit. Instead, UVM requests host RM to do
// the clearing on its behalf, using a SW method.
static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
{
if (uvm_gpu_is_virt_mode_sriov(gpu)) {
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
return true;
}
return false;
}
static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault_entry,
NvU32 batch_id,
uvm_tracker_t *tracker)
{
NV_STATUS status;
uvm_push_t push;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
UVM_ASSERT(!fault_entry->is_fatal);
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
tracker,
&push,
"Clearing set bit for address 0x%llx",
fault_entry->fault_address);
if (status != NV_OK) {
UVM_ERR_PRINT("Error acquiring tracker before clearing faulted: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
return status;
}
if (use_clear_faulted_channel_sw_method(gpu))
gpu->parent->host_hal->clear_faulted_channel_sw_method(&push, user_channel, fault_entry);
else
gpu->parent->host_hal->clear_faulted_channel_method(&push, user_channel, fault_entry);
uvm_tools_broadcast_replay(gpu, &push, batch_id, fault_entry->fault_source.client_type);
uvm_push_end(&push);
// Add this push to the GPU's clear_faulted_tracker so GPU removal can wait
// on it.
status = uvm_tracker_add_push_safe(&non_replayable_faults->clear_faulted_tracker, &push);
// Add this push to the channel's clear_faulted_tracker so user channel
// removal can wait on it instead of using the per-GPU tracker, which would
// require a lock.
if (status == NV_OK)
status = uvm_tracker_add_push_safe(&user_channel->clear_faulted_tracker, &push);
return status;
}
static NV_STATUS clear_faulted_register_on_gpu(uvm_gpu_t *gpu,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault_entry,
NvU32 batch_id,
uvm_tracker_t *tracker)
{
NV_STATUS status;
UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
// We need to wait for all pending work before writing to the channel
// register
status = uvm_tracker_wait(tracker);
if (status != NV_OK)
return status;
gpu->parent->host_hal->clear_faulted_channel_register(user_channel, fault_entry);
uvm_tools_broadcast_replay_sync(gpu, batch_id, fault_entry->fault_source.client_type);
return NV_OK;
}
static NV_STATUS clear_faulted_on_gpu(uvm_gpu_t *gpu,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault_entry,
NvU32 batch_id,
uvm_tracker_t *tracker)
{
if (gpu->parent->has_clear_faulted_channel_method || use_clear_faulted_channel_sw_method(gpu))
return clear_faulted_method_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);
return clear_faulted_register_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);
}
static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_fault_buffer_entry_t *fault_entry,
uvm_service_block_context_t *service_context)
{
NV_STATUS status = NV_OK;
uvm_page_index_t page_index;
uvm_perf_thrashing_hint_t thrashing_hint;
uvm_processor_id_t new_residency;
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_va_range_t *va_range = va_block->va_range;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
UVM_ASSERT(!fault_entry->is_fatal);
uvm_assert_rwsem_locked(&va_space->lock);
UVM_ASSERT(fault_entry->va_space == va_space);
UVM_ASSERT(fault_entry->fault_address >= va_block->start);
UVM_ASSERT(fault_entry->fault_address <= va_block->end);
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
if (service_context->num_retries == 0) {
// notify event to tools/performance heuristics. For now we use a
// unique batch id per fault, since we clear the faulted channel for
// each fault.
uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
va_block,
gpu->id,
service_context->block_context.policy->preferred_location,
fault_entry,
++non_replayable_faults->batch_id,
false);
}
// Check logical permissions
status = uvm_va_range_check_logical_permissions(va_range,
gpu->id,
fault_entry->fault_access_type,
uvm_range_group_address_migratable(va_space,
fault_entry->fault_address));
if (status != NV_OK) {
fault_entry->is_fatal = true;
fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
return NV_OK;
}
// TODO: Bug 1880194: Revisit thrashing detection
thrashing_hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;
service_context->read_duplicate_count = 0;
service_context->thrashing_pin_count = 0;
page_index = uvm_va_block_cpu_page_index(va_block, fault_entry->fault_address);
// Compute new residency and update the masks
new_residency = uvm_va_block_select_residency(va_block,
page_index,
gpu->id,
fault_entry->access_type_mask,
service_context->block_context.policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
&read_duplicate);
// Initialize the minimum necessary state in the fault service context
uvm_processor_mask_zero(&service_context->resident_processors);
// Set new residency and update the masks
uvm_processor_mask_set(&service_context->resident_processors, new_residency);
// The masks need to be fully zeroed as the fault region may grow due to prefetching
uvm_page_mask_zero(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency);
uvm_page_mask_set(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency, page_index);
if (read_duplicate) {
uvm_page_mask_zero(&service_context->read_duplicate_mask);
uvm_page_mask_set(&service_context->read_duplicate_mask, page_index);
service_context->read_duplicate_count = 1;
}
service_context->access_type[page_index] = fault_entry->fault_access_type;
service_context->region = uvm_va_block_region_for_page(page_index);
status = uvm_va_block_service_locked(gpu->id, va_block, va_block_retry, service_context);
++service_context->num_retries;
return status;
}
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
struct mm_struct *mm,
uvm_va_block_t *va_block,
uvm_fault_buffer_entry_t *fault_entry)
{
NV_STATUS status, tracker_status;
uvm_va_block_retry_t va_block_retry;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;
service_context->block_context.mm = mm;
uvm_mutex_lock(&va_block->lock);
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
service_managed_fault_in_block_locked(gpu,
va_block,
&va_block_retry,
fault_entry,
service_context));
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
&va_block->tracker);
uvm_mutex_unlock(&va_block->lock);
return status == NV_OK ? tracker_status : status;
}
// See uvm_unregister_channel for comments on the channel destruction
// sequence.
static void kill_channel_delayed(void *_user_channel)
{
uvm_user_channel_t *user_channel = (uvm_user_channel_t *)_user_channel;
uvm_va_space_t *va_space = user_channel->kill_channel.va_space;
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
uvm_va_space_down_read_rm(va_space);
if (user_channel->gpu_va_space) {
// RM handles the fault, which will do the correct fault reporting in the
// kernel logs and will initiate channel teardown
NV_STATUS status = nvUvmInterfaceReportNonReplayableFault(uvm_gpu_device_handle(user_channel->gpu),
user_channel->kill_channel.fault_packet);
UVM_ASSERT(status == NV_OK);
}
uvm_va_space_up_read_rm(va_space);
uvm_user_channel_release(user_channel);
}
static void kill_channel_delayed_entry(void *user_channel)
{
UVM_ENTRY_VOID(kill_channel_delayed(user_channel));
}
static void schedule_kill_channel(uvm_gpu_t *gpu,
uvm_fault_buffer_entry_t *fault_entry,
uvm_user_channel_t *user_channel)
{
uvm_va_space_t *va_space = fault_entry->va_space;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
(fault_entry->non_replayable.buffer_index * gpu->parent->fault_buffer_hal->entry_size(gpu->parent));
UVM_ASSERT(gpu);
UVM_ASSERT(va_space);
UVM_ASSERT(user_channel);
if (user_channel->kill_channel.scheduled)
return;
user_channel->kill_channel.scheduled = true;
user_channel->kill_channel.va_space = va_space;
// Save the packet to be handled by RM in the channel structure
memcpy(user_channel->kill_channel.fault_packet, packet, gpu->parent->fault_buffer_hal->entry_size(gpu->parent));
// Retain the channel here so it is not prematurely destroyed. It will be
// released after forwarding the fault to RM in kill_channel_delayed.
uvm_user_channel_retain(user_channel);
// Schedule a work item to kill the channel
nv_kthread_q_item_init(&user_channel->kill_channel.kill_channel_q_item,
kill_channel_delayed_entry,
user_channel);
nv_kthread_q_schedule_q_item(&gpu->parent->isr.kill_channel_q,
&user_channel->kill_channel.kill_channel_q_item);
}
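// Illustrative sketch (not part of the driver): a user-space model of the
// deferral pattern above, where work that cannot run in the bottom half
// (here, reporting the fatal fault to RM) is handed to a dedicated worker
// thread. The queue is reduced to a single slot for brevity; all names are
// hypothetical.
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  q_cond = PTHREAD_COND_INITIALIZER;
static int pending_packet = -1;  // -1 means "no work queued"

static void *kill_channel_worker(void *unused)
{
    (void)unused;
    pthread_mutex_lock(&q_lock);
    while (pending_packet < 0)
        pthread_cond_wait(&q_cond, &q_lock);
    printf("worker: reporting fault packet %d to RM\n", pending_packet);
    pthread_mutex_unlock(&q_lock);
    return NULL;
}

// Called from "bottom half" context: only queues the work and returns.
static void schedule_kill(int packet)
{
    pthread_mutex_lock(&q_lock);
    pending_packet = packet;
    pthread_cond_signal(&q_cond);
    pthread_mutex_unlock(&q_lock);
}

int main(void)
{
    pthread_t worker;

    pthread_create(&worker, NULL, kill_channel_worker, NULL);
    schedule_kill(42);
    pthread_join(worker, NULL);
    return 0;
}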
static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_fault_buffer_entry_t *fault_entry,
NV_STATUS lookup_status)
{
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
NV_STATUS status = lookup_status;
UVM_ASSERT(!fault_entry->is_fatal);
// Avoid dropping fault events when the VA block is not found or cannot be created
uvm_perf_event_notify_gpu_fault(&fault_entry->va_space->perf_events,
NULL,
gpu->id,
UVM_ID_INVALID,
fault_entry,
++non_replayable_faults->batch_id,
false);
if (status != NV_ERR_INVALID_ADDRESS)
return status;
if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
ats_invalidate->write_faults_in_batch = false;
// The VA isn't managed. See if ATS knows about it.
status = uvm_ats_service_fault_entry(gpu_va_space, fault_entry, ats_invalidate);
// Invalidate ATS TLB entries if needed
if (status == NV_OK) {
status = uvm_ats_invalidate_tlbs(gpu_va_space,
ats_invalidate,
&non_replayable_faults->fault_service_tracker);
}
}
else {
UVM_ASSERT(fault_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH);
fault_entry->is_fatal = true;
fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
// Do not return error due to logical errors in the application
status = NV_OK;
}
return status;
}
static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry)
{
NV_STATUS status;
uvm_user_channel_t *user_channel;
uvm_va_block_t *va_block;
uvm_va_space_t *va_space = NULL;
struct mm_struct *mm;
uvm_gpu_va_space_t *gpu_va_space;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_va_block_context_t *va_block_context =
&gpu->parent->fault_buffer_info.non_replayable.block_service_context.block_context;
status = uvm_gpu_fault_entry_to_va_space(gpu, fault_entry, &va_space);
if (status != NV_OK) {
// The VA space lookup will fail if we're running concurrently with
// removal of the channel from the VA space (channel unregister, GPU VA
// space unregister, VA space destroy, etc). The other thread will stop
// the channel and remove the channel from the table, so the faulting
// condition will be gone. In the case of replayable faults we need to
// flush the buffer, but here we can just ignore the entry and proceed
// on.
//
// Note that we can't have any subcontext issues here, since non-
// replayable faults only use the address space of their channel.
UVM_ASSERT(status == NV_ERR_INVALID_CHANNEL);
UVM_ASSERT(!va_space);
return NV_OK;
}
UVM_ASSERT(va_space);
// If an mm is registered with the VA space, we have to retain it
// in order to lock it before locking the VA space. It is guaranteed
// to remain valid until we release. If no mm is registered, we
// can only service managed faults, not ATS/HMM faults.
mm = uvm_va_space_mm_retain_lock(va_space);
uvm_va_space_down_read(va_space);
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
if (!gpu_va_space) {
// The va_space might have gone away. See the comment above.
status = NV_OK;
goto exit_no_channel;
}
fault_entry->va_space = va_space;
user_channel = uvm_gpu_va_space_get_user_channel(gpu_va_space, fault_entry->instance_ptr);
if (!user_channel) {
// The channel might have gone away. See the comment above.
status = NV_OK;
goto exit_no_channel;
}
fault_entry->fault_source.channel_id = user_channel->hw_channel_id;
if (!fault_entry->is_fatal) {
status = uvm_va_block_find_create(fault_entry->va_space,
mm,
fault_entry->fault_address,
va_block_context,
&va_block);
if (status == NV_OK)
status = service_managed_fault_in_block(gpu_va_space->gpu, mm, va_block, fault_entry);
else
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
// We are done; clear the faulted bit on the channel so that it can be
// scheduled again.
if (status == NV_OK && !fault_entry->is_fatal) {
status = clear_faulted_on_gpu(gpu,
user_channel,
fault_entry,
non_replayable_faults->batch_id,
&non_replayable_faults->fault_service_tracker);
uvm_tracker_clear(&non_replayable_faults->fault_service_tracker);
}
}
if (fault_entry->is_fatal)
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, fault_entry->va_space, fault_entry, fault_entry->fatal_reason);
if (status != NV_OK || fault_entry->is_fatal)
schedule_kill_channel(gpu, fault_entry, user_channel);
exit_no_channel:
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
return status;
}
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
NvU32 cached_faults;
// If this handler is modified to handle fewer than all of the outstanding
// faults, then special handling will need to be added to uvm_suspend()
// to guarantee that fault processing has completed before control is
// returned to the RM.
while ((cached_faults = fetch_non_replayable_fault_buffer_entries(gpu)) > 0) {
NvU32 i;
// Unlike replayable faults, we do not batch up and preprocess
// non-replayable faults, since getting multiple faults on the same
// memory region is unlikely.
for (i = 0; i < cached_faults; ++i) {
status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
if (status != NV_OK)
break;
}
}
if (status != NV_OK)
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
}
View File
@@ -0,0 +1,37 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_NON_REPLAYABLE_FAULTS_H__
#define __UVM_GPU_NON_REPLAYABLE_FAULTS_H__
#include <nvstatus.h>
#include "uvm_forward_decl.h"
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu);
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_NON_REPLAYABLE_FAULTS_H__
File diff suppressed because it is too large
View File
@@ -0,0 +1,78 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_PAGE_FAULT_H__
#define __UVM_GPU_PAGE_FAULT_H__
#include "nvtypes.h"
#include "uvm_types.h"
#include "uvm_hal_types.h"
#include "uvm_tracker.h"
typedef enum
{
// Issue a fault replay after all faults for a block within a batch have been serviced
UVM_PERF_FAULT_REPLAY_POLICY_BLOCK = 0,
// Issue a fault replay after each fault batch has been serviced
UVM_PERF_FAULT_REPLAY_POLICY_BATCH,
// Like UVM_PERF_FAULT_REPLAY_POLICY_BATCH but only one batch of faults is serviced. The fault buffer is flushed
// before issuing the replay. The potential benefit is that we can resume execution of some SMs earlier, if SMs
// are faulting on different sets of pages.
UVM_PERF_FAULT_REPLAY_POLICY_BATCH_FLUSH,
// Issue a fault replay after all faults in the buffer have been serviced
UVM_PERF_FAULT_REPLAY_POLICY_ONCE,
// TODO: Bug 1768226: Implement uTLB-aware fault replay policy
UVM_PERF_FAULT_REPLAY_POLICY_MAX,
} uvm_perf_fault_replay_policy_t;
const char *uvm_perf_fault_replay_policy_string(uvm_perf_fault_replay_policy_t fault_replay);
NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
// Clear valid bit for all remaining unserviced faults in the buffer, set GET to
// PUT, and push a fault replay of type UVM_FAULT_REPLAY_TYPE_START. It does not
// wait for the replay to complete before returning. The pushed replay is added
// to the GPU's replay_tracker.
//
// LOCKING: Takes gpu->isr_lock
NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu);
// Enable/disable HW support for prefetch-initiated faults
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
// Service pending replayable faults on the given GPU. This function must
// only be called from the ISR bottom half.
void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu);
#endif // __UVM_GPU_PAGE_FAULT_H__

View File

@@ -0,0 +1,551 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_gpu_semaphore.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
#define UVM_SEMAPHORE_COUNT_PER_PAGE (PAGE_SIZE / UVM_SEMAPHORE_SIZE)
// The top nibble of the canary base is intentionally 0. The rest of the value
// is arbitrary. See the comments below on make_canary.
#define UVM_SEMAPHORE_CANARY_BASE 0x0badc0de
#define UVM_SEMAPHORE_CANARY_MASK 0xf0000000
struct uvm_gpu_semaphore_pool_struct
{
// The GPU owning the pool
uvm_gpu_t *gpu;
// List of all the semaphore pages belonging to the pool
struct list_head pages;
// Count of free semaphores among all the pages
NvU32 free_semaphores_count;
// Lock protecting the state of the pool
uvm_mutex_t mutex;
};
struct uvm_gpu_semaphore_pool_page_struct
{
// Allocation backing the page
uvm_rm_mem_t *memory;
// Pool the page is part of
uvm_gpu_semaphore_pool_t *pool;
// Node in the list of all pages in a semaphore pool
struct list_head all_pages_node;
// Mask indicating free semaphore indices within the page
DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
};
static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
{
NvU32 offset;
NvU32 index;
UVM_ASSERT(semaphore->payload != NULL);
UVM_ASSERT(semaphore->page != NULL);
offset = (char*)semaphore->payload - (char*)uvm_rm_mem_get_cpu_va(semaphore->page->memory);
UVM_ASSERT(offset % UVM_SEMAPHORE_SIZE == 0);
index = offset / UVM_SEMAPHORE_SIZE;
UVM_ASSERT(index < UVM_SEMAPHORE_COUNT_PER_PAGE);
return index;
}
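// For illustration (the numbers are hypothetical, not from a real allocation):
// with UVM_SEMAPHORE_SIZE of 4, a payload pointer 40 bytes past the start of
// the page's CPU mapping gives offset 40 and index 10, i.e. the 11th semaphore
// slot in the page.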
// Use canary values on debug builds to catch semaphore use-after-free. We can
// catch release-after-free by simply setting the payload to a known value at
// free then checking it on alloc or pool free, but catching acquire-after-free
// is a little trickier.
//
// In order to make still-pending GEQ acquires stall indefinitely we need to
// reduce the current payload as much as we can, subject to two restrictions:
//
// 1) The pending acquires could be comparing against values much less than and
// much greater than the current payload, so we have to set the payload to a
// value reasonably less than the acquires which we might expect to be
// pending.
//
// 2) Going over halfway past a pending acquire on the 32-bit number wheel will
// cause Host to wrap and think the acquire succeeded. So we shouldn't reduce
// by more than 2^31.
//
// To handle these restrictions we'll deal with quadrants of 2^32, under the
// assumption that it's unlikely for a payload to outpace a pending acquire by
// more than 2^30.
//
// We also need for the base value to have some 0s in the upper significant
// bits, otherwise those bits might carry us past the quadrant boundary when we
// OR them in.
static NvU32 make_canary(NvU32 payload)
{
NvU32 prev_quadrant = payload - (1 << 30);
return (prev_quadrant & UVM_SEMAPHORE_CANARY_MASK) | UVM_SEMAPHORE_CANARY_BASE;
}
static bool is_canary(NvU32 val)
{
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}
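// Worked example of the scheme above (the payload value is hypothetical): for
// a current payload of 0x80000123, make_canary() computes prev_quadrant =
// 0x80000123 - 0x40000000 = 0x40000123, keeps only its top nibble (0x40000000)
// and ORs in the base, producing 0x4badc0de. Pending GEQ acquires near the old
// payload now see a value roughly one quadrant lower and stall, while
// is_canary(0x4badc0de) still recognizes the slot as freed because the low 28
// bits match UVM_SEMAPHORE_CANARY_BASE.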
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
NV_STATUS status;
uvm_gpu_semaphore_pool_page_t *pool_page;
NvU32 *payloads;
size_t i;
uvm_assert_mutex_locked(&pool->mutex);
pool_page = uvm_kvmalloc_zero(sizeof(*pool_page));
if (!pool_page)
return NV_ERR_NO_MEMORY;
pool_page->pool = pool;
status = uvm_rm_mem_alloc_and_map_all(pool->gpu, UVM_RM_MEM_TYPE_SYS, UVM_SEMAPHORE_PAGE_SIZE, &pool_page->memory);
if (status != NV_OK)
goto error;
// All semaphores are initially free
bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
list_add(&pool_page->all_pages_node, &pool->pages);
pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;
// Initialize the semaphore payloads to known values
if (UVM_IS_DEBUG()) {
payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
payloads[i] = make_canary(0);
}
return NV_OK;
error:
uvm_kvfree(pool_page);
return status;
}
static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
{
uvm_gpu_semaphore_pool_t *pool;
NvU32 *payloads;
size_t i;
UVM_ASSERT(page);
pool = page->pool;
uvm_assert_mutex_locked(&pool->mutex);
// Assert that no semaphores are still allocated
UVM_ASSERT(bitmap_full(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE));
UVM_ASSERT_MSG(pool->free_semaphores_count >= UVM_SEMAPHORE_COUNT_PER_PAGE,
"count: %u\n",
pool->free_semaphores_count);
// Check for semaphore release-after-free
if (UVM_IS_DEBUG()) {
payloads = uvm_rm_mem_get_cpu_va(page->memory);
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
UVM_ASSERT(is_canary(payloads[i]));
}
pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
list_del(&page->all_pages_node);
uvm_rm_mem_free(page->memory);
uvm_kvfree(page);
}
NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore)
{
NV_STATUS status = NV_OK;
uvm_gpu_semaphore_pool_page_t *page;
memset(semaphore, 0, sizeof(*semaphore));
uvm_mutex_lock(&pool->mutex);
if (pool->free_semaphores_count == 0)
status = pool_alloc_page(pool);
if (status != NV_OK)
goto done;
list_for_each_entry(page, &pool->pages, all_pages_node) {
NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
continue;
semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) + semaphore_index * UVM_SEMAPHORE_SIZE);
semaphore->page = page;
// Check for semaphore release-after-free
UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
uvm_gpu_semaphore_set_payload(semaphore, 0);
__clear_bit(semaphore_index, page->free_semaphores);
--pool->free_semaphores_count;
goto done;
}
UVM_ASSERT_MSG(0, "Failed to find a semaphore after allocating a new page\n");
status = NV_ERR_GENERIC;
done:
uvm_mutex_unlock(&pool->mutex);
return status;
}
void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
{
uvm_gpu_semaphore_pool_page_t *page;
uvm_gpu_semaphore_pool_t *pool;
NvU32 index;
UVM_ASSERT(semaphore);
// uvm_gpu_semaphore_t is meant to be embedded in other structures, so it should
// always be accessible, but it may not have been initialized on error paths.
// Early out if page is NULL, which indicates the semaphore was never
// successfully allocated.
page = semaphore->page;
if (page == NULL)
return;
pool = page->pool;
index = get_index(semaphore);
// Write a known value lower than the current payload in an attempt to catch
// release-after-free and acquire-after-free.
if (UVM_IS_DEBUG())
uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));
uvm_mutex_lock(&pool->mutex);
semaphore->page = NULL;
semaphore->payload = NULL;
++pool->free_semaphores_count;
__set_bit(index, page->free_semaphores);
uvm_mutex_unlock(&pool->mutex);
}
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
uvm_gpu_semaphore_pool_t *pool;
pool = uvm_kvmalloc_zero(sizeof(*pool));
if (!pool)
return NV_ERR_NO_MEMORY;
uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);
INIT_LIST_HEAD(&pool->pages);
pool->free_semaphores_count = 0;
pool->gpu = gpu;
*pool_out = pool;
return NV_OK;
}
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
{
uvm_gpu_semaphore_pool_page_t *page;
uvm_gpu_semaphore_pool_page_t *next_page;
if (!pool)
return;
// No other thread should be touching the pool once it's being destroyed
uvm_assert_mutex_unlocked(&pool->mutex);
// Keep pool_free_page happy
uvm_mutex_lock(&pool->mutex);
list_for_each_entry_safe(page, next_page, &pool->pages, all_pages_node)
pool_free_page(page);
UVM_ASSERT_MSG(pool->free_semaphores_count == 0, "unused: %u", pool->free_semaphores_count);
UVM_ASSERT(list_empty(&pool->pages));
uvm_mutex_unlock(&pool->mutex);
uvm_kvfree(pool);
}
NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
uvm_gpu_semaphore_pool_page_t *page;
UVM_ASSERT(pool);
UVM_ASSERT(gpu);
uvm_mutex_lock(&pool->mutex);
list_for_each_entry(page, &pool->pages, all_pages_node) {
status = uvm_rm_mem_map_gpu(page->memory, gpu);
if (status != NV_OK)
goto done;
}
done:
uvm_mutex_unlock(&pool->mutex);
return status;
}
void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
uvm_gpu_semaphore_pool_page_t *page;
UVM_ASSERT(pool);
UVM_ASSERT(gpu);
uvm_mutex_lock(&pool->mutex);
list_for_each_entry(page, &pool->pages, all_pages_node)
uvm_rm_mem_unmap_gpu(page->memory, gpu);
uvm_mutex_unlock(&pool->mutex);
}
NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, false);
}
NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, true);
}
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
NvU32 index = get_index(semaphore);
NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space);
return base_va + UVM_SEMAPHORE_SIZE * index;
}
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
return UVM_GPU_READ_ONCE(*semaphore->payload);
}
void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
{
// Provide a guarantee that all memory accesses prior to setting the payload
// won't be moved past it.
// Use a big hammer mb() as set_payload() is not used in any performance path
// today.
// This could likely be optimized to either an smp_store_release() or an
// smp_mb__before_atomic() barrier. The former is a fairly recent addition to
// the kernel, though, and it's not clear whether combining the latter with a
// regular 32-bit store is well defined in all cases. Both also seem to risk
// being optimized out on non-SMP configs (we need them for interacting with
// the GPU correctly even on non-SMP).
mb();
UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
}
// This function is intended to catch channels which have been left dangling in
// trackers after their owning GPUs have been destroyed.
static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
uvm_gpu_t *gpu = tracking_sem->semaphore.page->pool->gpu;
uvm_gpu_t *table_gpu;
UVM_ASSERT_MSG(gpu->magic == UVM_GPU_MAGIC_VALUE, "Corruption detected: magic number is 0x%llx\n", gpu->magic);
// It's ok for the GPU to not be in the global table, since add_gpu operates
// on trackers before adding the GPU to the table, and remove_gpu operates
// on trackers after removing the GPU. We rely on the magic value to catch
// those cases.
//
// But if a pointer is in the table it must match.
table_gpu = uvm_gpu_get(gpu->global_id);
if (table_gpu)
UVM_ASSERT(table_gpu == gpu);
// Return a boolean so this function can be used in assertions for
// conditional compilation
return true;
}
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
NV_STATUS status;
memset(tracking_sem, 0, sizeof(*tracking_sem));
status = uvm_gpu_semaphore_alloc(pool, &tracking_sem->semaphore);
if (status != NV_OK)
return status;
UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);
uvm_spin_lock_init(&tracking_sem->lock, UVM_LOCK_ORDER_LEAF);
atomic64_set(&tracking_sem->completed_value, 0);
tracking_sem->queued_value = 0;
return NV_OK;
}
void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}
static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
// The semaphore value is the bottom 32 bits of completed_value
NvU32 old_sem_value = (NvU32)old_value;
NvU32 new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
NvU64 new_value;
uvm_assert_spinlock_locked(&tracking_semaphore->lock);
// The following logic to update the completed value is very subtle, it
// helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
// before going through this code.
if (old_sem_value == new_sem_value) {
// No progress since the last update.
// No additional memory barrier required in this case as completed_value
// is always updated under the spinlock that this thread just acquired.
// That guarantees full ordering with all the accesses the thread that
// updated completed_value did under the lock including the GPU
// semaphore read.
return old_value;
}
// Replace the bottom 32-bits with the new semaphore value
new_value = (old_value & 0xFFFFFFFF00000000ull) | new_sem_value;
// If we've wrapped around, add 2^32 to the value
// Notably the user of the GPU tracking semaphore needs to guarantee that
// the value is updated often enough to notice the wrap around each time it
// happens. In case of a channel tracking semaphore that's released for each
// push, it's easily guaranteed because of the small number of GPFIFO
// entries available per channel (there could be at most as many pending
// pushes as GPFIFO entries).
if (new_sem_value < old_sem_value)
new_value += 1ULL << 32;
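// Worked example with assumed values: if completed_value was 0x1fffffff0
// (old_sem_value 0xfffffff0) and the GPU payload now reads 0x10, new_value
// starts out as 0x100000010; since 0x10 < 0xfffffff0 the payload wrapped, so
// 2^32 is added, giving 0x200000010 and keeping the 64-bit count monotonic.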
// Use an atomic write even though the spinlock is held so that the value can
// be (carefully) read atomically outside of the lock.
//
// atomic64_set() on its own doesn't imply any memory barriers and we need
// prior memory accesses (in particular the read of the GPU semaphore
// payload) by this thread to be visible to other threads that see the newly
// set completed_value. smp_mb__before_atomic() provides that ordering.
//
// Also see the comment and matching smp_mb__after_atomic() barrier in
// uvm_gpu_tracking_semaphore_is_value_completed().
//
// Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
// have been added that are exactly what we need and could be slightly
// faster on arm and powerpc than the implementation below. But at least in
// 4.3 the implementation looks broken for arm32 (it maps directly to
// smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
// architectures) so instead of dealing with that just use a slightly bigger
// hammer.
smp_mb__before_atomic();
atomic64_set(&tracking_semaphore->completed_value, new_value);
// For this thread, we don't want any later accesses to be ordered above the
// GPU semaphore read. This could be accomplished by using a
// smp_load_acquire() for reading it, but given that it's also a pretty
// recent addition to the kernel, just leverage smp_mb__after_atomic() that
// guarantees that no accesses will be ordered above the atomic (and hence
// the GPU semaphore read).
//
// Notably the soon following uvm_spin_unlock() is a release barrier that
// allows later memory accesses to be reordered above it and hence doesn't
// provide the necessary ordering with the GPU semaphore read.
//
// Also notably this would still need to be handled if we ever switch to
// atomic64_set_release() and atomic64_read_acquire() for accessing
// completed_value.
smp_mb__after_atomic();
return new_value;
}
NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
NvU64 completed;
// Check that the GPU which owns the semaphore is still present
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
uvm_spin_lock(&tracking_semaphore->lock);
completed = update_completed_value_locked(tracking_semaphore);
uvm_spin_unlock(&tracking_semaphore->lock);
return completed;
}
bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value)
{
NvU64 completed = atomic64_read(&tracking_sem->completed_value);
// Check that the GPU which owns the semaphore is still present
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_sem));
if (completed >= value) {
// atomic64_read() doesn't imply any memory barriers and we need all
// subsequent memory accesses in this thread to be ordered after the
// atomic read of the completed value above as that will also order them
// with any accesses (in particular the GPU semaphore read) performed by
// the other thread prior to it setting the completed_value we read.
// smp_mb__after_atomic() provides that ordering.
//
// Also see the comment in update_completed_value_locked().
smp_mb__after_atomic();
return true;
}
return uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) >= value;
}

View File

@@ -0,0 +1,181 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_SEMAPHORE_H__
#define __UVM_GPU_SEMAPHORE_H__
#include "uvm_forward_decl.h"
#include "uvm_lock.h"
#include "uvm_rm_mem.h"
#include "uvm_linux.h"
// A GPU semaphore is a memory location accessible by the GPUs and the CPU
// that's used for synchronization among them.
// The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
// locations. The same memory can be accessed by multiple GPUs and the CPU
// allowing for different synchronization schemes.
//
// The UVM driver maintains a per-GPU semaphore pool that grows on demand as
// semaphores are allocated out of it.
//
// TODO: Bug 200194638: Add support for timestamps (the GPU also supports
// releasing 16-byte semaphores that include an 8-byte timestamp).
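//
// Illustrative flow (a sketch under assumed usage, not a prescribed sequence):
// a pushbuffer might end with a semaphore release of value 7; another channel
// then acquires GEQ 7 before starting dependent work, and the CPU can observe
// the same progress by polling uvm_gpu_semaphore_get_payload().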
struct uvm_gpu_semaphore_struct
{
// The semaphore pool page the semaphore came from
uvm_gpu_semaphore_pool_page_t *page;
// Pointer to the memory location
NvU32 *payload;
};
// A primitive used for tracking progress of the GPU.
// Whenever a stream of GPU operations needs to be synchronized, it increments
// the semaphore's payload as the last step so that other processors can
// acquire (wait for) it.
// The primitive maintains a 64-bit counter on top of the 32-bit GPU semaphore
// to support 2^64 synchronization points instead of just 2^32. The logic relies
// on being able to notice every time the 32-bit counter wraps around (see
// update_completed_value()).
struct uvm_gpu_tracking_semaphore_struct
{
uvm_gpu_semaphore_t semaphore;
// Last completed value
// The bottom 32-bits will always match the latest semaphore payload seen in
// update_completed_value_locked().
atomic64_t completed_value;
// Lock protecting updates to the completed_value
uvm_spinlock_t lock;
// Last queued value
// All accesses to the queued value should be handled by the user of the GPU
// tracking semaphore.
NvU64 queued_value;
};
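// Typical usage sketch (simplified; see the GPU semaphore sanity test for a
// concrete example): allocate with uvm_gpu_tracking_semaphore_alloc(), bump
// queued_value once per piece of submitted work, have the GPU (or the CPU via
// uvm_gpu_semaphore_set_payload()) release the low 32 bits of that value, and
// later poll uvm_gpu_tracking_semaphore_is_completed() or
// uvm_gpu_tracking_semaphore_is_value_completed() to learn how far execution
// has progressed.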
// Create a semaphore pool for a GPU.
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);
// Destroy a semaphore pool
// Locking:
// - Global lock needs to be held in read mode (for unmapping from all GPUs)
// - Internally acquires:
// - GPU semaphore pool lock
// - RM API lock
// - RM GPUs lock
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool);
// Allocate a semaphore from the pool.
// The semaphore will be mapped on all GPUs currently registered with the UVM
// driver, and on all new GPUs which will be registered in the future.
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
// to the proxy address space.
//
// The semaphore's payload will be initially set to 0.
//
// Locking:
// - Global lock needs to be held in read mode (for mapping on all GPUs)
// - Internally synchronized and hence safe to be called from multiple threads
// - Internally acquires:
// - GPU semaphore pool lock
// - RM API lock
// - RM GPUs lock
NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore);
// Free a semaphore
// Locking:
// - Internally synchronized and hence safe to be called from multiple threads
void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore);
// Map all the semaphores from the pool on a GPU
//
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
// to the proxy address space.
NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu);
// Unmap all the semaphores from the pool from a GPU
//
// The unmapping affects all the VA spaces where the semaphores are currently
// mapped.
void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu);
// Get the GPU VA of a semaphore in UVM's internal address space.
NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu);
// Get the GPU VA of a semaphore in the proxy address space.
NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu);
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space);
// Read the 32-bit payload of the semaphore
// Notably doesn't provide any memory ordering guarantees and needs to be used with
// care. For an example of what needs to be considered see
// uvm_gpu_tracking_semaphore_update_completed_value().
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore);
// Set the 32-bit payload of the semaphore
// Guarantees that all memory accesses preceding setting the payload won't be
// moved past it.
void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload);
// Allocate a GPU tracking semaphore from the pool
// Locking same as uvm_gpu_semaphore_alloc()
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem);
// Free a GPU tracking semaphore
// Locking same as uvm_gpu_semaphore_free()
void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem);
// Check whether a specific value has been completed
//
// If true is returned, it is guaranteed that all operations ordered prior to a
// processor (commonly a GPU) completing the specific value will be visible to
// the caller.
//
// If a GPU is supposed to complete a value, care needs to be taken for all GPU
// operations to be ordered correctly with the semaphore release that sets the
// value. If it's the CPU completing the value, uvm_gpu_semaphore_set_payload()
// should be used, as it provides the necessary ordering guarantees.
//
// Locking: this operation is internally synchronized and hence safe to be
// called from multiple threads.
bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value);
// Update and return the completed value
//
// Provides the same guarantees as if uvm_gpu_tracking_semaphore_is_value_completed()
// returned true for the returned completed value.
//
// Locking: this operation is internally synchronized and hence safe to be
// called from multiple threads.
NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_sem);
// See the comments for uvm_gpu_tracking_semaphore_is_value_completed
static bool uvm_gpu_tracking_semaphore_is_completed(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
return uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, tracking_sem->queued_value);
}
#endif // __UVM_GPU_SEMAPHORE_H__

View File

@@ -0,0 +1,165 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
#include "uvm_kvmalloc.h"
static NV_STATUS add_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU32 increment_by)
{
NvU64 new_value;
NvU64 completed = uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem);
new_value = completed + increment_by;
tracking_sem->queued_value = new_value;
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == completed);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, 0));
if (completed > 0)
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed - 1));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed + 1));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_completed(tracking_sem));
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)new_value);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == new_value);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value - 1));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value + 1));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_completed(tracking_sem));
return NV_OK;
}
static NV_STATUS test_tracking(uvm_va_space_t *va_space)
{
NV_STATUS status;
uvm_gpu_tracking_semaphore_t tracking_sem;
int i;
uvm_gpu_t *gpu = uvm_va_space_find_first_gpu(va_space);
if (gpu == NULL)
return NV_ERR_INVALID_STATE;
status = uvm_gpu_tracking_semaphore_alloc(gpu->semaphore_pool, &tracking_sem);
if (status != NV_OK)
return status;
status = add_and_test(&tracking_sem, 1);
if (status != NV_OK)
goto done;
for (i = 0; i < 100; ++i) {
status = add_and_test(&tracking_sem, UINT_MAX - 1);
if (status != NV_OK)
goto done;
}
done:
uvm_gpu_tracking_semaphore_free(&tracking_sem);
return status;
}
#define NUM_SEMAPHORES_PER_GPU 4096
static NV_STATUS test_alloc(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu;
uvm_gpu_semaphore_t *semaphores;
int i;
NvU32 semaphore_count;
NvU32 gpu_count = uvm_processor_mask_get_gpu_count(&va_space->registered_gpus);
NvU32 current_semaphore = 0;
if (gpu_count == 0)
return NV_ERR_INVALID_STATE;
semaphore_count = gpu_count * NUM_SEMAPHORES_PER_GPU;
semaphores = uvm_kvmalloc_zero(semaphore_count * sizeof(*semaphores));
if (semaphores == NULL)
return NV_ERR_NO_MEMORY;
for (i = 0; i < NUM_SEMAPHORES_PER_GPU; ++i) {
for_each_va_space_gpu(gpu, va_space) {
status = uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &semaphores[current_semaphore++]);
if (status != NV_OK)
goto done;
}
}
for (i = 0; i < current_semaphore; ++i) {
for_each_va_space_gpu(gpu, va_space) {
NvU64 gpu_va;
gpu_va = uvm_gpu_semaphore_get_gpu_uvm_va(&semaphores[i], gpu);
TEST_CHECK_GOTO(gpu_va != 0, done);
// In SR-IOV heavy, there should be a mapping in the proxy VA space
// too.
if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
gpu_va = uvm_gpu_semaphore_get_gpu_proxy_va(&semaphores[i], gpu);
TEST_CHECK_GOTO(gpu_va != 0, done);
}
uvm_gpu_semaphore_set_payload(&semaphores[i], 1);
TEST_CHECK_GOTO(uvm_gpu_semaphore_get_payload(&semaphores[i]) == 1, done);
}
}
done:
for (i = 0; i < current_semaphore; ++i)
uvm_gpu_semaphore_free(&semaphores[i]);
uvm_kvfree(semaphores);
return status;
}
NV_STATUS uvm_test_gpu_semaphore_sanity(UVM_TEST_GPU_SEMAPHORE_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_read_rm(va_space);
status = test_alloc(va_space);
if (status != NV_OK)
goto done;
status = test_tracking(va_space);
if (status != NV_OK)
goto done;
done:
uvm_va_space_up_read_rm(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}

View File

@@ -0,0 +1,991 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_kvmalloc.h"
#include "cla16f.h"
#include "clb069.h"
#include "clb06f.h"
#include "clb0b5.h"
#include "clc06f.h"
#include "clc0b5.h"
#include "clc1b5.h"
#include "ctrl2080mc.h"
#include "clc3b5.h"
#include "clc36f.h"
#include "clc369.h"
#include "clc365.h"
#include "clc46f.h"
#include "clc5b5.h"
#include "clc6b5.h"
#include "clc56f.h"
#include "clc7b5.h"
#define CE_OP_COUNT (sizeof(uvm_ce_hal_t) / sizeof(void *))
#define HOST_OP_COUNT (sizeof(uvm_host_hal_t) / sizeof(void *))
#define ARCH_OP_COUNT (sizeof(uvm_arch_hal_t) / sizeof(void *))
#define FAULT_BUFFER_OP_COUNT (sizeof(uvm_fault_buffer_hal_t) / sizeof(void *))
#define ACCESS_COUNTER_BUFFER_OP_COUNT (sizeof(uvm_access_counter_buffer_hal_t) / sizeof(void *))
// Table for copy engine functions.
// Each entry is associated with a copy engine class through the 'id' field.
// By setting the 'parent_id' field, a class inherits the parent class's
// functions for any ops left NULL when uvm_hal_init_table() runs at module
// load. The parent class must appear earlier in the array than the child.
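// For example, PASCAL_DMA_COPY_B below supplies an empty ce_ops struct and
// names PASCAL_DMA_COPY_A as its parent, so after uvm_hal_init_table() it ends
// up with Pascal's semaphore and offset methods and, transitively, Maxwell's
// memcopy/memset methods.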
static uvm_hal_class_ops_t ce_table[] =
{
{
.id = MAXWELL_DMA_COPY_A,
.u.ce_ops = {
.init = uvm_hal_maxwell_ce_init,
.method_validate = uvm_hal_method_validate_stub,
.semaphore_release = uvm_hal_maxwell_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_maxwell_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_maxwell_ce_semaphore_reduction_inc,
.offset_out = uvm_hal_maxwell_ce_offset_out,
.offset_in_out = uvm_hal_maxwell_ce_offset_in_out,
.phys_mode = uvm_hal_maxwell_ce_phys_mode,
.plc_mode = uvm_hal_maxwell_ce_plc_mode,
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
.memcopy = uvm_hal_maxwell_ce_memcopy,
.memcopy_v_to_v = uvm_hal_maxwell_ce_memcopy_v_to_v,
.memset_validate = uvm_hal_ce_memset_validate_stub,
.memset_1 = uvm_hal_maxwell_ce_memset_1,
.memset_4 = uvm_hal_maxwell_ce_memset_4,
.memset_8 = uvm_hal_maxwell_ce_memset_8,
.memset_v_4 = uvm_hal_maxwell_ce_memset_v_4,
}
},
{
.id = PASCAL_DMA_COPY_A,
.parent_id = MAXWELL_DMA_COPY_A,
.u.ce_ops = {
.semaphore_release = uvm_hal_pascal_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_pascal_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_pascal_ce_semaphore_reduction_inc,
.offset_out = uvm_hal_pascal_ce_offset_out,
.offset_in_out = uvm_hal_pascal_ce_offset_in_out,
}
},
{
.id = PASCAL_DMA_COPY_B,
.parent_id = PASCAL_DMA_COPY_A,
.u.ce_ops = {}
},
{
.id = VOLTA_DMA_COPY_A,
.parent_id = PASCAL_DMA_COPY_B,
.u.ce_ops = {},
},
{
.id = TURING_DMA_COPY_A,
.parent_id = VOLTA_DMA_COPY_A,
.u.ce_ops = {},
},
{
.id = AMPERE_DMA_COPY_A,
.parent_id = TURING_DMA_COPY_A,
.u.ce_ops = {
.method_validate = uvm_hal_ampere_ce_method_validate_c6b5,
.phys_mode = uvm_hal_ampere_ce_phys_mode,
.memcopy_validate = uvm_hal_ampere_ce_memcopy_validate_c6b5,
.memcopy_patch_src = uvm_hal_ampere_ce_memcopy_patch_src_c6b5,
.memset_validate = uvm_hal_ampere_ce_memset_validate_c6b5,
},
},
{
.id = AMPERE_DMA_COPY_B,
.parent_id = AMPERE_DMA_COPY_A,
.u.ce_ops = {
.method_validate = uvm_hal_method_validate_stub,
.plc_mode = uvm_hal_ampere_ce_plc_mode_c7b5,
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
.memset_validate = uvm_hal_ce_memset_validate_stub,
},
},
};
// Table for GPFIFO functions. Same idea as the copy engine table.
static uvm_hal_class_ops_t host_table[] =
{
{
// This host class is reported for GM10x
.id = KEPLER_CHANNEL_GPFIFO_B,
.u.host_ops = {
.init = uvm_hal_maxwell_host_init_noop,
.method_validate = uvm_hal_method_validate_stub,
.sw_method_validate = uvm_hal_method_validate_stub,
.wait_for_idle = uvm_hal_maxwell_host_wait_for_idle,
.membar_sys = uvm_hal_maxwell_host_membar_sys,
// No MEMBAR GPU until Pascal, just do a MEMBAR SYS.
.membar_gpu = uvm_hal_maxwell_host_membar_sys,
.noop = uvm_hal_maxwell_host_noop,
.interrupt = uvm_hal_maxwell_host_interrupt,
.semaphore_acquire = uvm_hal_maxwell_host_semaphore_acquire,
.semaphore_release = uvm_hal_maxwell_host_semaphore_release,
.semaphore_timestamp = uvm_hal_maxwell_host_semaphore_timestamp,
.set_gpfifo_entry = uvm_hal_maxwell_host_set_gpfifo_entry,
.write_gpu_put = uvm_hal_maxwell_host_write_gpu_put,
.tlb_invalidate_all = uvm_hal_maxwell_host_tlb_invalidate_all_a16f,
.tlb_invalidate_va = uvm_hal_maxwell_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_maxwell_host_tlb_invalidate_test,
.replay_faults = uvm_hal_maxwell_replay_faults_unsupported,
.cancel_faults_global = uvm_hal_maxwell_cancel_faults_global_unsupported,
.cancel_faults_targeted = uvm_hal_maxwell_cancel_faults_targeted_unsupported,
.cancel_faults_va = uvm_hal_maxwell_cancel_faults_va_unsupported,
.clear_faulted_channel_sw_method = uvm_hal_maxwell_host_clear_faulted_channel_sw_method_unsupported,
.clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
.clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
.access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
.get_time = uvm_hal_maxwell_get_time,
}
},
{
// This host class is reported for GM20x
.id = MAXWELL_CHANNEL_GPFIFO_A,
.parent_id = KEPLER_CHANNEL_GPFIFO_B,
.u.host_ops = {
.tlb_invalidate_all = uvm_hal_maxwell_host_tlb_invalidate_all_b06f,
}
},
{
.id = PASCAL_CHANNEL_GPFIFO_A,
.parent_id = MAXWELL_CHANNEL_GPFIFO_A,
.u.host_ops = {
.init = uvm_hal_pascal_host_init,
.membar_sys = uvm_hal_pascal_host_membar_sys,
.membar_gpu = uvm_hal_pascal_host_membar_gpu,
.tlb_invalidate_all = uvm_hal_pascal_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_pascal_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_pascal_host_tlb_invalidate_test,
.replay_faults = uvm_hal_pascal_replay_faults,
.cancel_faults_global = uvm_hal_pascal_cancel_faults_global,
.cancel_faults_targeted = uvm_hal_pascal_cancel_faults_targeted,
}
},
{
.id = VOLTA_CHANNEL_GPFIFO_A,
.parent_id = PASCAL_CHANNEL_GPFIFO_A,
.u.host_ops = {
.write_gpu_put = uvm_hal_volta_host_write_gpu_put,
.tlb_invalidate_va = uvm_hal_volta_host_tlb_invalidate_va,
.replay_faults = uvm_hal_volta_replay_faults,
.cancel_faults_va = uvm_hal_volta_cancel_faults_va,
.clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
.access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
.access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
.access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
.semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
}
},
{
.id = TURING_CHANNEL_GPFIFO_A,
.parent_id = VOLTA_CHANNEL_GPFIFO_A,
.u.host_ops = {
.semaphore_acquire = uvm_hal_turing_host_semaphore_acquire,
.semaphore_release = uvm_hal_turing_host_semaphore_release,
.clear_faulted_channel_method = uvm_hal_turing_host_clear_faulted_channel_method,
.set_gpfifo_entry = uvm_hal_turing_host_set_gpfifo_entry,
}
},
{
.id = AMPERE_CHANNEL_GPFIFO_A,
.parent_id = TURING_CHANNEL_GPFIFO_A,
.u.host_ops = {
.method_validate = uvm_hal_ampere_host_method_validate,
.sw_method_validate = uvm_hal_ampere_host_sw_method_validate,
.clear_faulted_channel_sw_method = uvm_hal_ampere_host_clear_faulted_channel_sw_method,
.clear_faulted_channel_register = uvm_hal_ampere_host_clear_faulted_channel_register,
.tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_ampere_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test,
}
},
};
static uvm_hal_class_ops_t arch_table[] =
{
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.arch_ops = {
.init_properties = uvm_hal_maxwell_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_maxwell,
.enable_prefetch_faults = uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported,
.disable_prefetch_faults = uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported,
.mmu_engine_id_to_type = uvm_hal_maxwell_mmu_engine_id_to_type_unsupported,
.mmu_client_id_to_utlb_id = uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.arch_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.u.arch_ops = {
.init_properties = uvm_hal_pascal_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_pascal,
.enable_prefetch_faults = uvm_hal_pascal_mmu_enable_prefetch_faults,
.disable_prefetch_faults = uvm_hal_pascal_mmu_disable_prefetch_faults,
.mmu_client_id_to_utlb_id = uvm_hal_pascal_mmu_client_id_to_utlb_id,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.arch_ops = {
.init_properties = uvm_hal_volta_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_volta,
.mmu_engine_id_to_type = uvm_hal_volta_mmu_engine_id_to_type,
.mmu_client_id_to_utlb_id = uvm_hal_volta_mmu_client_id_to_utlb_id,
},
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.arch_ops = {
.init_properties = uvm_hal_turing_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_turing,
.mmu_engine_id_to_type = uvm_hal_turing_mmu_engine_id_to_type,
},
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.u.arch_ops = {
.init_properties = uvm_hal_ampere_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_ampere,
.mmu_engine_id_to_type = uvm_hal_ampere_mmu_engine_id_to_type,
.mmu_client_id_to_utlb_id = uvm_hal_ampere_mmu_client_id_to_utlb_id,
},
},
};
static uvm_hal_class_ops_t fault_buffer_table[] =
{
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.fault_buffer_ops = {
.enable_replayable_faults = uvm_hal_maxwell_enable_replayable_faults_unsupported,
.disable_replayable_faults = uvm_hal_maxwell_disable_replayable_faults_unsupported,
.clear_replayable_faults = uvm_hal_maxwell_clear_replayable_faults_unsupported,
.read_put = uvm_hal_maxwell_fault_buffer_read_put_unsupported,
.read_get = uvm_hal_maxwell_fault_buffer_read_get_unsupported,
.write_get = uvm_hal_maxwell_fault_buffer_write_get_unsupported,
.get_ve_id = uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported,
.parse_entry = uvm_hal_maxwell_fault_buffer_parse_entry_unsupported,
.entry_is_valid = uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported,
.entry_clear_valid = uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported,
.entry_size = uvm_hal_maxwell_fault_buffer_entry_size_unsupported,
.parse_non_replayable_entry = uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.fault_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.u.fault_buffer_ops = {
.enable_replayable_faults = uvm_hal_pascal_enable_replayable_faults,
.disable_replayable_faults = uvm_hal_pascal_disable_replayable_faults,
.clear_replayable_faults = uvm_hal_pascal_clear_replayable_faults,
.read_put = uvm_hal_pascal_fault_buffer_read_put,
.read_get = uvm_hal_pascal_fault_buffer_read_get,
.write_get = uvm_hal_pascal_fault_buffer_write_get,
.parse_entry = uvm_hal_pascal_fault_buffer_parse_entry,
.entry_is_valid = uvm_hal_pascal_fault_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_pascal_fault_buffer_entry_clear_valid,
.entry_size = uvm_hal_pascal_fault_buffer_entry_size,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.fault_buffer_ops = {
.read_put = uvm_hal_volta_fault_buffer_read_put,
.read_get = uvm_hal_volta_fault_buffer_read_get,
.write_get = uvm_hal_volta_fault_buffer_write_get,
.get_ve_id = uvm_hal_volta_fault_buffer_get_ve_id,
.parse_entry = uvm_hal_volta_fault_buffer_parse_entry,
.parse_non_replayable_entry = uvm_hal_volta_fault_buffer_parse_non_replayable_entry,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.fault_buffer_ops = {
.disable_replayable_faults = uvm_hal_turing_disable_replayable_faults,
.clear_replayable_faults = uvm_hal_turing_clear_replayable_faults,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.u.fault_buffer_ops = {}
},
};
static uvm_hal_class_ops_t access_counter_buffer_table[] =
{
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_maxwell_enable_access_counter_notifications_unsupported,
.disable_access_counter_notifications = uvm_hal_maxwell_disable_access_counter_notifications_unsupported,
.clear_access_counter_notifications = uvm_hal_maxwell_clear_access_counter_notifications_unsupported,
.parse_entry = uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported,
.entry_is_valid = uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported,
.entry_clear_valid = uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported,
.entry_size = uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.access_counter_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.u.access_counter_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_volta_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
.parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.access_counter_buffer_ops = {
.disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.u.access_counter_buffer_ops = {}
},
};
static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
{
NvLength i;
// Go through the array and match on the class ID.
for (i = 0; i < row_count; i++) {
if (table[i].id == id)
return table + i;
}
return NULL;
}
// Use memcmp to check whether a function pointer has been assigned, in a
// well-defined, general way.
static inline bool op_is_null(uvm_hal_class_ops_t *row, NvLength op_idx, NvLength op_offset)
{
void *temp = NULL;
return memcmp(&temp, (char *)row + op_offset + sizeof(void *) * op_idx, sizeof(void *)) == 0;
}
// Use memcpy to copy function pointers in a well-defined, general way.
static inline void op_copy(uvm_hal_class_ops_t *dst, uvm_hal_class_ops_t *src, NvLength op_idx, NvLength op_offset)
{
void *m_dst = (char *)dst + op_offset + sizeof(void *) * op_idx;
void *m_src = (char *)src + op_offset + sizeof(void *) * op_idx;
memcpy(m_dst, m_src, sizeof(void *));
}
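// Note on the two helpers above: each u.*_ops struct is treated as a flat
// array of function pointers, so op_offset selects the union member (e.g.
// offsetof(uvm_hal_class_ops_t, u.ce_ops), as passed by uvm_hal_init_table()
// below) and op_idx selects one pointer-sized slot within it. This is also why
// the *_OP_COUNT macros divide the size of an ops struct by sizeof(void *).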
static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
NvU32 row_count,
NvLength op_count,
NvLength op_offset)
{
NvLength i;
for (i = 0; i < row_count; i++) {
NvLength j;
uvm_hal_class_ops_t *parent = NULL;
if (table[i].parent_id != 0) {
parent = ops_find_by_id(table, i, table[i].parent_id);
if (parent == NULL)
return NV_ERR_INVALID_CLASS;
// Go through all the ops and assign from parent's corresponding op
// if NULL
for (j = 0; j < op_count; j++) {
if (op_is_null(table + i, j, op_offset))
op_copy(table + i, parent, j, op_offset);
}
}
// At this point, it is an error to have missing HAL operations
for (j = 0; j < op_count; j++) {
if (op_is_null(table + i, j, op_offset))
return NV_ERR_INVALID_STATE;
}
}
return NV_OK;
}
NV_STATUS uvm_hal_init_table(void)
{
NV_STATUS status;
status = ops_init_from_parent(ce_table, ARRAY_SIZE(ce_table), CE_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.ce_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(ce_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(host_table, ARRAY_SIZE(host_table), HOST_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.host_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(host_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(arch_table, ARRAY_SIZE(arch_table), ARCH_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.arch_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(arch_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(fault_buffer_table,
ARRAY_SIZE(fault_buffer_table),
FAULT_BUFFER_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.fault_buffer_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(fault_buffer_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(access_counter_buffer_table,
ARRAY_SIZE(access_counter_buffer_table),
ACCESS_COUNTER_BUFFER_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.access_counter_buffer_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(access_counter_buffer_table) failed: %s\n", nvstatusToString(status));
return status;
}
return NV_OK;
}
NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
{
const UvmGpuInfo *gpu_info = &parent_gpu->rm_info;
uvm_hal_class_ops_t *class_ops;
class_ops = ops_find_by_id(ce_table, ARRAY_SIZE(ce_table), gpu_info->ceClass);
if (class_ops == NULL) {
UVM_ERR_PRINT("Unsupported ce class: 0x%X, GPU %s\n", gpu_info->ceClass, parent_gpu->name);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->ce_hal = &class_ops->u.ce_ops;
class_ops = ops_find_by_id(host_table, ARRAY_SIZE(host_table), gpu_info->hostClass);
if (class_ops == NULL) {
UVM_ERR_PRINT("Unsupported host class: 0x%X, GPU %s\n", gpu_info->hostClass, parent_gpu->name);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->host_hal = &class_ops->u.host_ops;
class_ops = ops_find_by_id(arch_table, ARRAY_SIZE(arch_table), gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Unsupported GPU architecture: 0x%X, GPU %s\n", gpu_info->gpuArch, parent_gpu->name);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->arch_hal = &class_ops->u.arch_ops;
class_ops = ops_find_by_id(fault_buffer_table, ARRAY_SIZE(fault_buffer_table), gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->fault_buffer_hal = &class_ops->u.fault_buffer_ops;
class_ops = ops_find_by_id(access_counter_buffer_table,
ARRAY_SIZE(access_counter_buffer_table),
gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Access counter HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->access_counter_buffer_hal = &class_ops->u.access_counter_buffer_ops;
return NV_OK;
}
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->arch_hal->init_properties(parent_gpu);
// Override the HAL when in non-passthrough virtualization
// TODO: Bug 200692962: [UVM] Add support for access counters in UVM on SR-IOV configurations
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
parent_gpu->access_counters_supported = false;
}
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
{
uvm_gpu_t *gpu;
NvU32 i;
if (membar == UVM_MEMBAR_NONE)
return;
gpu = uvm_push_get_gpu(push);
for (i = 0; i < gpu->parent->num_hshub_tlb_invalidate_membars; i++)
gpu->parent->host_hal->membar_gpu(push);
uvm_hal_membar(gpu, push, membar);
}
const char *uvm_aperture_string(uvm_aperture_t aperture)
{
BUILD_BUG_ON(UVM_APERTURE_MAX != 12);
switch (aperture) {
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_0);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_1);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_2);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_3);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_4);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_5);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_6);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_7);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_MAX);
UVM_ENUM_STRING_CASE(UVM_APERTURE_SYS);
UVM_ENUM_STRING_CASE(UVM_APERTURE_VID);
UVM_ENUM_STRING_CASE(UVM_APERTURE_DEFAULT);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_prot_string(uvm_prot_t prot)
{
BUILD_BUG_ON(UVM_PROT_MAX != 4);
switch (prot) {
UVM_ENUM_STRING_CASE(UVM_PROT_NONE);
UVM_ENUM_STRING_CASE(UVM_PROT_READ_ONLY);
UVM_ENUM_STRING_CASE(UVM_PROT_READ_WRITE);
UVM_ENUM_STRING_CASE(UVM_PROT_READ_WRITE_ATOMIC);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_membar_string(uvm_membar_t membar)
{
switch (membar) {
UVM_ENUM_STRING_CASE(UVM_MEMBAR_SYS);
UVM_ENUM_STRING_CASE(UVM_MEMBAR_GPU);
UVM_ENUM_STRING_CASE(UVM_MEMBAR_NONE);
}
return "UNKNOWN";
}
const char *uvm_fault_access_type_string(uvm_fault_access_type_t fault_access_type)
{
BUILD_BUG_ON(UVM_FAULT_ACCESS_TYPE_COUNT != 5);
switch (fault_access_type) {
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG);
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK);
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_WRITE);
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_READ);
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_PREFETCH);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_fault_type_string(uvm_fault_type_t fault_type)
{
BUILD_BUG_ON(UVM_FAULT_TYPE_COUNT != 16);
switch (fault_type) {
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_INVALID_PDE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_INVALID_PTE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_ATOMIC);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_WRITE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_READ);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_PDE_SIZE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_VA_LIMIT_VIOLATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_UNBOUND_INST_BLOCK);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_PRIV_VIOLATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_PITCH_MASK_VIOLATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_WORK_CREATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_UNSUPPORTED_APERTURE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_COMPRESSION_FAILURE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_UNSUPPORTED_KIND);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_REGION_VIOLATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_POISONED);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_fault_client_type_string(uvm_fault_client_type_t fault_client_type)
{
BUILD_BUG_ON(UVM_FAULT_CLIENT_TYPE_COUNT != 2);
switch (fault_client_type) {
UVM_ENUM_STRING_CASE(UVM_FAULT_CLIENT_TYPE_GPC);
UVM_ENUM_STRING_CASE(UVM_FAULT_CLIENT_TYPE_HUB);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type)
{
BUILD_BUG_ON(UVM_MMU_ENGINE_TYPE_COUNT != 3);
switch (mmu_engine_type) {
UVM_ENUM_STRING_CASE(UVM_MMU_ENGINE_TYPE_GRAPHICS);
UVM_ENUM_STRING_CASE(UVM_MMU_ENGINE_TYPE_HOST);
UVM_ENUM_STRING_CASE(UVM_MMU_ENGINE_TYPE_CE);
UVM_ENUM_STRING_DEFAULT();
}
}
void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
{
UVM_DBG_PRINT("fault_address: 0x%llx\n", entry->fault_address);
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n", entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" fault_type: %s\n", uvm_fault_type_string(entry->fault_type));
UVM_DBG_PRINT(" fault_access_type: %s\n", uvm_fault_access_type_string(entry->fault_access_type));
UVM_DBG_PRINT(" is_replayable: %s\n", entry->is_replayable? "true": "false");
UVM_DBG_PRINT(" is_virtual: %s\n", entry->is_virtual? "true": "false");
UVM_DBG_PRINT(" in_protected_mode: %s\n", entry->in_protected_mode? "true": "false");
UVM_DBG_PRINT(" fault_source.client_type: %s\n", uvm_fault_client_type_string(entry->fault_source.client_type));
UVM_DBG_PRINT(" fault_source.client_id: %d\n", entry->fault_source.client_id);
UVM_DBG_PRINT(" fault_source.gpc_id: %d\n", entry->fault_source.gpc_id);
UVM_DBG_PRINT(" fault_source.mmu_engine_id: %d\n", entry->fault_source.mmu_engine_id);
UVM_DBG_PRINT(" fault_source.mmu_engine_type: %s\n",
uvm_mmu_engine_type_string(entry->fault_source.mmu_engine_type));
UVM_DBG_PRINT(" timestamp: %llu\n", entry->timestamp);
}
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
{
BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
switch (access_counter_type) {
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
UVM_ENUM_STRING_DEFAULT();
}
}
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
{
if (!entry->address.is_virtual) {
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n", entry->address.address,
uvm_aperture_string(entry->address.aperture));
}
else {
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n", entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);
}
UVM_DBG_PRINT(" is_virtual %u\n", entry->address.is_virtual);
UVM_DBG_PRINT(" counter_type %s\n", uvm_access_counter_type_string(entry->counter_type));
UVM_DBG_PRINT(" counter_value %u\n", entry->counter_value);
UVM_DBG_PRINT(" subgranularity 0x%08x\n", entry->sub_granularity);
UVM_DBG_PRINT(" bank %u\n", entry->bank);
UVM_DBG_PRINT(" tag %x\n", entry->tag);
}
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
return true;
}
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
return true;
}
void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
{
}
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
{
return true;
}

View File

@@ -0,0 +1,818 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_HAL_H__
#define __UVM_HAL_H__
#include "uvm_types.h"
#include "uvm_common.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"
#include "uvm_push.h"
#include "uvm_gpu.h"
#include "uvm_test_ioctl.h"
// A dummy method validation that always returns true; it can be used to skip
// CE/Host/SW method validations for a given architecture
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
typedef void (*uvm_hal_init_t)(uvm_push_t *push);
void uvm_hal_maxwell_ce_init(uvm_push_t *push);
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
void uvm_hal_pascal_host_init(uvm_push_t *push);
// Host method validation
typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
// SW method validation
typedef bool (*uvm_hal_host_sw_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
// Wait for idle
typedef void (*uvm_hal_wait_for_idle_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_wait_for_idle(uvm_push_t *push);
// Membar SYS
typedef void (*uvm_hal_membar_sys_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_membar_sys(uvm_push_t *push);
void uvm_hal_pascal_host_membar_sys(uvm_push_t *push);
// Membar GPU
typedef void (*uvm_hal_membar_gpu_t)(uvm_push_t *push);
void uvm_hal_pascal_host_membar_gpu(uvm_push_t *push);
// Put a noop in the pushbuffer of the given size in bytes.
// The size needs to be a multiple of 4.
typedef void (*uvm_hal_noop_t)(uvm_push_t *push, NvU32 size);
void uvm_hal_maxwell_host_noop(uvm_push_t *push, NvU32 size);
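// Illustrative sketch (added for clarity, not part of the original header):
// padding a push to an 8-byte boundary with the noop method. Push sizes are
// multiples of 4 bytes, so the padding below is always a multiple of 4 as
// required. The uvm_push_get_size() helper is assumed to return the current
// push size in bytes.
//
//     NvU32 push_size = uvm_push_get_size(push);
//     if (push_size % 8 != 0)
//         gpu->parent->host_hal->noop(push, 8 - (push_size % 8));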
// Host-generated interrupt method. This will generate a call to
// uvm_isr_top_half_entry.
//
// This is a non-stalling interrupt, which means that it's fire-and-forget. Host
// will not stall method processing nor stop channel switching, which means that
// we cannot directly identify in software which channel generated the
// interrupt.
//
// We must set up software state before pushing the interrupt, and check any
// possible interrupt condition on receiving an interrupt callback.
typedef void (*uvm_hal_interrupt_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_interrupt(uvm_push_t *push);
// Issue a TLB invalidate applying to all VAs in a PDB.
//
// The PTE caches (TLBs) are always invalidated. The PDE caches for all VAs in
// the PDB are invalidated from the specified depth down to the PTEs. This
// allows for optimizations if the caller isn't writing all levels of the PDEs.
// Depth follows the MMU code convention where depth 0 is the top level and here
// means to invalidate everything. See uvm_pascal_mmu.c for an example of depth
// mapping to HW PDE levels. Notably 2M PTEs are considered PDEs as far as the
// TLBs are concerned and hence on Pascal the depth needs to be at most 3 for
// them to be included in the invalidation.
//
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar is
// performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
typedef void (*uvm_hal_host_tlb_invalidate_all_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_all_a16f(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_all_b06f(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
// Issue a TLB invalidate applying to the specified VA range in a PDB.
//
// The PTE caches (TLBs) for each page size aligned VA within the VA range
// are always invalidated. The PDE caches covering the specified VA
// range in the PDB are invalidated from the specified depth down to the PTEs.
// Specifying the depth allows for optimizations if the caller isn't writing all
// levels of the PDEs. Specifying the page size allows for optimizations if
// the caller can guarantee caches for smaller page sizes don't need to be
// invalidated.
//
// Depth follows the MMU code convention where depth 0 is the top level and here
// means to invalidate all levels. See uvm_pascal_mmu.c for an example of depth
// mapping to HW PDE levels. Notably 2M PTEs are considered PDEs as far as the
// TLBs are concerned and hence on Pascal the depth needs to be at most 3 for
// them to be included in the invalidation.
//
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar is
// performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
//
// Note that this can end up pushing a lot of methods for big ranges so it's
// better not to use it directly. Instead, uvm_tlb_batch* APIs should be used
// that automatically switch between targeted VA invalidates and invalidate all.
typedef void (*uvm_hal_host_tlb_invalidate_va_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
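// Illustrative sketch (added for clarity, not part of the original header):
// invalidating a single 64KB page after its PTE was rewritten, with no membar.
// "pdb", "depth" and "addr" are assumed to be provided by the caller, with
// "depth" set to the deepest PDE level that was actually written:
//
//     gpu->parent->host_hal->tlb_invalidate_va(push, pdb, depth, addr,
//                                              UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_64K,
//                                              UVM_MEMBAR_NONE);
//
// As noted above, production code should normally go through the
// uvm_tlb_batch* helpers rather than calling this entry point directly.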
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
// By default all semaphore release operations include a membar sys before the
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
// uvm_push_set_flag().
typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
// Release a semaphore including a timestamp at the specific GPU VA.
//
// This operation writes 16 bytes of memory and the VA needs to be 16-byte
// aligned. The value of the released payload is unspecified and shouldn't be
// relied on, only the timestamp should be of interest.
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
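// Illustrative sketch (added for clarity, not part of the original header):
// a release/acquire pair can order work between two channels of the same GPU.
// Channel A releases the semaphore once its work is done, and channel B
// acquires the same payload before consuming the result. "sema_gpu_va" is
// assumed to be a semaphore location mapped in both channels' address spaces:
//
//     gpu->parent->host_hal->semaphore_release(push_a, sema_gpu_va, 1);
//     gpu->parent->host_hal->semaphore_acquire(push_b, sema_gpu_va, 1);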
typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_volta_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
// Return the current GPU time in nanoseconds
typedef NvU64 (*uvm_hal_get_time_t)(uvm_gpu_t *gpu);
NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu);
// Internal helpers used by the CE hal
// Used to handle the offset encoding differences between architectures
typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset);
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset);
typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);
// CE method validation
typedef bool (*uvm_hal_ce_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
// Memcopy validation.
// The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
// execution. Use uvm_hal_ce_memcopy_validate_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memcopy_validate)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
// Patching of the memcopy source; if not needed for a given architecture use
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation
typedef void (*uvm_hal_ce_memcopy_patch_src)(uvm_push_t *push, uvm_gpu_address_t *src);
void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src);
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src);
// Memcopy size bytes from src to dst.
//
// By default all CE transfer operations include a membar sys after the
// operation and are not pipelined. This can be affected by using
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
typedef void (*uvm_hal_memcopy_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
// Simple wrapper for uvm_hal_memcopy_t with both addresses being virtual
typedef void (*uvm_hal_memcopy_v_to_v_t)(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
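// Illustrative sketch (added for clarity, not part of the original header):
// copying one page between two GPU virtual addresses on the pushing GPU.
// "dst_va" and "src_va" are assumed to be valid mappings for that GPU:
//
//     gpu->parent->ce_hal->memcopy_v_to_v(push, dst_va, src_va, PAGE_SIZE);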
// Memset validation.
// The validation happens at the start of the memset (uvm_hal_memset_*_t)
// execution. Use uvm_hal_ce_memset_validate_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memset_validate)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
// Memset size bytes at dst to a given N-byte input value.
//
// Size has to be a multiple of the element size. For example, the size passed
// to uvm_hal_memset_4_t must be a multiple of 4 bytes.
//
// By default all CE transfer operations include a membar sys after the
// operation and are not pipelined. This can be affected by using
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
typedef void (*uvm_hal_memset_1_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
typedef void (*uvm_hal_memset_4_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
typedef void (*uvm_hal_memset_8_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
// Simple wrapper for uvm_hal_memset_4_t with the address being virtual.
typedef void (*uvm_hal_memset_v_4_t)(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);
void uvm_hal_maxwell_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);
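// Illustrative sketch (added for clarity, not part of the original header):
// zeroing a 4KB buffer with the 4-byte memset. The size (4096) is a multiple
// of the 4-byte element size, as required:
//
//     gpu->parent->ce_hal->memset_4(push, dst, 0, 4096);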
// Increments the semaphore by 1, or resets to 0 if the incremented value would
// exceed the payload.
//
// By default all CE semaphore operations include a membar sys before the
// semaphore operation. This can be affected by using UVM_PUSH_FLAG_NEXT_CE_*
// flags with uvm_push_set_flag().
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
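// For example (note added for clarity, not in the original source), with a
// payload of 3 successive reduction increments move the semaphore through
// 0 -> 1 -> 2 -> 3 -> 0, wrapping back to zero once an increment would exceed
// the payload.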
// Initialize GPU architecture dependent properties
typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
// Retrieve the page-tree HAL for a given big page size
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
typedef void (*uvm_hal_mmu_enable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_mmu_disable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
// Convert a faulted MMU engine ID to a UVM engine type. Only engines which have
// faults serviced by UVM are handled. On Pascal the only such engine is
// GRAPHICS, so no translation is provided.
typedef uvm_mmu_engine_type_t (*uvm_hal_mmu_engine_id_to_type_t)(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
// Replayable faults
typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef NvU32 (*uvm_hal_fault_buffer_read_put_t)(uvm_parent_gpu_t *parent_gpu);
typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
// Parse the entry at the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_fault_buffer_parse_entry_t)(uvm_parent_gpu_t *gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_fault_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_fault_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef NvU32 (*uvm_hal_fault_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_replay_t)(uvm_push_t *push, uvm_fault_replay_type_t type);
typedef void (*uvm_hal_fault_cancel_global_t)(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
typedef void (*uvm_hal_fault_cancel_targeted_t)(uvm_push_t *push,
uvm_gpu_phys_address_t instance_ptr,
NvU32 gpc_id,
NvU32 client_id);
void uvm_hal_maxwell_enable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_pascal_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry);
NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_maxwell_fault_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
bool uvm_hal_pascal_fault_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_pascal_fault_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_pascal_fault_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_parse_non_replayable_entry_t)(uvm_parent_gpu_t *parent_gpu,
void *fault_packet,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void *fault_packet,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_volta_fault_buffer_parse_non_replayable_entry(uvm_parent_gpu_t *parent_gpu,
void *fault_packet,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_cancel_faults_global_unsupported(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
void uvm_hal_pascal_cancel_faults_global(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
// Trigger fault replay on the GPU where the given pushbuffer is located.
void uvm_hal_maxwell_replay_faults_unsupported(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_maxwell_cancel_faults_targeted_unsupported(uvm_push_t *push,
uvm_gpu_phys_address_t instance_ptr,
NvU32 gpc_id,
NvU32 client_id);
void uvm_hal_pascal_replay_faults(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_pascal_cancel_faults_targeted(uvm_push_t *push,
uvm_gpu_phys_address_t instance_ptr,
NvU32 gpc_id,
NvU32 client_id);
typedef void (*uvm_hal_fault_cancel_va_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode);
void uvm_hal_maxwell_cancel_faults_va_unsupported(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode);
void uvm_hal_volta_replay_faults(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_volta_cancel_faults_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode);
typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_volta_host_clear_faulted_channel_method(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_turing_host_clear_faulted_channel_method(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
typedef void (*uvm_hal_host_clear_faulted_channel_register_t)(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
typedef void (*uvm_hal_host_clear_faulted_channel_sw_method_t)(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_sw_method_unsupported(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);
// Access counters
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
// Parse the entry at the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
struct uvm_host_hal_struct
{
uvm_hal_init_t init;
uvm_hal_host_method_validate method_validate;
uvm_hal_host_sw_method_validate sw_method_validate;
uvm_hal_wait_for_idle_t wait_for_idle;
uvm_hal_membar_sys_t membar_sys;
uvm_hal_membar_gpu_t membar_gpu;
uvm_hal_noop_t noop;
uvm_hal_interrupt_t interrupt;
uvm_hal_semaphore_release_t semaphore_release;
uvm_hal_semaphore_acquire_t semaphore_acquire;
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
uvm_hal_host_write_gpu_put_t write_gpu_put;
uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
uvm_hal_host_tlb_invalidate_test_t tlb_invalidate_test;
uvm_hal_fault_buffer_replay_t replay_faults;
uvm_hal_fault_cancel_global_t cancel_faults_global;
uvm_hal_fault_cancel_targeted_t cancel_faults_targeted;
uvm_hal_fault_cancel_va_t cancel_faults_va;
uvm_hal_host_clear_faulted_channel_sw_method_t clear_faulted_channel_sw_method;
uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
uvm_hal_access_counter_clear_all_t access_counter_clear_all;
uvm_hal_access_counter_clear_type_t access_counter_clear_type;
uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
uvm_hal_get_time_t get_time;
};
struct uvm_ce_hal_struct
{
uvm_hal_init_t init;
uvm_hal_ce_method_validate method_validate;
uvm_hal_semaphore_release_t semaphore_release;
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
uvm_hal_ce_offset_out_t offset_out;
uvm_hal_ce_offset_in_out_t offset_in_out;
uvm_hal_ce_phys_mode_t phys_mode;
uvm_hal_ce_plc_mode_t plc_mode;
uvm_hal_ce_memcopy_validate memcopy_validate;
uvm_hal_ce_memcopy_patch_src memcopy_patch_src;
uvm_hal_memcopy_t memcopy;
uvm_hal_memcopy_v_to_v_t memcopy_v_to_v;
uvm_hal_ce_memset_validate memset_validate;
uvm_hal_memset_1_t memset_1;
uvm_hal_memset_4_t memset_4;
uvm_hal_memset_8_t memset_8;
uvm_hal_memset_v_4_t memset_v_4;
uvm_hal_semaphore_reduction_inc_t semaphore_reduction_inc;
};
struct uvm_arch_hal_struct
{
uvm_hal_arch_init_properties_t init_properties;
uvm_hal_lookup_mode_hal_t mmu_mode_hal;
uvm_hal_mmu_enable_prefetch_faults_t enable_prefetch_faults;
uvm_hal_mmu_disable_prefetch_faults_t disable_prefetch_faults;
uvm_hal_mmu_engine_id_to_type_t mmu_engine_id_to_type;
uvm_hal_mmu_client_id_to_utlb_id_t mmu_client_id_to_utlb_id;
};
struct uvm_fault_buffer_hal_struct
{
uvm_hal_enable_replayable_faults_t enable_replayable_faults;
uvm_hal_disable_replayable_faults_t disable_replayable_faults;
uvm_hal_clear_replayable_faults_t clear_replayable_faults;
uvm_hal_fault_buffer_read_put_t read_put;
uvm_hal_fault_buffer_read_get_t read_get;
uvm_hal_fault_buffer_write_get_t write_get;
uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
uvm_hal_fault_buffer_parse_entry_t parse_entry;
uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;
uvm_hal_fault_buffer_entry_size_t entry_size;
uvm_hal_fault_buffer_parse_non_replayable_entry_t parse_non_replayable_entry;
};
struct uvm_access_counter_buffer_hal_struct
{
uvm_hal_enable_access_counter_notifications_t enable_access_counter_notifications;
uvm_hal_disable_access_counter_notifications_t disable_access_counter_notifications;
uvm_hal_clear_access_counter_notifications_t clear_access_counter_notifications;
uvm_hal_access_counter_buffer_parse_entry_t parse_entry;
uvm_hal_access_counter_buffer_entry_is_valid_t entry_is_valid;
uvm_hal_access_counter_buffer_entry_clear_valid_t entry_clear_valid;
uvm_hal_access_counter_buffer_entry_size_t entry_size;
};
typedef struct
{
// id is either a hardware class or GPU architecture
NvU32 id;
NvU32 parent_id;
union
{
// host_ops: id is a hardware class
uvm_host_hal_t host_ops;
// ce_ops: id is a hardware class
uvm_ce_hal_t ce_ops;
// arch_ops: id is an architecture
uvm_arch_hal_t arch_ops;
// fault_buffer_ops: id is an architecture
uvm_fault_buffer_hal_t fault_buffer_ops;
// access_counter_buffer_ops: id is an architecture
uvm_access_counter_buffer_hal_t access_counter_buffer_ops;
} u;
} uvm_hal_class_ops_t;
NV_STATUS uvm_hal_init_table(void);
NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu);
// Helper to push a SYS or GPU membar based on the membar type
//
// Notably this doesn't just get the GPU from the push object to support the
// test mode of the page tree code that doesn't do real pushes.
static void uvm_hal_membar(uvm_gpu_t *gpu, uvm_push_t *push, uvm_membar_t membar)
{
switch (membar) {
case UVM_MEMBAR_SYS:
gpu->parent->host_hal->membar_sys(push);
break;
case UVM_MEMBAR_GPU:
gpu->parent->host_hal->membar_gpu(push);
break;
case UVM_MEMBAR_NONE:
break;
}
}
static void uvm_hal_wfi_membar(uvm_push_t *push, uvm_membar_t membar)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
gpu->parent->host_hal->wait_for_idle(push);
uvm_hal_membar(gpu, push, membar);
}
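// Illustrative helper sketch (added for clarity, not part of the original
// interface): invalidate all translations for a PDB and order the invalidate
// with a system-scope membar, dispatching through the per-GPU HAL like the
// helpers above. Depth 0 invalidates every PDE level down to the PTEs.
static inline void uvm_hal_example_tlb_invalidate_all_sys(uvm_push_t *push, uvm_gpu_phys_address_t pdb)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    gpu->parent->host_hal->tlb_invalidate_all(push, pdb, 0, UVM_MEMBAR_SYS);
}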
// Internal helper used by the TLB invalidate hal functions. This issues the
// appropriate Host membar(s) after a TLB invalidate.
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar);
#endif // __UVM_HAL_H__

View File

@@ -0,0 +1,533 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_HAL_TYPES_H__
#define __UVM_HAL_TYPES_H__
#include "uvm_common.h"
#include "uvm_forward_decl.h"
#include "uvm_processors.h"
#define UVM_GPU_MMU_MAX_FAULT_PACKET_SIZE 32
typedef enum
{
UVM_APERTURE_PEER_0,
UVM_APERTURE_PEER_1,
UVM_APERTURE_PEER_2,
UVM_APERTURE_PEER_3,
UVM_APERTURE_PEER_4,
UVM_APERTURE_PEER_5,
UVM_APERTURE_PEER_6,
UVM_APERTURE_PEER_7,
UVM_APERTURE_PEER_MAX,
UVM_APERTURE_SYS,
UVM_APERTURE_VID,
// DEFAULT is a special value to let MMU pick the location of page tables
UVM_APERTURE_DEFAULT,
UVM_APERTURE_MAX
} uvm_aperture_t;
const char *uvm_aperture_string(uvm_aperture_t aperture);
static bool uvm_aperture_is_peer(uvm_aperture_t aperture)
{
return (aperture >= UVM_APERTURE_PEER_0) && (aperture < UVM_APERTURE_PEER_MAX);
}
static inline NvU32 UVM_APERTURE_PEER_ID(uvm_aperture_t aperture)
{
UVM_ASSERT(uvm_aperture_is_peer(aperture));
return (NvU32)aperture;
}
static inline uvm_aperture_t UVM_APERTURE_PEER(NvU32 id)
{
uvm_aperture_t aperture = (uvm_aperture_t)id;
UVM_ASSERT(UVM_APERTURE_PEER_ID(aperture) == id);
return aperture;
}
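// For example (note added for clarity, not in the original source),
// UVM_APERTURE_PEER(3) yields UVM_APERTURE_PEER_3 and
// UVM_APERTURE_PEER_ID(UVM_APERTURE_PEER_3) returns 3, since the peer
// apertures occupy the first UVM_APERTURE_PEER_MAX entries of the enum.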
// A physical GPU address
typedef struct
{
NvU64 address;
uvm_aperture_t aperture;
} uvm_gpu_phys_address_t;
// Create a physical GPU address
static uvm_gpu_phys_address_t uvm_gpu_phys_address(uvm_aperture_t aperture, NvU64 address)
{
return (uvm_gpu_phys_address_t){ address, aperture };
}
// Compare two gpu physical addresses
static int uvm_gpu_phys_addr_cmp(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b)
{
int result = UVM_CMP_DEFAULT(a.aperture, b.aperture);
if (result != 0)
return result;
return UVM_CMP_DEFAULT(a.address, b.address);
}
// A physical or virtual address directly accessible by a GPU.
// This implies that the address already went through identity mapping and IOMMU
// translations and is only valid for a specific GPU.
typedef struct
{
// Physical or virtual address
// In general, only valid for a specific GPU
NvU64 address;
// Aperture for a physical address
uvm_aperture_t aperture;
// Whether the address is virtual
bool is_virtual;
} uvm_gpu_address_t;
// Create a virtual GPU address
static uvm_gpu_address_t uvm_gpu_address_virtual(NvU64 va)
{
uvm_gpu_address_t address = {0};
address.address = va;
address.aperture = UVM_APERTURE_MAX;
address.is_virtual = true;
return address;
}
// Create a physical GPU address
static uvm_gpu_address_t uvm_gpu_address_physical(uvm_aperture_t aperture, NvU64 pa)
{
uvm_gpu_address_t address = {0};
address.aperture = aperture;
address.address = pa;
return address;
}
// Create a GPU address from a physical GPU address
static uvm_gpu_address_t uvm_gpu_address_from_phys(uvm_gpu_phys_address_t phys_address)
{
return uvm_gpu_address_physical(phys_address.aperture, phys_address.address);
}
static const char *uvm_gpu_address_aperture_string(uvm_gpu_address_t addr)
{
if (addr.is_virtual)
return "VIRTUAL";
return uvm_aperture_string(addr.aperture);
}
// Compare two gpu addresses
static int uvm_gpu_addr_cmp(uvm_gpu_address_t a, uvm_gpu_address_t b)
{
int result = UVM_CMP_DEFAULT(a.is_virtual, b.is_virtual);
if (result != 0)
return result;
if (a.is_virtual) {
return UVM_CMP_DEFAULT(a.address, b.address);
}
else {
uvm_gpu_phys_address_t phys_a = { a.address, a.aperture };
uvm_gpu_phys_address_t phys_b = { b.address, b.aperture };
return uvm_gpu_phys_addr_cmp(phys_a, phys_b);
}
}
// For processors with no concept of an atomic fault (the CPU and pre-Pascal
// GPUs), UVM_PROT_READ_WRITE and UVM_PROT_READ_WRITE_ATOMIC are
// interchangeable.
typedef enum
{
UVM_PROT_NONE,
UVM_PROT_READ_ONLY,
UVM_PROT_READ_WRITE,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_PROT_MAX
} uvm_prot_t;
const char *uvm_prot_string(uvm_prot_t prot);
typedef enum
{
UVM_MEMBAR_NONE,
UVM_MEMBAR_GPU,
UVM_MEMBAR_SYS,
} uvm_membar_t;
const char *uvm_membar_string(uvm_membar_t membar);
// Types of memory accesses that can cause a replayable fault on the GPU. They
// are ordered by access "intrusiveness" to simplify fault preprocessing (e.g.
// to implement fault coalescing)
typedef enum
{
UVM_FAULT_ACCESS_TYPE_PREFETCH = 0,
UVM_FAULT_ACCESS_TYPE_READ,
UVM_FAULT_ACCESS_TYPE_WRITE,
UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK,
UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG,
UVM_FAULT_ACCESS_TYPE_COUNT
} uvm_fault_access_type_t;
const char *uvm_fault_access_type_string(uvm_fault_access_type_t fault_access_type);
static NvU32 uvm_fault_access_type_mask_bit(uvm_fault_access_type_t fault_access_type)
{
BUILD_BUG_ON(UVM_FAULT_ACCESS_TYPE_COUNT >= 32);
UVM_ASSERT(fault_access_type >= 0);
UVM_ASSERT(fault_access_type < UVM_FAULT_ACCESS_TYPE_COUNT);
return (NvU32)1 << fault_access_type;
}
static bool uvm_fault_access_type_mask_test(NvU32 mask, uvm_fault_access_type_t fault_access_type)
{
return uvm_fault_access_type_mask_bit(fault_access_type) & mask;
}
static void uvm_fault_access_type_mask_set(NvU32 *mask, uvm_fault_access_type_t fault_access_type)
{
*mask |= uvm_fault_access_type_mask_bit(fault_access_type);
}
static uvm_fault_access_type_t uvm_fault_access_type_mask_highest(NvU32 mask)
{
int pos;
UVM_ASSERT((1 << UVM_FAULT_ACCESS_TYPE_COUNT) > mask);
UVM_ASSERT(mask != 0);
pos = __fls(mask);
UVM_ASSERT(pos < UVM_FAULT_ACCESS_TYPE_COUNT);
return pos;
}
static uvm_fault_access_type_t uvm_fault_access_type_mask_lowest(NvU32 mask)
{
int pos;
UVM_ASSERT((1 << UVM_FAULT_ACCESS_TYPE_COUNT) > mask);
UVM_ASSERT(mask != 0);
pos = __ffs(mask);
UVM_ASSERT(pos < UVM_FAULT_ACCESS_TYPE_COUNT);
return pos;
}
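// Worked example (added for clarity, not in the original source): if READ and
// WRITE faults to the same page were coalesced,
//
//     NvU32 mask = 0;
//     uvm_fault_access_type_mask_set(&mask, UVM_FAULT_ACCESS_TYPE_READ);
//     uvm_fault_access_type_mask_set(&mask, UVM_FAULT_ACCESS_TYPE_WRITE);
//
// then uvm_fault_access_type_mask_highest(mask) is UVM_FAULT_ACCESS_TYPE_WRITE
// (the most intrusive access) and uvm_fault_access_type_mask_lowest(mask) is
// UVM_FAULT_ACCESS_TYPE_READ.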
typedef enum
{
// Cancel all accesses on the page
UVM_FAULT_CANCEL_VA_MODE_ALL = 0,
// Cancel write and atomic accesses on the page
UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC,
UVM_FAULT_CANCEL_VA_MODE_COUNT,
} uvm_fault_cancel_va_mode_t;
// Types of faults that can show up in the fault buffer. Non-UVM related faults are grouped in FATAL category
// since we don't care about the specific type
typedef enum
{
UVM_FAULT_TYPE_INVALID_PDE = 0,
UVM_FAULT_TYPE_INVALID_PTE,
UVM_FAULT_TYPE_ATOMIC,
// WRITE to READ-ONLY
UVM_FAULT_TYPE_WRITE,
// READ to WRITE-ONLY (ATS)
UVM_FAULT_TYPE_READ,
// The next values are considered fatal and are not handled by the UVM driver
UVM_FAULT_TYPE_FATAL,
// Values required for tools
UVM_FAULT_TYPE_PDE_SIZE = UVM_FAULT_TYPE_FATAL,
UVM_FAULT_TYPE_VA_LIMIT_VIOLATION,
UVM_FAULT_TYPE_UNBOUND_INST_BLOCK,
UVM_FAULT_TYPE_PRIV_VIOLATION,
UVM_FAULT_TYPE_PITCH_MASK_VIOLATION,
UVM_FAULT_TYPE_WORK_CREATION,
UVM_FAULT_TYPE_UNSUPPORTED_APERTURE,
UVM_FAULT_TYPE_COMPRESSION_FAILURE,
UVM_FAULT_TYPE_UNSUPPORTED_KIND,
UVM_FAULT_TYPE_REGION_VIOLATION,
UVM_FAULT_TYPE_POISONED,
UVM_FAULT_TYPE_COUNT
} uvm_fault_type_t;
const char *uvm_fault_type_string(uvm_fault_type_t fault_type);
// Main MMU client type that triggered the fault
typedef enum
{
UVM_FAULT_CLIENT_TYPE_GPC = 0,
UVM_FAULT_CLIENT_TYPE_HUB,
UVM_FAULT_CLIENT_TYPE_COUNT
} uvm_fault_client_type_t;
const char *uvm_fault_client_type_string(uvm_fault_client_type_t fault_client_type);
typedef enum
{
UVM_MMU_ENGINE_TYPE_GRAPHICS = 0,
UVM_MMU_ENGINE_TYPE_HOST,
UVM_MMU_ENGINE_TYPE_CE,
UVM_MMU_ENGINE_TYPE_COUNT,
} uvm_mmu_engine_type_t;
const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type);
// HW unit that triggered the fault. We include the fields required for fault cancelling. Including more information
// might be useful for performance heuristics in the future
typedef struct
{
uvm_fault_client_type_t client_type : order_base_2(UVM_FAULT_CLIENT_TYPE_COUNT) + 1;
uvm_mmu_engine_type_t mmu_engine_type : order_base_2(UVM_MMU_ENGINE_TYPE_COUNT) + 1;
NvU16 client_id;
NvU16 mmu_engine_id;
union
{
struct
{
NvU16 utlb_id;
NvU8 gpc_id;
};
// TODO: Bug 3283289: the channel ID, which is only populated for
// non-replayable faults, is never consumed.
NvU16 channel_id;
};
// Identifier of the subcontext that caused the fault. HW uses it as an
// offset in the instance block to obtain the GPU VA space PDB of the
// faulting process.
NvU8 ve_id;
} uvm_fault_source_t;
struct uvm_fault_buffer_entry_struct
{
//
// The next fields are filled by the fault buffer parsing code
//
// Virtual address of the faulting request aligned to CPU page size
NvU64 fault_address;
// GPU timestamp (in nanoseconds) when the fault was inserted in the fault
// buffer
NvU64 timestamp;
uvm_gpu_phys_address_t instance_ptr;
uvm_fault_source_t fault_source;
uvm_fault_type_t fault_type : order_base_2(UVM_FAULT_TYPE_COUNT) + 1;
uvm_fault_access_type_t fault_access_type : order_base_2(UVM_FAULT_ACCESS_TYPE_COUNT) + 1;
//
// The next fields are managed by the fault handling code
//
uvm_va_space_t *va_space;
// This is set to true when some fault could not be serviced and a
// cancel command needs to be issued
bool is_fatal : 1;
// This is set to true for all GPU faults on a page that is thrashing
bool is_throttled : 1;
// This is set to true if the fault has prefetch access type and the
// address or the access privileges are not valid
bool is_invalid_prefetch : 1;
bool is_replayable : 1;
bool is_virtual : 1;
bool in_protected_mode : 1;
bool filtered : 1;
// Reason for the fault to be fatal
UvmEventFatalReason fatal_reason : order_base_2(UvmEventNumFatalReasons) + 1;
// Mode to be used to cancel faults. This must be set according to the
// fatal fault reason and the fault access types of the merged fault
// instances.
union
{
struct
{
uvm_fault_cancel_va_mode_t cancel_va_mode : order_base_2(UVM_FAULT_CANCEL_VA_MODE_COUNT) + 1;
} replayable;
struct
{
NvU32 buffer_index;
} non_replayable;
};
// List of duplicate fault buffer entries that have been merged into this
// one
struct list_head merged_instances_list;
// Access types to this page for all accesses that have been coalesced at
// fetch time. It must include, at least, fault_access_type
NvU32 access_type_mask;
// Number of faults with the same properties that have been coalesced at
// fetch time
NvU16 num_instances;
};
typedef enum
{
// Completes when all fault replays are in-flight
UVM_FAULT_REPLAY_TYPE_START = 0,
// Completes when all faulting accesses have been correctly translated or faulted again
UVM_FAULT_REPLAY_TYPE_START_ACK_ALL,
UVM_FAULT_REPLAY_TYPE_MAX
} uvm_fault_replay_type_t;
static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
{
BUILD_BUG_ON(UVM_MEMBAR_NONE >= UVM_MEMBAR_GPU);
BUILD_BUG_ON(UVM_MEMBAR_GPU >= UVM_MEMBAR_SYS);
return max(membar_1, membar_2);
}
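// For example, uvm_membar_max(UVM_MEMBAR_GPU, UVM_MEMBAR_SYS) returns
// UVM_MEMBAR_SYS: the enum is ordered NONE < GPU < SYS, as enforced by the
// BUILD_BUG_ONs above (note added for clarity, not in the original source).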
typedef enum
{
UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
UVM_ACCESS_COUNTER_TYPE_MOMC,
UVM_ACCESS_COUNTER_TYPE_MAX,
} uvm_access_counter_type_t;
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
struct uvm_access_counter_buffer_entry_struct
{
// Whether this counter refers to outbound accesses to remote GPUs or
// sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
// GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
uvm_access_counter_type_t counter_type;
// Address of the region for which a notification was sent
uvm_gpu_address_t address;
// These fields are only valid if address.is_virtual is true
union
{
struct
{
// Instance pointer of one of the channels in the TSG that triggered the
// notification
uvm_gpu_phys_address_t instance_ptr;
uvm_mmu_engine_type_t mmu_engine_type;
NvU32 mmu_engine_id;
// Identifier of the subcontext that performed the memory accesses that
// triggered the notification. This value, combined with the instance_ptr,
// is needed to obtain the GPU VA space of the process that triggered the
// notification.
NvU32 ve_id;
// VA space for the address that triggered the notification
uvm_va_space_t *va_space;
} virtual_info;
// These fields are only valid if address.is_virtual is false
struct
{
// Processor id where data is resident
//
// Although this information is not tied to a VA space, we can use
// a regular processor id because P2P is not allowed between
// partitioned GPUs.
uvm_processor_id_t resident_id;
} physical_info;
};
// Number of times the tracked region was accessed since the last time it
// was cleared. Counter values saturate at the maximum value supported by
// the GPU (2^16 - 1 in Volta)
NvU32 counter_value;
// When the granularity of the tracked regions is greater than 64KB, the
// region is split into 32 equal subregions. Each bit in this field
// represents one of those subregions. 1 means that the subregion has been
// accessed
NvU32 sub_granularity;
// Opaque fields provided by HW, required for targeted clear of a counter
NvU32 bank;
NvU32 tag;
};
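// Worked example (added for clarity, not in the original source): with a 2MB
// tracking granularity each of the 32 sub_granularity bits covers a 64KB
// subregion, so bit 0 reports accesses to the first 64KB of the region.
// "granularity" is assumed to be known by the caller:
//
//     NvU64 subregion_size = granularity / 32; // 2MB / 32 == 64KB
//     bool first_subregion_accessed = entry->sub_granularity & 0x1;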
static uvm_prot_t uvm_fault_access_type_to_prot(uvm_fault_access_type_t access_type)
{
switch (access_type) {
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
return UVM_PROT_READ_WRITE_ATOMIC;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_WRITE:
return UVM_PROT_READ_WRITE;
default:
// Prefetch faults, if not ignored, are handled like read faults and require
// a mapping with, at least, READ_ONLY access permission
return UVM_PROT_READ_ONLY;
}
}
#endif // __UVM_HAL_TYPES_H__

View File

@@ -0,0 +1,790 @@
/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hmm.h"
static bool uvm_disable_hmm = false;
module_param(uvm_disable_hmm, bool, 0444);
MODULE_PARM_DESC(uvm_disable_hmm,
"Force-disable HMM functionality in the UVM driver. "
"Default: false (i.e, HMM is potentially enabled). Ignored if "
"HMM is not supported in the driver, or if ATS settings "
"conflict with HMM.");
#if UVM_IS_CONFIG_HMM()
#include <linux/hmm.h>
#include <linux/userfaultfd_k.h>
#include "uvm_common.h"
#include "uvm_gpu.h"
#include "uvm_va_block_types.h"
#include "uvm_va_space_mm.h"
#include "uvm_va_space.h"
#include "uvm_va_range.h"
#include "uvm_range_tree.h"
#include "uvm_lock.h"
#include "uvm_api.h"
#include "uvm_va_policy.h"
bool uvm_hmm_is_enabled_system_wide(void)
{
return !uvm_disable_hmm && !g_uvm_global.ats.enabled && uvm_va_space_mm_enabled_system();
}
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
{
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
return uvm_hmm_is_enabled_system_wide() &&
uvm_va_space_mm_enabled(va_space) &&
!(va_space->initialization_flags & UVM_INIT_FLAGS_DISABLE_HMM) &&
!va_space->hmm.disable;
}
static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
{
if (!node)
return NULL;
return container_of(node, uvm_va_block_t, hmm.node);
}
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
{
struct mm_struct *mm = va_space->va_space_mm.mm;
if (!uvm_hmm_is_enabled(va_space))
return NV_OK;
uvm_assert_mmap_lock_locked_write(mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
// Disable HMM by default for each va_space until enough functionality is
// implemented that this can be enabled by default.
// Note that it can be enabled for testing under controlled circumstances.
va_space->hmm.disable = true;
return NV_OK;
}
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
{
uvm_hmm_va_space_t *hmm_va_space = &va_space->hmm;
struct mm_struct *mm = va_space->va_space_mm.mm;
int ret;
if (!uvm_hmm_is_enabled_system_wide() || !mm)
return NV_WARN_NOTHING_TO_DO;
uvm_assert_mmap_lock_locked_write(mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
// Temporarily enable HMM for testing.
va_space->hmm.disable = false;
// Initialize MMU interval notifiers for this process.
// This allows mmu_interval_notifier_insert() to be called without holding
// the mmap_lock for write.
// Note: there is no __mmu_notifier_unregister(), this call just allocates
// memory which is attached to the mm_struct and freed when the mm_struct
// is freed.
ret = __mmu_notifier_register(NULL, mm);
if (ret)
return errno_to_nv_status(ret);
uvm_range_tree_init(&hmm_va_space->blocks);
uvm_mutex_init(&hmm_va_space->blocks_lock, UVM_LOCK_ORDER_LEAF);
return NV_OK;
}
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
{
uvm_hmm_va_space_t *hmm_va_space = &va_space->hmm;
uvm_range_tree_node_t *node, *next;
uvm_va_block_t *va_block;
if (!uvm_hmm_is_enabled(va_space) || uvm_va_space_initialized(va_space) != NV_OK)
return;
uvm_assert_rwsem_locked_write(&va_space->lock);
// The blocks_lock is not needed when the va_space lock is held for write.
uvm_range_tree_for_each_safe(node, next, &hmm_va_space->blocks) {
va_block = hmm_va_block_from_node(node);
uvm_range_tree_remove(&hmm_va_space->blocks, node);
mmu_interval_notifier_remove(&va_block->hmm.notifier);
uvm_va_block_kill(va_block);
}
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
va_space->hmm.disable = true;
}
static bool hmm_invalidate(uvm_va_block_t *va_block,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
struct mmu_interval_notifier *mni = &va_block->hmm.notifier;
NvU64 start, end;
// The MMU_NOTIFY_RELEASE event isn't really needed since mn_itree_release()
// doesn't remove the interval notifiers from the struct_mm so there will
// be a full range MMU_NOTIFY_UNMAP event after the release from
// unmap_vmas() during exit_mmap().
if (range->event == MMU_NOTIFY_SOFT_DIRTY || range->event == MMU_NOTIFY_RELEASE)
return true;
// Blockable is only set false by
// mmu_notifier_invalidate_range_start_nonblock() which is only called in
// __oom_reap_task_mm().
if (!mmu_notifier_range_blockable(range))
return false;
// Ignore invalidation callbacks for device private pages since the
// invalidation is handled as part of the migration process.
// Note that the va_space pointer won't be NULL if the callback is for
// MMU_NOTIFY_MIGRATE/MMU_NOTIFY_EXCLUSIVE because the va_block lock
// is already held and we have to prevent recursively getting the lock.
if ((range->event == MMU_NOTIFY_MIGRATE || range->event == MMU_NOTIFY_EXCLUSIVE) &&
range->owner == va_block->hmm.va_space)
return true;
uvm_mutex_lock(&va_block->lock);
// Ignore this invalidation callback if the block is dead.
if (uvm_va_block_is_dead(va_block))
goto unlock;
mmu_interval_set_seq(mni, cur_seq);
// Note: unmap_vmas() does MMU_NOTIFY_UNMAP [0, 0xffffffffffffffff]
start = range->start;
end = (range->end == ULONG_MAX) ? range->end : range->end - 1;
if (start < va_block->start)
start = va_block->start;
if (end > va_block->end)
end = va_block->end;
if (range->event == MMU_NOTIFY_UNMAP)
uvm_va_policy_clear(va_block, start, end);
unlock:
uvm_mutex_unlock(&va_block->lock);
return true;
}
static bool uvm_hmm_invalidate_entry(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
uvm_va_block_t *va_block = container_of(mni, uvm_va_block_t, hmm.notifier);
UVM_ENTRY_RET(hmm_invalidate(va_block, range, cur_seq));
}
static const struct mmu_interval_notifier_ops uvm_hmm_notifier_ops =
{
.invalidate = uvm_hmm_invalidate_entry,
};
NV_STATUS uvm_hmm_va_block_find(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **va_block_ptr)
{
uvm_range_tree_node_t *node;
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
uvm_assert_rwsem_locked(&va_space->lock);
uvm_mutex_lock(&va_space->hmm.blocks_lock);
node = uvm_range_tree_find(&va_space->hmm.blocks, addr);
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
if (!node)
return NV_ERR_OBJECT_NOT_FOUND;
*va_block_ptr = hmm_va_block_from_node(node);
return NV_OK;
}
static bool uvm_hmm_vma_is_valid(struct vm_area_struct *vma,
unsigned long addr,
bool allow_unreadable_vma)
{
// UVM doesn't support userfaultfd. hmm_range_fault() doesn't support
// VM_IO, VM_PFNMAP, or VM_MIXEDMAP VMAs. It also doesn't support
// VMAs without VM_READ but we allow those VMAs to have policy set on
// them.
return vma &&
addr >= vma->vm_start &&
!userfaultfd_armed(vma) &&
!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
(allow_unreadable_vma || (vma->vm_flags & VM_READ));
}
static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
bool allow_unreadable_vma,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr)
{
struct mm_struct *mm = va_space->va_space_mm.mm;
struct vm_area_struct *vma;
uvm_va_block_t *va_block;
NvU64 start, end;
NV_STATUS status;
int ret;
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
UVM_ASSERT(mm);
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked(&va_space->lock);
UVM_ASSERT(PAGE_ALIGNED(addr));
// Note that we have to allow PROT_NONE VMAs so that policies can be set.
vma = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(vma, addr, allow_unreadable_vma))
return NV_ERR_INVALID_ADDRESS;
// Since we only hold the va_space read lock, there can be multiple
// parallel va_block insertions.
uvm_mutex_lock(&va_space->hmm.blocks_lock);
va_block = hmm_va_block_from_node(uvm_range_tree_find(&va_space->hmm.blocks, addr));
if (va_block)
goto done;
// The va_block is always created to cover the whole aligned
// UVM_VA_BLOCK_SIZE interval unless there are existing UVM va_ranges or
// HMM va_blocks. In that case, the new HMM va_block size is adjusted so it
// doesn't overlap.
start = UVM_VA_BLOCK_ALIGN_DOWN(addr);
end = start + UVM_VA_BLOCK_SIZE - 1;
// Search for existing UVM va_ranges in the start/end interval and create
// a maximum interval that doesn't overlap any existing UVM va_ranges.
// We know that 'addr' is not within a va_range or
// hmm_va_block_find_create() wouldn't be called.
uvm_range_tree_adjust_interval(&va_space->va_range_tree, addr, &start, &end);
// Search for existing HMM va_blocks in the start/end interval and create
// a maximum interval that doesn't overlap any existing HMM va_blocks.
uvm_range_tree_adjust_interval(&va_space->hmm.blocks, addr, &start, &end);
// Create an HMM va_block with a NULL va_range pointer.
status = uvm_va_block_create(NULL, start, end, &va_block);
if (status != NV_OK)
goto err_unlock;
va_block->hmm.node.start = start;
va_block->hmm.node.end = end;
va_block->hmm.va_space = va_space;
uvm_range_tree_init(&va_block->hmm.va_policy_tree);
ret = mmu_interval_notifier_insert(&va_block->hmm.notifier,
mm,
start,
end - start + 1,
&uvm_hmm_notifier_ops);
if (ret) {
status = errno_to_nv_status(ret);
goto err_release;
}
status = uvm_range_tree_add(&va_space->hmm.blocks, &va_block->hmm.node);
if (status != NV_OK) {
UVM_ASSERT(status != NV_ERR_UVM_ADDRESS_IN_USE);
goto err_unreg;
}
done:
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
if (va_block_context)
va_block_context->hmm.vma = vma;
*va_block_ptr = va_block;
return NV_OK;
err_unreg:
mmu_interval_notifier_remove(&va_block->hmm.notifier);
err_release:
uvm_va_block_release(va_block);
err_unlock:
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
return status;
}
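// Worked example of the interval computation above (illustrative sketch,
// assuming UVM_VA_BLOCK_SIZE is 2MB): for addr == 0x7f0000301000 the initial
// interval is [0x7f0000200000, 0x7f00003fffff]. If an existing UVM va_range
// ends at 0x7f00002fffff, the first uvm_range_tree_adjust_interval() call
// raises 'start' to 0x7f0000300000; if an existing HMM va_block starts at
// 0x7f0000380000, the second call lowers 'end' to 0x7f000037ffff. The new HMM
// va_block then covers [0x7f0000300000, 0x7f000037ffff] and overlaps neither
// neighbor while still containing addr.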
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr)
{
return hmm_va_block_find_create(va_space, addr, false, va_block_context, va_block_ptr);
}
typedef struct {
struct mmu_interval_notifier notifier;
uvm_va_block_t *existing_block;
uvm_va_block_t *new_block;
} hmm_split_invalidate_data_t;
static bool hmm_split_invalidate(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
hmm_split_invalidate_data_t *split_data = container_of(mni, hmm_split_invalidate_data_t, notifier);
uvm_va_block_t *existing_block = split_data->existing_block;
uvm_va_block_t *new_block = split_data->new_block;
if (uvm_ranges_overlap(existing_block->start, existing_block->end, range->start, range->end - 1))
hmm_invalidate(existing_block, range, cur_seq);
if (uvm_ranges_overlap(new_block->start, new_block->end, range->start, range->end - 1))
hmm_invalidate(new_block, range, cur_seq);
return true;
}
static bool hmm_split_invalidate_entry(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
UVM_ENTRY_RET(hmm_split_invalidate(mni, range, cur_seq));
}
static const struct mmu_interval_notifier_ops hmm_notifier_split_ops =
{
.invalidate = hmm_split_invalidate_entry,
};
// Splits existing va_block into two pieces, with new_va_block always after
// va_block. va_block is updated to have new_end. new_end+1 must be page-
// aligned.
//
// Before: [----------- existing ------------]
// After: [---- existing ----][---- new ----]
// ^new_end
//
// On error, va_block is still accessible and is left in its original
// functional state.
static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
NvU64 new_end,
uvm_va_block_t **new_block_ptr)
{
uvm_va_space_t *va_space = va_block->hmm.va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
hmm_split_invalidate_data_t split_data;
uvm_va_block_t *new_va_block;
NV_STATUS status;
int ret;
uvm_assert_rwsem_locked_write(&va_space->lock);
UVM_ASSERT(new_end > va_block->start);
UVM_ASSERT(new_end < va_block->end);
UVM_ASSERT(PAGE_ALIGNED(new_end + 1));
status = uvm_va_block_create(NULL, new_end + 1, va_block->end, &new_va_block);
if (status != NV_OK)
return status;
// Initialize the newly created HMM va_block.
new_va_block->hmm.va_space = va_space;
uvm_range_tree_init(&new_va_block->hmm.va_policy_tree);
// The MMU interval notifier has to be removed in order to resize it.
// That means there would be a window of time where invalidation callbacks
// could be missed. To handle this case, we register a temporary notifier
// to cover the same address range while resizing the old notifier (it is
// OK to have multiple notifiers for the same range; we may simply try to
// invalidate twice).
split_data.existing_block = va_block;
split_data.new_block = new_va_block;
ret = mmu_interval_notifier_insert(&split_data.notifier,
mm,
va_block->start,
new_va_block->end - va_block->start + 1,
&hmm_notifier_split_ops);
uvm_mutex_lock(&va_block->lock);
status = uvm_va_block_split_locked(va_block, new_end, new_va_block, NULL);
if (status != NV_OK)
goto err;
uvm_mutex_unlock(&va_block->lock);
// Since __mmu_notifier_register() was called when the va_space was
// initially created, we know that mm->notifier_subscriptions is valid
// and mmu_interval_notifier_insert() can't return ENOMEM.
// The only error return is for start + length overflowing but we already
// registered the same address range before so there should be no error.
UVM_ASSERT(!ret);
mmu_interval_notifier_remove(&va_block->hmm.notifier);
uvm_range_tree_shrink_node(&va_space->hmm.blocks, &va_block->hmm.node, va_block->start, va_block->end);
// Enable notifications on the old block with the smaller size.
ret = mmu_interval_notifier_insert(&va_block->hmm.notifier,
mm,
va_block->start,
va_block->end - va_block->start + 1,
&uvm_hmm_notifier_ops);
UVM_ASSERT(!ret);
new_va_block->hmm.node.start = new_va_block->start;
new_va_block->hmm.node.end = new_va_block->end;
ret = mmu_interval_notifier_insert(&new_va_block->hmm.notifier,
mm,
new_va_block->start,
new_va_block->end - new_va_block->start + 1,
&uvm_hmm_notifier_ops);
UVM_ASSERT(!ret);
mmu_interval_notifier_remove(&split_data.notifier);
status = uvm_range_tree_add(&va_space->hmm.blocks, &new_va_block->hmm.node);
UVM_ASSERT(status == NV_OK);
if (new_block_ptr)
*new_block_ptr = new_va_block;
return status;
err:
uvm_mutex_unlock(&va_block->lock);
mmu_interval_notifier_remove(&split_data.notifier);
uvm_va_block_release(new_va_block);
return status;
}
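// Notifier coverage during the split above (illustrative summary of the steps
// performed by hmm_split_block()):
//   1. Insert the temporary split notifier over the whole original range.
//   2. Remove the existing block's notifier and re-insert it over the shrunk
//      [va_block->start, new_end] range.
//   3. Insert the new block's notifier over [new_end + 1, original end].
//   4. Remove the temporary notifier.
// At every step at least one notifier covers every address in the original
// range, so no invalidation can be missed; at worst one is delivered to both
// the temporary and the per-block notifiers.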
// Check to see if the HMM va_block would overlap the range start/end and
// split it so it can be removed. That breaks down to the following cases:
// start/end could cover all of the HMM va_block ->
// remove the va_block
// start/end could cover the left part of the HMM va_block ->
// remove the left part
// start/end could cover the right part of the HMM va_block ->
// remove the right part
// or start/end could "punch a hole" in the middle and leave the ends intact.
// In each case, only one HMM va_block is removed so return it in out_va_block.
static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
NvU64 start,
NvU64 end,
uvm_va_block_t **out_va_block)
{
uvm_va_block_t *new;
NV_STATUS status;
if (va_block->start < start) {
status = hmm_split_block(va_block, start - 1, &new);
if (status != NV_OK)
return status;
// Keep the left part, the right part will be deleted.
va_block = new;
}
if (va_block->end > end) {
status = hmm_split_block(va_block, end, NULL);
if (status != NV_OK)
return status;
// Keep the right part, the left part will be deleted.
}
*out_va_block = va_block;
return NV_OK;
}
// Normally, the HMM va_block is destroyed when the va_space is destroyed
// (i.e., when the /dev/nvidia-uvm device is closed). A munmap() call triggers
// a uvm_hmm_invalidate() callback which unmaps the VMA's range from the GPU's
// page tables. However, it doesn't destroy the va_block because that would
// require calling mmu_interval_notifier_remove() which can't be called from
// the invalidate callback due to Linux locking constraints. If a process
// calls mmap()/munmap() for SAM and then creates a UVM managed allocation,
// the same VMA range can be picked and there would be a UVM/HMM va_block
// conflict. Creating a UVM managed allocation (or other va_range) calls this
// function to remove stale HMM va_blocks or split the HMM va_block so there
// is no overlap.
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
NvU64 end)
{
uvm_range_tree_node_t *node, *next;
uvm_va_block_t *va_block;
NV_STATUS status;
if (!uvm_hmm_is_enabled(va_space))
return NV_OK;
if (mm) {
UVM_ASSERT(mm == va_space->va_space_mm.mm);
uvm_assert_mmap_lock_locked(mm);
}
uvm_assert_rwsem_locked_write(&va_space->lock);
// Process each HMM va_block that overlaps the interval [start, end].
// Note that end is inclusive.
// The blocks_lock is not needed when the va_space lock is held for write.
uvm_range_tree_for_each_in_safe(node, next, &va_space->hmm.blocks, start, end) {
va_block = hmm_va_block_from_node(node);
if (mm) {
status = split_block_if_needed(va_block, start, end, &va_block);
if (status != NV_OK)
return status;
}
// Note that this waits for any invalidation callbacks to complete
// so uvm_hmm_invalidate() won't see a block disappear.
// The va_space write lock should prevent uvm_hmm_va_block_find_create()
// from adding it back.
mmu_interval_notifier_remove(&va_block->hmm.notifier);
uvm_range_tree_remove(&va_space->hmm.blocks, &va_block->hmm.node);
uvm_va_block_kill(va_block);
}
UVM_ASSERT(!uvm_range_tree_iter_first(&va_space->hmm.blocks, start, end));
return NV_OK;
}
NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_policy_is_split_needed_t split_needed_cb,
void *data)
{
uvm_va_block_t *va_block;
uvm_va_policy_node_t *node;
NV_STATUS status;
uvm_assert_rwsem_locked_write(&va_space->lock);
// If there is no HMM va_block or the va_block doesn't span the policy
// addr, there is no need to split.
status = uvm_hmm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK || va_block->start == addr)
return NV_OK;
uvm_mutex_lock(&va_block->lock);
node = uvm_va_policy_node_find(va_block, addr);
if (!node)
goto done;
// If the policy range doesn't span addr, we're done.
if (addr == node->node.start)
goto done;
if (split_needed_cb(&node->policy, data))
status = uvm_va_policy_node_split(va_block, node, addr - 1, NULL);
done:
uvm_mutex_unlock(&va_block->lock);
return status;
}
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
NvU64 base,
NvU64 last_address)
{
bool is_default = UVM_ID_IS_INVALID(preferred_location);
uvm_va_block_t *va_block;
NvU64 addr;
NV_STATUS status = NV_OK;
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
uvm_assert_mmap_lock_locked(va_space->va_space_mm.mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
UVM_ASSERT(PAGE_ALIGNED(base));
UVM_ASSERT(PAGE_ALIGNED(last_address + 1));
UVM_ASSERT(base < last_address);
// Update HMM preferred location policy.
for (addr = base; addr < last_address; addr = va_block->end + 1) {
NvU64 end;
status = hmm_va_block_find_create(va_space, addr, true, NULL, &va_block);
if (status != NV_OK)
break;
end = min(last_address, va_block->end);
uvm_mutex_lock(&va_block->lock);
status = uvm_va_policy_set_range(va_block,
addr,
end,
UVM_VA_POLICY_PREFERRED_LOCATION,
is_default,
preferred_location,
UVM_READ_DUPLICATION_MAX);
// TODO: Bug 1750144: unset requires re-evaluating accessed-by mappings
// (see uvm_va_range_set_preferred_location's call of
// uvm_va_block_set_accessed_by), and set requires unmapping remote
// mappings (uvm_va_block_set_preferred_location_locked).
uvm_mutex_unlock(&va_block->lock);
if (status != NV_OK)
break;
}
return status;
}
NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
uvm_processor_id_t processor_id,
bool set_bit,
NvU64 base,
NvU64 last_address)
{
uvm_va_block_t *va_block;
NvU64 addr;
NV_STATUS status = NV_OK;
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
uvm_assert_mmap_lock_locked(va_space->va_space_mm.mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
UVM_ASSERT(PAGE_ALIGNED(base));
UVM_ASSERT(PAGE_ALIGNED(last_address + 1));
UVM_ASSERT(base < last_address);
// Update HMM accessed by policy.
for (addr = base; addr < last_address; addr = va_block->end + 1) {
NvU64 end;
status = hmm_va_block_find_create(va_space, addr, true, NULL, &va_block);
if (status != NV_OK)
break;
end = min(last_address, va_block->end);
uvm_mutex_lock(&va_block->lock);
status = uvm_va_policy_set_range(va_block,
addr,
end,
UVM_VA_POLICY_ACCESSED_BY,
!set_bit,
processor_id,
UVM_READ_DUPLICATION_MAX);
// TODO: Bug 1750144: need to call va_block_set_accessed_by_locked()
// if read duplication isn't enabled.
uvm_mutex_unlock(&va_block->lock);
if (status != NV_OK)
break;
}
return status;
}
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
uvm_va_policy_node_t *node;
NvU64 end = *endp;
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_mutex_locked(&va_block->lock);
if (end > vma->vm_end - 1)
end = vma->vm_end - 1;
node = uvm_va_policy_node_find(va_block, addr);
if (node) {
va_block_context->policy = &node->policy;
if (end > node->node.end)
end = node->node.end;
}
else
va_block_context->policy = &uvm_va_policy_default;
*endp = end;
}
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_page_index_t *outerp)
{
struct vm_area_struct *vma;
unsigned long addr;
NvU64 end = va_block->end;
uvm_page_index_t outer;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
uvm_assert_mmap_lock_locked(va_block_context->mm);
uvm_assert_mutex_locked(&va_block->lock);
addr = uvm_va_block_cpu_page_address(va_block, page_index);
vma = vma_lookup(va_block_context->mm, addr);
if (!vma || !(vma->vm_flags & VM_READ))
return NV_ERR_INVALID_ADDRESS;
va_block_context->hmm.vma = vma;
uvm_hmm_find_policy_end(va_block, va_block_context, addr, &end);
outer = uvm_va_block_cpu_page_index(va_block, end) + 1;
if (*outerp > outer)
*outerp = outer;
return NV_OK;
}
#endif // UVM_IS_CONFIG_HMM()

View File

@@ -0,0 +1,287 @@
/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _UVM_HMM_H_
#define _UVM_HMM_H_
#include "nvtypes.h"
#include "uvm_forward_decl.h"
#include "uvm_va_block_types.h"
#include "uvm_va_policy.h"
#include "uvm_linux.h"
#include "uvm_range_tree.h"
#include "uvm_lock.h"
typedef struct
{
// This stores pointers to uvm_va_block_t for HMM blocks.
uvm_range_tree_t blocks;
uvm_mutex_t blocks_lock;
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
// This flag is set to true by default for each va_space so most processes
// don't see partially implemented UVM-HMM behavior. Test code can clear it
// for a given va_space so that the test process can do some interim testing.
// It needs to be a separate flag instead of modifying uvm_disable_hmm or
// va_space->flags since those are user inputs and are visible/checked by
// test code.
// Remove this when UVM-HMM is fully integrated into chips_a.
bool disable;
} uvm_hmm_va_space_t;
#if UVM_IS_CONFIG_HMM()
// Tells whether HMM is enabled for the given va_space.
// If it is not enabled, all of the functions below are no-ops.
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space);
// Self-explanatory name: reports if HMM is enabled system-wide.
bool uvm_hmm_is_enabled_system_wide(void);
// Initialize HMM for the given va_space.
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
// Initialize HMM for the given va_space for testing.
// Bug 1750144: UVM: Add HMM (Heterogeneous Memory Management) support to
// the UVM driver. Remove this when enough HMM functionality is implemented.
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space);
// Destroy any HMM state for the given va_space.
// Locking: va_space lock must be held in write mode.
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space);
// Find an existing HMM va_block.
// This function can be called without having retained and locked the mm,
// but in that case, the only allowed operations on the returned block are
// locking the block, reading its state, and performing eviction. GPU fault
// handling and user-initiated migrations are not allowed.
// Return values are the same as uvm_va_block_find().
// Locking: This must be called with va_space lock held in at least read
// mode.
NV_STATUS uvm_hmm_va_block_find(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **va_block_ptr);
// Find or create a new HMM va_block.
//
// Return NV_ERR_INVALID_ADDRESS if there is no VMA associated with the
// address 'addr' or the VMA does not have at least PROT_READ permission.
// Locking: This function must be called with mm retained and locked for
// at least read and the va_space lock held for at least read.
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr);
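// A minimal caller sketch (hypothetical, for illustration only; 'fault_addr'
// and 'block_context' are placeholder names, not part of this API):
//
//     uvm_va_block_t *block;
//     NV_STATUS status = uvm_hmm_va_block_find_create(va_space,
//                                                     fault_addr,
//                                                     block_context,
//                                                     &block);
//     if (status == NV_OK) {
//         // 'block' covers fault_addr and block_context->hmm.vma is the VMA
//         // containing it.
//     }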
// Reclaim any HMM va_blocks that overlap the given range.
// Note that 'end' is inclusive.
// An HMM va_block can be reclaimed if it doesn't contain any "valid" VMAs.
// See uvm_hmm_vma_is_valid() for details.
// Return values:
// NV_ERR_NO_MEMORY: Reclaim required a block split, which failed.
// NV_OK: There were no HMM blocks in the range, or all HMM
// blocks in the range were successfully reclaimed.
// Locking: If mm is not NULL, it must equal va_space_mm.mm, the caller
// must hold a reference on it, and it must be locked for at least read
// mode. Also, the va_space lock must be held in write mode.
// TODO: Bug 3372166: add asynchronous va_block reclaim.
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
NvU64 end);
// Find a HMM policy range that needs to be split. The callback function
// 'split_needed_cb' returns true if the policy range needs to be split.
// If a policy range is split, the existing range is truncated to
// [existing_start, addr - 1] and a new policy node with the same policy
// values as the existing node is created covering [addr, existing_end].
// Before: [----------- existing ------------]
// After: [---- existing ----][---- new ----]
// ^addr
// Locking: the va_space must be write locked.
NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_policy_is_split_needed_t split_needed_cb,
void *data);
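// A hypothetical split_needed_cb sketch (illustrative only; assumes the
// callback inspects uvm_va_policy_t::preferred_location): request a split
// whenever the existing policy's preferred location differs from the
// processor being set by the caller.
//
//     static bool preferred_location_is_split_needed(uvm_va_policy_t *policy, void *data)
//     {
//         uvm_processor_id_t id = *(uvm_processor_id_t *)data;
//
//         return !uvm_id_equal(policy->preferred_location, id);
//     }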
// Set the preferred location policy for the given range.
// Note that 'last_address' is inclusive.
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
NvU64 base,
NvU64 last_address);
// Set the accessed by policy for the given range. This also tries to
// map the range. Note that 'last_address' is inclusive.
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
uvm_processor_id_t processor_id,
bool set_bit,
NvU64 base,
NvU64 last_address);
// Set the read duplication policy for the given range.
// Note that 'last_address' is inclusive.
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
// and the va_space lock must be held in write mode.
// TODO: Bug 2046423: need to implement read duplication support in Linux.
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
uvm_read_duplication_policy_t new_policy,
NvU64 base,
NvU64 last_address)
{
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
return NV_OK;
}
// Set va_block_context->policy to the policy covering the given address
// 'addr' and update the ending address '*endp' to the minimum of *endp,
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
// policy range.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
// the va_block lock held.
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp);
// Find the VMA for the page index 'page_index',
// set va_block_context->policy to the policy covering the given address,
// and update the ending page index '*outerp' to the minimum of *outerp and
// the page index just past the lesser of va_block_context->hmm.vma->vm_end - 1
// and the ending address of the policy range.
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
// the va_block lock held.
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_page_index_t *outerp);
#else // UVM_IS_CONFIG_HMM()
static bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
{
return false;
}
static bool uvm_hmm_is_enabled_system_wide(void)
{
return false;
}
static NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
{
return NV_OK;
}
static NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
{
return NV_WARN_NOTHING_TO_DO;
}
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
{
}
static NV_STATUS uvm_hmm_va_block_find(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **va_block_ptr)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
NvU64 end)
{
return NV_OK;
}
static NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_policy_is_split_needed_t split_needed_cb,
void *data)
{
return NV_OK;
}
static NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
NvU64 base,
NvU64 last_address)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
uvm_processor_id_t processor_id,
bool set_bit,
NvU64 base,
NvU64 last_address)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
uvm_read_duplication_policy_t new_policy,
NvU64 base,
NvU64 last_address)
{
return NV_ERR_INVALID_ADDRESS;
}
static void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp)
{
}
static NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_page_index_t *outerp)
{
return NV_OK;
}
#endif // UVM_IS_CONFIG_HMM()
#endif // _UVM_HMM_H_

View File

@@ -0,0 +1,90 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
#include "uvm_va_range.h"
#include "uvm_hmm.h"
NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
struct mm_struct *mm;
uvm_va_block_t *hmm_block = NULL;
NV_STATUS status;
mm = uvm_va_space_mm_retain(va_space);
if (!mm)
return NV_WARN_NOTHING_TO_DO;
uvm_down_write_mmap_lock(mm);
uvm_va_space_down_write(va_space);
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
// By default, HMM is enabled system wide but disabled per va_space.
// This will initialize the va_space for HMM.
status = uvm_hmm_va_space_initialize_test(va_space);
if (status != NV_OK)
goto out;
uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(mm);
uvm_down_read_mmap_lock(mm);
uvm_va_space_down_read(va_space);
// Try to create an HMM va_block at virtual address zero (NULL).
// It should fail: there is no VMA at that address, even though a va_block
// covering the range [0x0, 0x1fffff] would otherwise be possible.
status = uvm_hmm_va_block_find_create(va_space, 0UL, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
// Try to create an HMM va_block which overlaps a UVM managed block.
// It should fail.
status = uvm_hmm_va_block_find_create(va_space, params->uvm_address, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
// Try to create an HMM va_block; it should succeed.
status = uvm_hmm_va_block_find_create(va_space, params->hmm_address, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_OK, done);
// Try to find an existing HMM va_block; it should succeed.
status = uvm_hmm_va_block_find(va_space, params->hmm_address, &hmm_block);
TEST_CHECK_GOTO(status == NV_OK, done);
done:
uvm_va_space_up_read(va_space);
uvm_up_read_mmap_lock(mm);
uvm_va_space_mm_release(va_space);
return status;
out:
uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(mm);
uvm_va_space_mm_release(va_space);
return status;
}

View File

@@ -0,0 +1,296 @@
/*******************************************************************************
Copyright (c) 2020-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_common.h"
#include "uvm_hal.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
#include "uvm_mem.h"
#include "uvm_rm_mem.h"
typedef struct test_sem_mem_t {
void *cpu_va;
NvU64 gpu_va;
union {
uvm_mem_t *uvm_mem;
uvm_rm_mem_t *rm_mem;
};
} test_sem_mem;
static NV_STATUS test_semaphore_alloc_uvm_rm_mem(uvm_gpu_t *gpu, const size_t size, test_sem_mem *mem_out)
{
NV_STATUS status;
uvm_rm_mem_t *mem = NULL;
NvU64 gpu_va;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, &mem);
TEST_NV_CHECK_RET(status);
gpu_va = uvm_rm_mem_get_gpu_uvm_va(mem, gpu);
TEST_CHECK_GOTO(gpu_va < gpu->parent->max_host_va, error);
mem_out->cpu_va = uvm_rm_mem_get_cpu_va(mem);
mem_out->gpu_va = gpu_va;
mem_out->rm_mem = mem;
return NV_OK;
error:
uvm_rm_mem_free(mem);
return status;
}
static NV_STATUS test_semaphore_alloc_sem(uvm_gpu_t *gpu, const size_t size, test_sem_mem *mem_out)
{
NV_STATUS status = NV_OK;
uvm_mem_t *mem = NULL;
NvU64 gpu_va;
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem(size, current->mm, &mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(mem, gpu), error);
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
// Use an RM allocation when Host cannot address the semaphore.
if (gpu_va >= gpu->parent->max_host_va) {
uvm_mem_free(mem);
return test_semaphore_alloc_uvm_rm_mem(gpu, size, mem_out);
}
// This semaphore resides in the uvm_mem region, i.e., it has the GPU VA
// MSbit set. The intent is to validate semaphore operations when the
// semaphore's VA is at the high end of the GPU's effective virtual address
// space, i.e., when its upper VA bit is set.
TEST_CHECK_GOTO(gpu_va & (1ULL << (gpu->address_space_tree.hal->num_va_bits() - 1)), error);
TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(mem), error);
mem_out->cpu_va = uvm_mem_get_cpu_addr_kernel(mem);
mem_out->gpu_va = gpu_va;
mem_out->uvm_mem = mem;
return NV_OK;
error:
uvm_mem_free(mem);
return status;
}
static void test_semaphore_free_sem(uvm_gpu_t *gpu, test_sem_mem *mem)
{
if (mem->gpu_va >= gpu->parent->uvm_mem_va_base)
uvm_mem_free(mem->uvm_mem);
else
uvm_rm_mem_free(mem->rm_mem);
}
// This test is similar to the test_semaphore_release() test in uvm_ce_test.c,
// except that this one uses host_hal->semaphore_release();
static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
{
NV_STATUS status;
test_sem_mem mem = { 0 };
uvm_push_t push;
NvU32 value;
NvU32 payload = 0xA5A55A5A;
NvU32 *cpu_ptr;
// Semaphore release needs 1 word (4 bytes).
const size_t size = sizeof(NvU32);
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_NV_CHECK_RET(status);
// Initialize the payload.
cpu_ptr = (NvU32 *)mem.cpu_va;
*cpu_ptr = 0;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_release test");
TEST_NV_CHECK_GOTO(status, done);
gpu->parent->host_hal->semaphore_release(&push, mem.gpu_va, payload);
status = uvm_push_end_and_wait(&push);
TEST_NV_CHECK_GOTO(status, done);
value = *cpu_ptr;
if (value != payload) {
UVM_TEST_PRINT("Semaphore payload = %u instead of %u, GPU %s\n", value, payload, uvm_gpu_name(gpu));
status = NV_ERR_INVALID_STATE;
goto done;
}
done:
test_semaphore_free_sem(gpu, &mem);
return status;
}
static NV_STATUS test_semaphore_acquire(uvm_gpu_t *gpu)
{
NV_STATUS status;
test_sem_mem mem = { 0 };
uvm_push_t push;
uvm_spin_loop_t spin;
NvU32 *cpu_ptr, *cpu_sema_A, *cpu_sema_B, *cpu_sema_C;
NvU64 gpu_sema_va_A, gpu_sema_va_B, gpu_sema_va_C;
bool check_sema_C;
// Each semaphore is one word (4 bytes) long; we use three semaphores.
const size_t sema_size = 4;
const size_t size = sema_size * 3;
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_NV_CHECK_RET(status);
gpu_sema_va_A = mem.gpu_va;
gpu_sema_va_B = mem.gpu_va + sema_size;
gpu_sema_va_C = mem.gpu_va + 2 * sema_size;
cpu_ptr = (NvU32 *)mem.cpu_va;
memset(cpu_ptr, 0, size);
cpu_sema_A = cpu_ptr;
cpu_sema_B = cpu_ptr + 1;
cpu_sema_C = cpu_ptr + 2;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_acquire test");
TEST_NV_CHECK_GOTO(status, done);
gpu->parent->host_hal->semaphore_release(&push, gpu_sema_va_A, 1);
gpu->parent->host_hal->semaphore_acquire(&push, gpu_sema_va_B, 1);
gpu->parent->host_hal->semaphore_release(&push, gpu_sema_va_C, 1);
uvm_push_end(&push);
// Wait for sema_A release.
UVM_SPIN_WHILE(UVM_READ_ONCE(*cpu_sema_A) != 1, &spin);
// Sleep for 10ms; the GPU waits while sema_B is held by us.
msleep(10);
check_sema_C = UVM_READ_ONCE(*cpu_sema_C) == 0;
// memory fence/barrier, check comment in
// uvm_gpu_semaphore.c:uvm_gpu_semaphore_set_payload() for details.
mb();
// Release sema_B.
UVM_WRITE_ONCE(*cpu_sema_B, 1);
// Wait for the GPU to release sema_C, i.e., the end of the push.
status = uvm_push_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
// check_sema_C is validated only here, after the push has ended, so that a
// failed check cannot be attributed to the push being interrupted in the
// middle.
TEST_CHECK_GOTO(check_sema_C, done);
TEST_CHECK_GOTO(UVM_READ_ONCE(*cpu_sema_C) == 1, done);
done:
test_semaphore_free_sem(gpu, &mem);
return status;
}
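// Ordering summary for the acquire test above (illustrative):
//   GPU push: release(sema_A = 1) -> acquire(sema_B == 1) -> release(sema_C = 1)
//   CPU:      spin until sema_A == 1 -> sleep 10ms -> record that sema_C is
//             still 0 -> write sema_B = 1 -> wait for the push -> expect
//             sema_C == 1
// The 10ms sleep gives the GPU time to run ahead; if the acquire were not
// honored, sema_C would already be 1 when it is sampled.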
// This test is similar to the test_semaphore_timestamp() test in
// uvm_ce_test.c, except that this one uses host_hal->semaphore_timestamp();
static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
{
NV_STATUS status;
test_sem_mem mem = { 0 };
uvm_push_t push;
NvU32 i;
NvU64 *timestamp;
NvU64 last_timestamp = 0;
// 2 iterations:
// 1: compare retrieved timestamp with 0;
// 2: compare retrieved timestamp with previous timestamp (obtained in 1).
const NvU32 iterations = 2;
// The semaphore is 4 words long (16 bytes).
const size_t size = 16;
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_NV_CHECK_RET(status);
timestamp = (NvU64 *)mem.cpu_va;
TEST_CHECK_GOTO(timestamp != NULL, done);
memset(timestamp, 0, size);
// Shift the timestamp pointer to where the semaphore timestamp info is.
timestamp += 1;
for (i = 0; i < iterations; i++) {
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
&push,
"semaphore_timestamp test, iter: %u",
i);
TEST_NV_CHECK_GOTO(status, done);
gpu->parent->host_hal->semaphore_timestamp(&push, mem.gpu_va);
status = uvm_push_end_and_wait(&push);
TEST_NV_CHECK_GOTO(status, done);
TEST_CHECK_GOTO(*timestamp != 0, done);
TEST_CHECK_GOTO(*timestamp >= last_timestamp, done);
last_timestamp = *timestamp;
}
done:
test_semaphore_free_sem(gpu, &mem);
return status;
}
static NV_STATUS test_host(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
TEST_NV_CHECK_RET(test_semaphore_release(gpu));
TEST_NV_CHECK_RET(test_semaphore_acquire(gpu));
TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
}
return NV_OK;
}
NV_STATUS uvm_test_host_sanity(UVM_TEST_HOST_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_va_space_down_read_rm(va_space);
status = test_host(va_space);
uvm_va_space_up_read_rm(va_space);
return status;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,412 @@
/*******************************************************************************
Copyright (c) 2016-2020 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_rb_tree.h"
// To implement realloc for vmalloc-based allocations we need to track the size
// of the original allocation. We can do that by allocating a header along with
// the allocation itself. Since vmalloc is only used for relatively large
// allocations, this overhead is very small.
//
// We don't need this for kmalloc since we can use ksize().
typedef struct
{
size_t alloc_size;
uint8_t ptr[0];
} uvm_vmalloc_hdr_t;
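// Layout sketch for a vmalloc-backed allocation of N bytes (illustrative):
//
//     +------------+---------------------------------+
//     | alloc_size | caller-visible buffer (N bytes) |
//     +------------+---------------------------------+
//     ^hdr         ^hdr->ptr (pointer returned to the caller)
//
// get_hdr() later recovers 'hdr' from the caller's pointer via container_of().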
typedef struct
{
const char *file;
const char *function;
int line;
uvm_rb_tree_node_t node;
} uvm_kvmalloc_info_t;
typedef enum
{
UVM_KVMALLOC_LEAK_CHECK_NONE = 0,
UVM_KVMALLOC_LEAK_CHECK_BYTES,
UVM_KVMALLOC_LEAK_CHECK_ORIGIN,
UVM_KVMALLOC_LEAK_CHECK_COUNT
} uvm_kvmalloc_leak_check_t;
// This is used just to make sure that the APIs aren't used outside of
// uvm_kvmalloc_init/uvm_kvmalloc_exit. The memory allocation would still work
// fine, but the leak checker would get confused.
static bool g_malloc_initialized = false;
static struct
{
// Current outstanding bytes allocated
atomic_long_t bytes_allocated;
// Number of allocations whose tracking info allocation failed. Used just
// for sanity checks.
atomic_long_t untracked_allocations;
// Use a raw spinlock rather than a uvm_spinlock_t because the kvmalloc
// layer is initialized and torn down before the thread context layer.
spinlock_t lock;
// Table of all outstanding allocations
uvm_rb_tree_t allocation_info;
struct kmem_cache *info_cache;
} g_uvm_leak_checker;
// Default to byte-count-only leak checking for non-release builds. This can
// always be overridden by the module parameter.
static int uvm_leak_checker = (UVM_IS_DEBUG() || UVM_IS_DEVELOP()) ?
UVM_KVMALLOC_LEAK_CHECK_BYTES :
UVM_KVMALLOC_LEAK_CHECK_NONE;
module_param(uvm_leak_checker, int, S_IRUGO);
MODULE_PARM_DESC(uvm_leak_checker,
"Enable uvm memory leak checking. "
"0 = disabled, 1 = count total bytes allocated and freed, 2 = per-allocation origin tracking.");
NV_STATUS uvm_kvmalloc_init(void)
{
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN) {
spin_lock_init(&g_uvm_leak_checker.lock);
uvm_rb_tree_init(&g_uvm_leak_checker.allocation_info);
g_uvm_leak_checker.info_cache = NV_KMEM_CACHE_CREATE("uvm_kvmalloc_info_t", uvm_kvmalloc_info_t);
if (!g_uvm_leak_checker.info_cache)
return NV_ERR_NO_MEMORY;
}
g_malloc_initialized = true;
return NV_OK;
}
void uvm_kvmalloc_exit(void)
{
if (!g_malloc_initialized)
return;
if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
" insmod with uvm_leak_checker=2 for detailed information." :
"");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
if (g_uvm_global.unload_state.ptr)
*g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
}
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN) {
uvm_rb_tree_node_t *node, *next;
uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX " Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
// Free so we don't keep eating up memory while debugging. Note that
// this also removes the entry from the table, frees info, and drops
// the allocated bytes count.
uvm_kvfree((void *)((uintptr_t)info->node.key));
}
if (atomic_long_read(&g_uvm_leak_checker.untracked_allocations) == 0)
UVM_ASSERT(atomic_long_read(&g_uvm_leak_checker.bytes_allocated) == 0);
kmem_cache_destroy_safe(&g_uvm_leak_checker.info_cache);
}
g_malloc_initialized = false;
}
static void insert_info(uvm_kvmalloc_info_t *info)
{
NV_STATUS status;
unsigned long irq_flags;
spin_lock_irqsave(&g_uvm_leak_checker.lock, irq_flags);
status = uvm_rb_tree_insert(&g_uvm_leak_checker.allocation_info, &info->node);
spin_unlock_irqrestore(&g_uvm_leak_checker.lock, irq_flags);
// We shouldn't have duplicates
UVM_ASSERT(status == NV_OK);
}
static uvm_kvmalloc_info_t *remove_info(void *p)
{
uvm_rb_tree_node_t *node;
uvm_kvmalloc_info_t *info = NULL;
unsigned long irq_flags;
spin_lock_irqsave(&g_uvm_leak_checker.lock, irq_flags);
node = uvm_rb_tree_find(&g_uvm_leak_checker.allocation_info, (NvU64)p);
if (node)
uvm_rb_tree_remove(&g_uvm_leak_checker.allocation_info, node);
spin_unlock_irqrestore(&g_uvm_leak_checker.lock, irq_flags);
if (!node) {
UVM_ASSERT(atomic_long_read(&g_uvm_leak_checker.untracked_allocations) > 0);
atomic_long_dec(&g_uvm_leak_checker.untracked_allocations);
}
else {
info = container_of(node, uvm_kvmalloc_info_t, node);
UVM_ASSERT(info->node.key == (NvU64)((uintptr_t)p));
}
return info;
}
static void alloc_tracking_add(void *p, const char *file, int line, const char *function)
{
// Add uvm_kvsize(p) instead of size because uvm_kvsize might be larger (due
// to ksize), and uvm_kvfree only knows about uvm_kvsize.
size_t size = uvm_kvsize(p);
uvm_kvmalloc_info_t *info;
UVM_ASSERT(g_malloc_initialized);
if (ZERO_OR_NULL_PTR(p))
return;
atomic_long_add(size, &g_uvm_leak_checker.bytes_allocated);
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN) {
// Silently ignore OOM errors
info = nv_kmem_cache_zalloc(g_uvm_leak_checker.info_cache, NV_UVM_GFP_FLAGS);
if (!info) {
atomic_long_inc(&g_uvm_leak_checker.untracked_allocations);
return;
}
info->node.key = (NvU64)p;
info->file = file;
info->function = function;
info->line = line;
insert_info(info);
}
}
static void alloc_tracking_remove(void *p)
{
size_t size = uvm_kvsize(p);
uvm_kvmalloc_info_t *info;
UVM_ASSERT(g_malloc_initialized);
if (ZERO_OR_NULL_PTR(p))
return;
atomic_long_sub(size, &g_uvm_leak_checker.bytes_allocated);
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN) {
info = remove_info(p);
if (info)
kmem_cache_free(g_uvm_leak_checker.info_cache, info);
}
}
static uvm_vmalloc_hdr_t *get_hdr(void *p)
{
uvm_vmalloc_hdr_t *hdr;
UVM_ASSERT(is_vmalloc_addr(p));
hdr = container_of(p, uvm_vmalloc_hdr_t, ptr);
UVM_ASSERT(hdr->alloc_size > UVM_KMALLOC_THRESHOLD);
return hdr;
}
static void *alloc_internal(size_t size, bool zero_memory)
{
uvm_vmalloc_hdr_t *hdr;
// Make sure that the allocation pointer is suitably-aligned for a natively-
// sized allocation.
BUILD_BUG_ON(offsetof(uvm_vmalloc_hdr_t, ptr) != sizeof(void *));
// Make sure that ptr[] is the last member and adds no size of its own, so
// (sizeof(*hdr) + size) is exactly the space needed
BUILD_BUG_ON(sizeof(uvm_vmalloc_hdr_t) != offsetof(uvm_vmalloc_hdr_t, ptr));
if (size <= UVM_KMALLOC_THRESHOLD) {
if (zero_memory)
return kzalloc(size, NV_UVM_GFP_FLAGS);
return kmalloc(size, NV_UVM_GFP_FLAGS);
}
if (zero_memory)
hdr = vzalloc(sizeof(*hdr) + size);
else
hdr = vmalloc(sizeof(*hdr) + size);
if (!hdr)
return NULL;
hdr->alloc_size = size;
return hdr->ptr;
}
void *__uvm_kvmalloc(size_t size, const char *file, int line, const char *function)
{
void *p = alloc_internal(size, false);
if (uvm_leak_checker && p)
alloc_tracking_add(p, file, line, function);
return p;
}
void *__uvm_kvmalloc_zero(size_t size, const char *file, int line, const char *function)
{
void *p = alloc_internal(size, true);
if (uvm_leak_checker && p)
alloc_tracking_add(p, file, line, function);
return p;
}
void uvm_kvfree(void *p)
{
if (!p)
return;
if (uvm_leak_checker)
alloc_tracking_remove(p);
if (is_vmalloc_addr(p))
vfree(get_hdr(p));
else
kfree(p);
}
// Handle reallocs of kmalloc-based allocations
static void *realloc_from_kmalloc(void *p, size_t new_size)
{
void *new_p;
// Simple case: kmalloc -> kmalloc
if (new_size <= UVM_KMALLOC_THRESHOLD)
return krealloc(p, new_size, NV_UVM_GFP_FLAGS);
// kmalloc -> vmalloc
new_p = alloc_internal(new_size, false);
if (!new_p)
return NULL;
memcpy(new_p, p, min(ksize(p), new_size));
kfree(p);
return new_p;
}
// Handle reallocs of vmalloc-based allocations
static void *realloc_from_vmalloc(void *p, size_t new_size)
{
uvm_vmalloc_hdr_t *old_hdr = get_hdr(p);
void *new_p;
if (new_size == 0) {
vfree(old_hdr);
return ZERO_SIZE_PTR; // What krealloc returns for this case
}
if (new_size == old_hdr->alloc_size)
return p;
// vmalloc has no realloc functionality so we need to do a separate alloc +
// copy.
new_p = alloc_internal(new_size, false);
if (!new_p)
return NULL;
memcpy(new_p, p, min(new_size, old_hdr->alloc_size));
vfree(old_hdr);
return new_p;
}
void *__uvm_kvrealloc(void *p, size_t new_size, const char *file, int line, const char *function)
{
void *new_p;
uvm_kvmalloc_info_t *info = NULL;
size_t old_size;
if (ZERO_OR_NULL_PTR(p))
return __uvm_kvmalloc(new_size, file, line, function);
old_size = uvm_kvsize(p);
if (uvm_leak_checker) {
// new_size == 0 is a free, so just remove everything
if (new_size == 0) {
alloc_tracking_remove(p);
}
else {
// Remove the old pointer. If the realloc gives us a new pointer
// with the old one still in the tracking table, that pointer could
// be reallocated by another thread before we remove it from the
// table.
atomic_long_sub(old_size, &g_uvm_leak_checker.bytes_allocated);
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN)
info = remove_info(p);
}
}
if (is_vmalloc_addr(p))
new_p = realloc_from_vmalloc(p, new_size);
else
new_p = realloc_from_kmalloc(p, new_size);
if (uvm_leak_checker) {
if (!new_p) {
// The realloc failed, so put the old info back
atomic_long_add(old_size, &g_uvm_leak_checker.bytes_allocated);
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN && info)
insert_info(info);
}
else if (new_size != 0) {
// Drop the old info and insert the new
if (info)
kmem_cache_free(g_uvm_leak_checker.info_cache, info);
alloc_tracking_add(new_p, file, line, function);
}
}
return new_p;
}
size_t uvm_kvsize(void *p)
{
UVM_ASSERT(g_malloc_initialized);
UVM_ASSERT(p);
if (is_vmalloc_addr(p))
return get_hdr(p)->alloc_size;
return ksize(p);
}

View File

@@ -0,0 +1,75 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_KVMALLOC_H__
#define __UVM_KVMALLOC_H__
#include "uvm_linux.h"
#include "uvm_test_ioctl.h"
// kmalloc is faster than vmalloc because it doesn't have to remap kernel
// virtual memory, but for that same reason it requires physically-contiguous
// memory. It also supports a native krealloc function which is missing in
// vmalloc.
//
// Therefore the uvm_kvmalloc APIs use kmalloc when possible, but will fall back
// to vmalloc when the allocation size exceeds this UVM_KMALLOC_THRESHOLD.
//
// This value is somewhat arbitrary. kmalloc can support allocations much larger
// than PAGE_SIZE, but the larger the size the higher the chances of allocation
// failure.
//
// This is in the header so callers can use it to inform their allocation sizes
// if they wish.
#define UVM_KMALLOC_THRESHOLD (4*PAGE_SIZE)
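// For example (illustrative, assuming 4K pages): a 16KB request is served by
// kmalloc, while a 16KB + 1 byte request falls back to vmalloc plus a small
// size-tracking header.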
NV_STATUS uvm_kvmalloc_init(void);
void uvm_kvmalloc_exit(void);
// Allocating a size of 0 with any of these APIs returns ZERO_SIZE_PTR
void *__uvm_kvmalloc(size_t size, const char *file, int line, const char *function);
void *__uvm_kvmalloc_zero(size_t size, const char *file, int line, const char *function);
#define uvm_kvmalloc(__size) __uvm_kvmalloc(__size, __FILE__, __LINE__, __FUNCTION__)
#define uvm_kvmalloc_zero(__size) __uvm_kvmalloc_zero(__size, __FILE__, __LINE__, __FUNCTION__)
void uvm_kvfree(void *p);
// Follows standard realloc semantics:
// - uvm_kvrealloc(NULL, size) and uvm_kvrealloc(ZERO_SIZE_PTR, size) are each
// equivalent to uvm_kvmalloc(size)
// - uvm_kvrealloc(p, 0) is the same as uvm_kvfree(p), and returns ZERO_SIZE_PTR
void *__uvm_kvrealloc(void *p, size_t new_size, const char *file, int line, const char *function);
#define uvm_kvrealloc(__p, __new_size) __uvm_kvrealloc(__p, __new_size, __FILE__, __LINE__, __FUNCTION__)
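// Minimal usage sketch (hypothetical, for illustration only):
//
//     NvU32 *arr = uvm_kvmalloc_zero(count * sizeof(*arr));
//     if (!arr)
//         return NV_ERR_NO_MEMORY;
//
//     // Grow the array. On failure the original allocation is left intact,
//     // following standard realloc semantics.
//     NvU32 *bigger = uvm_kvrealloc(arr, 2 * count * sizeof(*arr));
//     if (!bigger) {
//         uvm_kvfree(arr);
//         return NV_ERR_NO_MEMORY;
//     }
//
//     uvm_kvfree(bigger);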
// Returns the allocation size for a prior allocation from uvm_kvmalloc,
// uvm_kvmalloc_zero, or uvm_kvrealloc. This may be more than the size requested
// in those calls, in which case the extra memory is safe to use.
//
// p must not be NULL.
size_t uvm_kvsize(void *p);
NV_STATUS uvm_test_kvmalloc(UVM_TEST_KVMALLOC_PARAMS *params, struct file *filp);
#endif // __UVM_KVMALLOC_H__

View File

@@ -0,0 +1,184 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_kvmalloc.h"
#include "uvm_test.h"
typedef enum
{
ALLOC_TYPE_MALLOC,
ALLOC_TYPE_ZALLOC,
ALLOC_TYPE_REALLOC_NULL,
ALLOC_TYPE_REALLOC_ZERO,
ALLOC_TYPE_MAX
} alloc_type_t;
static NV_STATUS check_alloc(void *p, size_t size)
{
if (size == 0) {
TEST_CHECK_RET(p == ZERO_SIZE_PTR);
TEST_CHECK_RET(uvm_kvsize(p) == 0);
}
else if (size <= UVM_KMALLOC_THRESHOLD) {
TEST_CHECK_RET(!is_vmalloc_addr(p));
// In theory it's possible to use kmalloc yet have ksize(p) be larger
// than our arbitrary UVM_KMALLOC_THRESHOLD. In practice, as long as
// UVM_KMALLOC_THRESHOLD is a multiple of PAGE_SIZE, that's highly
// unlikely.
TEST_CHECK_RET(uvm_kvsize(p) == ksize(p));
TEST_CHECK_RET(uvm_kvsize(p) >= size);
}
else {
TEST_CHECK_RET(is_vmalloc_addr(p));
TEST_CHECK_RET(uvm_kvsize(p) == size);
}
return NV_OK;
}
static NV_STATUS test_uvm_kvmalloc(void)
{
static const size_t sizes[] = {0, UVM_KMALLOC_THRESHOLD, UVM_KMALLOC_THRESHOLD + 1};
uint8_t *p;
uint8_t expected;
size_t i, j, size;
alloc_type_t alloc_type;
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
size = sizes[i];
for (alloc_type = 0; alloc_type < ALLOC_TYPE_MAX; alloc_type++) {
switch (alloc_type) {
case ALLOC_TYPE_MALLOC:
p = uvm_kvmalloc(size);
break;
case ALLOC_TYPE_ZALLOC:
p = uvm_kvmalloc_zero(size);
break;
case ALLOC_TYPE_REALLOC_NULL:
p = uvm_kvrealloc(NULL, size);
break;
case ALLOC_TYPE_REALLOC_ZERO:
p = uvm_kvrealloc(ZERO_SIZE_PTR, size);
break;
default:
UVM_ASSERT(0);
p = NULL;
}
if (!p)
return NV_ERR_NO_MEMORY;
// On failure, this macro returns and thus leaks the allocation. But
// if the check fails, our allocation state is messed up so we can't
// reasonably free the allocation anyway.
MEM_NV_CHECK_RET(check_alloc(p, size), NV_OK);
// Scribble on the allocation to make sure we don't crash
if (alloc_type == ALLOC_TYPE_ZALLOC) {
expected = 0;
}
else {
expected = (uint8_t)(current->pid + i);
memset(p, expected, size);
}
for (j = 0; j < size; j++) {
if (p[j] != expected) {
UVM_TEST_PRINT("p[%zu] is 0x%x instead of expected value 0x%x\n", j, p[j], expected);
uvm_kvfree(p);
TEST_CHECK_RET(0);
}
}
uvm_kvfree(p);
}
}
return NV_OK;
}
static NV_STATUS test_uvm_kvrealloc(void)
{
size_t i, j, k, old_size, new_size;
uint8_t *old_p, *new_p;
uint8_t expected = (uint8_t)current->pid;
static const size_t sizes[] = {0,
UVM_KMALLOC_THRESHOLD / 2,
UVM_KMALLOC_THRESHOLD,
UVM_KMALLOC_THRESHOLD + 1,
2*UVM_KMALLOC_THRESHOLD};
// uvm_kvrealloc(NULL, size) and uvm_kvrealloc(ZERO_SIZE_PTR, size) are
// tested in test_uvm_kvmalloc() so we don't have to do them here.
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
old_size = sizes[i];
for (j = 0; j < ARRAY_SIZE(sizes); j++) {
new_size = sizes[j];
old_p = uvm_kvmalloc(old_size);
if (!old_p)
return NV_ERR_NO_MEMORY;
MEM_NV_CHECK_RET(check_alloc(old_p, old_size), NV_OK);
++expected;
memset(old_p, expected, old_size);
new_p = uvm_kvrealloc(old_p, new_size);
if (!new_p) {
uvm_kvfree(old_p);
return NV_ERR_NO_MEMORY;
}
// At this point, either new_p == old_p or old_p should have been
// freed. In either case there's no need to free old_p.
MEM_NV_CHECK_RET(check_alloc(new_p, new_size), NV_OK);
// Make sure the data is still present
for (k = 0; k < min(new_size, old_size); k++) {
if (new_p[k] != expected) {
UVM_TEST_PRINT("new_p[%zu] is 0x%x instead of expected value 0x%x\n", k, new_p[k], expected);
uvm_kvfree(new_p);
TEST_CHECK_RET(0);
}
}
// Exercise the free-via-realloc path
TEST_CHECK_RET(uvm_kvrealloc(new_p, 0) == ZERO_SIZE_PTR);
}
}
return NV_OK;
}
NV_STATUS uvm_test_kvmalloc(UVM_TEST_KVMALLOC_PARAMS *params, struct file *filp)
{
NV_STATUS status = test_uvm_kvmalloc();
if (status != NV_OK)
return status;
return test_uvm_kvrealloc();
}

View File

@@ -0,0 +1,81 @@
/*******************************************************************************
Copyright (c) 2013 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_linux.h"
#if UVM_CGROUP_ACCOUNTING_SUPPORTED()
#include <linux/memcontrol.h>
#include <linux/sched/mm.h>
#endif
//
// uvm_linux.c
//
// This file, along with conftest.h and uvm_linux.h, helps to insulate
// the (out-of-tree) UVM driver from changes to the upstream Linux kernel.
//
#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping)
{
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
#if defined(NV_ADDRESS_SPACE_HAS_RWLOCK_TREE_LOCK)
//
// The .tree_lock member variable was changed from type rwlock_t, to
// spinlock_t, on 25 July 2008, by mainline commit
// 19fd6231279be3c3bdd02ed99f9b0eb195978064.
//
rwlock_init(&mapping->tree_lock);
#else
spin_lock_init(&mapping->tree_lock);
#endif
spin_lock_init(&mapping->i_mmap_lock);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
#endif
#if UVM_CGROUP_ACCOUNTING_SUPPORTED()
void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm)
{
memset(context, 0, sizeof(*context));
if (!mm)
return;
context->new_memcg = get_mem_cgroup_from_mm(mm);
context->old_memcg = set_active_memcg(context->new_memcg);
}
void uvm_memcg_context_end(uvm_memcg_context_t *context)
{
if (!context->new_memcg)
return;
set_active_memcg(context->old_memcg);
mem_cgroup_put(context->new_memcg);
}
#endif

View File

@@ -0,0 +1,622 @@
/*******************************************************************************
Copyright (c) 2013-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
//
// uvm_linux.h
//
// This file, along with conftest.h and uvm_linux.c, helps to insulate
// the (out-of-tree) UVM driver from changes to the upstream Linux kernel.
//
//
#ifndef _UVM_LINUX_H
#define _UVM_LINUX_H
#include "nvtypes.h"
#include "nv-time.h"
#define NV_BUILD_MODULE_INSTANCES 0
#include "nv-linux.h"
#if defined(NV_LINUX_LOG2_H_PRESENT)
#include <linux/log2.h>
#endif
#if defined(NV_PRIO_TREE_PRESENT)
#include <linux/prio_tree.h>
#endif
#include <linux/jhash.h>
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#if defined(NV_ASM_BARRIER_H_PRESENT)
#include <asm/barrier.h>
#endif
#if defined(NV_LINUX_ATOMIC_H_PRESENT)
#include <linux/atomic.h>
#endif
#include <asm/current.h>
#include <linux/random.h> /* get_random_bytes() */
#include <linux/radix-tree.h> /* Linux kernel radix tree */
#include <linux/file.h> /* fget() */
#include <linux/percpu.h>
#if defined(NV_LINUX_PRINTK_H_PRESENT)
#include <linux/printk.h>
#endif
#if defined(NV_LINUX_RATELIMIT_H_PRESENT)
#include <linux/ratelimit.h>
#endif
#if defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT)
#include <asm/powernv.h>
#endif
#if defined(NV_LINUX_SCHED_TASK_STACK_H_PRESENT)
#include <linux/sched/task_stack.h>
#endif
#include <linux/cpumask.h>
#include <linux/topology.h>
#include "nv-kthread-q.h"
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1 && defined(NV_CPUMASK_OF_NODE_PRESENT)
#define UVM_THREAD_AFFINITY_SUPPORTED() 1
#else
#define UVM_THREAD_AFFINITY_SUPPORTED() 0
#endif
// The ARM arch lacks support for cpumask_of_node() until kernel 4.7. It was
// added via commit 1a2db300348b ("arm64, numa: Add NUMA support for arm64
// platforms.") Callers should either check UVM_THREAD_AFFINITY_SUPPORTED()
// prior to calling this function or be prepared to deal with a NULL CPU
// mask.
static inline const struct cpumask *uvm_cpumask_of_node(int node)
{
#ifdef NV_CPUMASK_OF_NODE_PRESENT
return cpumask_of_node(node);
#else
return NULL;
#endif
}
#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MAKE_DEVICE_EXCLUSIVE_RANGE_PRESENT)
#define UVM_IS_CONFIG_HMM() 1
#else
#define UVM_IS_CONFIG_HMM() 0
#endif
// Various issues prevent us from using mmu_notifiers in older kernels. These
// include:
// - ->release being called under RCU instead of SRCU: fixed by commit
// 21a92735f660eaecf69a6f2e777f18463760ec32, v3.7 (2012-10-08).
// - Race conditions between mmu_notifier_release and mmu_notifier_unregister:
// fixed by commit d34883d4e35c0a994e91dd847a82b4c9e0c31d83, v3.10
// (2013-05-24).
//
// Unfortunately these issues aren't conftest-able, so instead we look for the
// presence of the invalidate_range callback in mmu_notifier_ops. This was added
// after all of the above issues were resolved, so we assume the fixes are
// present if we see the callback.
//
// The callback was added in commit 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e,
// v3.19 (2014-11-13).
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
#define UVM_CAN_USE_MMU_NOTIFIERS() 1
#else
#define UVM_CAN_USE_MMU_NOTIFIERS() 0
#endif
// See bug 1707453 for further details about setting the minimum kernel version.
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
# error This driver does not support kernels older than 2.6.32!
#endif
#if !defined(VM_RESERVED)
#define VM_RESERVED 0x00000000
#endif
#if !defined(VM_DONTEXPAND)
#define VM_DONTEXPAND 0x00000000
#endif
#if !defined(VM_DONTDUMP)
#define VM_DONTDUMP 0x00000000
#endif
#if !defined(VM_MIXEDMAP)
#define VM_MIXEDMAP 0x00000000
#endif
//
// printk.h already defined pr_fmt, so we have to redefine it so the pr_*
// routines pick up our version
//
#undef pr_fmt
#define NVIDIA_UVM_PRETTY_PRINTING_PREFIX "nvidia-uvm: "
#define pr_fmt(fmt) NVIDIA_UVM_PRETTY_PRINTING_PREFIX fmt
// Dummy printing function that maintains syntax and format specifier checking
// but doesn't print anything and doesn't evaluate the print parameters. This is
// roughly equivalent to the kernel's no_printk function. We use this instead
// because:
// 1) no_printk was not available until 2.6.36
// 2) Until 4.5 no_printk was implemented as a static function, meaning its
// parameters were always evaluated
#define UVM_NO_PRINT(fmt, ...) \
do { \
if (0) \
printk(fmt, ##__VA_ARGS__); \
} while (0)
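// Illustrative usage sketch (not part of the original file): because the
// "if (0)" branch is never taken, the arguments below are type-checked by the
// compiler but never evaluated, so the hypothetical expensive_debug_count()
// call would not run:
//
//     UVM_NO_PRINT("dropped events: %d\n", expensive_debug_count());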
// printk_ratelimited was added in 2.6.33 via commit
// 8a64f336bc1d4aa203b138d29d5a9c414a9fbb47. If not available, we prefer not
// printing anything since it's supposed to be rate-limited.
#if !defined(printk_ratelimited)
#define printk_ratelimited UVM_NO_PRINT
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
// Just too much compilation trouble with the rate-limiting printk feature
// until about kernel 3.8. Because the non-rate-limited printing will cause
// surprises and problems, just turn it off entirely in this situation.
//
#undef pr_debug_ratelimited
#define pr_debug_ratelimited UVM_NO_PRINT
#endif
#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#if !defined(pmd_large)
#define pmd_large(_pmd) \
((pmd_val(_pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
#endif
#endif /* defined(NVCPU_X86) || defined(NVCPU_X86_64) */
#if !defined(GFP_DMA32)
/*
* GFP_DMA32 is similar to GFP_DMA, but instructs the Linux zone
* allocator to allocate memory from the first 4GB on platforms
* such as Linux/x86-64; the alternative is to use an IOMMU such
* as the one implemented with the K8 GART, if available.
*/
#define GFP_DMA32 0
#endif
#if !defined(__GFP_NOWARN)
#define __GFP_NOWARN 0
#endif
#if !defined(__GFP_NORETRY)
#define __GFP_NORETRY 0
#endif
#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NORETRY)
#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping);
#endif
// Develop builds define DEBUG but enable optimization
#if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP)
// Wrappers for functions not building correctly without optimizations on,
// implemented in uvm_debug_optimized.c. Notably the file is only built for
// debug builds, not develop or release builds.
// Unoptimized builds of atomic_xchg() hit a BUILD_BUG() on arm64 as it relies
// on __xchg being completely inlined:
// /usr/src/linux-3.12.19/arch/arm64/include/asm/cmpxchg.h:67:3: note: in expansion of macro 'BUILD_BUG'
//
// PowerPC hits a similar issue, but ends up with an undefined symbol:
// WARNING: "__xchg_called_with_bad_pointer" [...] undefined!
int nv_atomic_xchg(atomic_t *val, int new);
// Same problem as atomic_xchg() on PowerPC:
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!
int nv_atomic_cmpxchg(atomic_t *val, int old, int new);
// Same problem as atomic_xchg() on PowerPC:
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new);
// This Linux kernel commit:
// 2016-08-30 0d025d271e55f3de21f0aaaf54b42d20404d2b23
// leads to build failures on x86_64 when compiling without optimization. Avoid
// that problem by providing our own builds of copy_from_user / copy_to_user
// for debug (non-optimized) UVM builds. Those are accessed via these
// nv_copy_to/from_user wrapper functions.
//
// Bug 1849583 has further details.
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n);
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n);
#else
#define nv_atomic_xchg atomic_xchg
#define nv_atomic_cmpxchg atomic_cmpxchg
#define nv_atomic_long_cmpxchg atomic_long_cmpxchg
#define nv_copy_to_user copy_to_user
#define nv_copy_from_user copy_from_user
#endif
#ifndef NV_ALIGN_DOWN
#define NV_ALIGN_DOWN(v,g) ((v) & ~((g) - 1))
#endif
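// Worked example (illustrative, not from the original source): NV_ALIGN_DOWN
// assumes the granularity g is a power of two, since it masks with ~(g - 1).
// For instance, NV_ALIGN_DOWN(0x12345, 0x1000) yields 0x12000; a
// non-power-of-two granularity would give incorrect results.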
#if defined(NVCPU_X86)
/* Some old IA32 kernels don't have 64/64 division routines;
 * they only support 64/32 division with do_div(). */
static inline uint64_t NV_DIV64(uint64_t dividend, uint64_t divisor, uint64_t *remainder)
{
/* do_div() only accepts a 32-bit divisor */
*remainder = do_div(dividend, (uint32_t)divisor);
/* do_div() modifies the dividend in-place */
return dividend;
}
#else
/* All other 32/64-bit kernels we support (including non-x86 kernels) support
* 64/64 division. */
static inline uint64_t NV_DIV64(uint64_t dividend, uint64_t divisor, uint64_t *remainder)
{
*remainder = dividend % divisor;
return dividend / divisor;
}
#endif
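// Usage sketch (illustrative only; the variable names are assumptions):
//
//     NvU64 remainder;
//     NvU64 pages = NV_DIV64(length_in_bytes, PAGE_SIZE, &remainder);
//
// On 32-bit x86 this routes through do_div(), so the divisor must fit in
// 32 bits; on all other supported configurations it is a plain 64/64 divide.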
#if defined(CLOCK_MONOTONIC_RAW)
/* Return a nanosecond-precise value */
static inline NvU64 NV_GETTIME(void)
{
struct timespec64 tm;
ktime_get_raw_ts64(&tm);
return (NvU64) timespec64_to_ns(&tm);
}
#else
/* We can only return a microsecond-precise value with the
* available non-GPL symbols. */
static inline NvU64 NV_GETTIME(void)
{
struct timespec64 tm;
ktime_get_real_ts64(&tm);
return (NvU64) timespec64_to_ns(&tm);
}
#endif
#if !defined(ilog2)
static inline int NV_ILOG2_U32(u32 n)
{
return fls(n) - 1;
}
static inline int NV_ILOG2_U64(u64 n)
{
return fls64(n) - 1;
}
#define ilog2(n) (sizeof(n) <= 4 ? NV_ILOG2_U32(n) : NV_ILOG2_U64(n))
#endif
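// Worked example (illustrative): with this fallback, ilog2(1) == 0 and
// ilog2(PAGE_SIZE) == PAGE_SHIFT, while ilog2(0) is undefined
// (fls(0) - 1 == -1), matching the semantics of the kernel's own ilog2().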
// for_each_bit added in 2.6.24 via commit 3e037454bcfa4b187e8293d2121bd8c0f5a5c31c
// later renamed in 2.6.34 via commit 984b3f5746ed2cde3d184651dabf26980f2b66e5
#if !defined(for_each_set_bit)
#define for_each_set_bit(bit, addr, size) for_each_bit((bit), (addr), (size))
#endif
// for_each_set_bit_cont was added in 3.2 via 1e2ad28f80b4e155678259238f51edebc19e4014
// It was renamed to for_each_set_bit_from in 3.3 via 307b1cd7ecd7f3dc5ce3d3860957f034f0abe4df
#if !defined(for_each_set_bit_from)
#define for_each_set_bit_from(bit, addr, size) \
for ((bit) = find_next_bit((addr), (size), (bit)); \
(bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
#endif
// for_each_clear_bit and for_each_clear_bit_from were added in 3.10 via
// 03f4a8226c2f9c14361f75848d1e93139bab90c4
#if !defined(for_each_clear_bit)
#define for_each_clear_bit(bit, addr, size) \
for ((bit) = find_first_zero_bit((addr), (size)); \
(bit) < (size); \
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
#endif
#if !defined(for_each_clear_bit_from)
#define for_each_clear_bit_from(bit, addr, size) \
for ((bit) = find_next_zero_bit((addr), (size), (bit)); \
(bit) < (size); \
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
#endif
// bitmap_clear was added in 2.6.33 via commit c1a2a962a2ad103846e7950b4591471fabecece7
#if !defined(NV_BITMAP_CLEAR_PRESENT)
static inline void bitmap_clear(unsigned long *map, unsigned int start, int len)
{
unsigned int index = start;
for_each_set_bit_from(index, map, start + len)
__clear_bit(index, map);
}
static inline void bitmap_set(unsigned long *map, unsigned int start, int len)
{
unsigned int index = start;
for_each_clear_bit_from(index, map, start + len)
__set_bit(index, map);
}
#endif
// Added in 2.6.24
#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#endif
// WRITE_ONCE/READ_ONCE have incompatible definitions across versions, which produces warnings.
// Therefore, we define our own macros
#define UVM_WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
#define UVM_READ_ONCE(x) ACCESS_ONCE(x)
// smp_mb__before_atomic was added in 3.16, provide a fallback
#ifndef smp_mb__before_atomic
#if NVCPU_IS_X86 || NVCPU_IS_X86_64
// That's what the kernel does for x86
#define smp_mb__before_atomic() barrier()
#else
// That's what the kernel does for at least arm32, arm64 and powerpc as of 4.3
#define smp_mb__before_atomic() smp_mb()
#endif
#endif
// smp_mb__after_atomic was added in 3.16, provide a fallback
#ifndef smp_mb__after_atomic
#if NVCPU_IS_X86 || NVCPU_IS_X86_64
// That's what the kernel does for x86
#define smp_mb__after_atomic() barrier()
#else
// That's what the kernel does for at least arm32, arm64 and powerpc as of 4.3
#define smp_mb__after_atomic() smp_mb()
#endif
#endif
// smp_load_acquire and smp_store_release were added in commit
// 47933ad41a86a4a9b50bed7c9b9bd2ba242aac63 ("arch: Introduce
// smp_load_acquire(), smp_store_release()") in v3.14 (2013-11-06).
#ifndef smp_load_acquire
#define smp_load_acquire(p) \
({ \
typeof(*(p)) __v = UVM_READ_ONCE(*(p)); \
smp_mb(); \
__v; \
})
#endif
#ifndef smp_store_release
#define smp_store_release(p, v) \
do { \
smp_mb(); \
UVM_WRITE_ONCE(*(p), v); \
} while (0)
#endif
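// Illustrative pairing sketch (not part of the original header; the "payload"
// and "ready" fields are assumptions): a producer publishes data with
// smp_store_release() and a consumer observes it with smp_load_acquire():
//
//     writer:  obj->payload = value;
//              smp_store_release(&obj->ready, 1);
//     reader:  if (smp_load_acquire(&obj->ready))
//                  use(obj->payload);
//
// The fallback definitions above provide the same ordering via a full
// smp_mb(), which is stronger (and slower) than a native acquire/release.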
// atomic_read_acquire and atomic_set_release were added in commit
// 654672d4ba1a6001c365833be895f9477c4d5eab ("locking/atomics:
// Add _{acquire|release|relaxed}() variants of some atomic operations") in v4.3
// (2015-08-06).
#ifndef atomic_read_acquire
#define atomic_read_acquire(p) smp_load_acquire(&(p)->counter)
#endif
#ifndef atomic_set_release
#define atomic_set_release(p, v) smp_store_release(&(p)->counter, v)
#endif
// Added in 3.11
#ifndef PAGE_ALIGNED
#define PAGE_ALIGNED(addr) (((addr) & (PAGE_SIZE - 1)) == 0)
#endif
// Added in 2.6.37 via commit e1ca7788dec6773b1a2bce51b7141948f2b8bccf
#if !defined(NV_VZALLOC_PRESENT)
static inline void *vzalloc(unsigned long size)
{
void *p = vmalloc(size);
if (p)
memset(p, 0, size);
return p;
}
#endif
// Changed in 3.17 via commit 743162013d40ca612b4cb53d3a200dff2d9ab26e
#if (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 3)
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \
wait_on_bit_lock(word, bit, mode)
#elif (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 4)
static __sched int uvm_bit_wait(void *word)
{
if (signal_pending_state(current->state, current))
return 1;
schedule();
return 0;
}
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \
wait_on_bit_lock(word, bit, uvm_bit_wait, mode)
#else
#error "Unknown number of arguments"
#endif
static void uvm_init_radix_tree_preloadable(struct radix_tree_root *tree)
{
// GFP_NOWAIT, or some combination of flags that avoids setting
// __GFP_DIRECT_RECLAIM (__GFP_WAIT prior to commit
// d0164adc89f6bb374d304ffcc375c6d2652fe67d from Nov 2015), is required for
// using radix_tree_preload() for the tree.
INIT_RADIX_TREE(tree, GFP_NOWAIT);
}
#if !defined(NV_RADIX_TREE_EMPTY_PRESENT)
static bool radix_tree_empty(struct radix_tree_root *tree)
{
void *dummy;
return radix_tree_gang_lookup(tree, &dummy, 0, 1) == 0;
}
#endif
// The radix tree root parameter was added to radix_tree_replace_slot in 4.10.
// That same change moved radix_tree_replace_slot from a header-only
// implementation to a .c file, but the symbol wasn't exported until later so
// we cannot use the function on 4.10. UVM uses this macro to ensure that
// radix_tree_replace_slot is not called when using that kernel.
#ifndef NV_RADIX_TREE_REPLACE_SLOT_PRESENT
#define NV_RADIX_TREE_REPLACE_SLOT(...) \
UVM_ASSERT_MSG(false, "radix_tree_replace_slot cannot be used in 4.10\n");
#else
#if (NV_RADIX_TREE_REPLACE_SLOT_ARGUMENT_COUNT == 2)
#define NV_RADIX_TREE_REPLACE_SLOT(root, slot, entry) \
radix_tree_replace_slot((slot), (entry))
#elif (NV_RADIX_TREE_REPLACE_SLOT_ARGUMENT_COUNT == 3)
#define NV_RADIX_TREE_REPLACE_SLOT(root, slot, entry) \
radix_tree_replace_slot((root), (slot), (entry))
#else
#error "Unknown number of arguments"
#endif
#endif
#if !defined(NV_USLEEP_RANGE_PRESENT)
static void __sched usleep_range(unsigned long min, unsigned long max)
{
unsigned min_msec = min / 1000;
unsigned max_msec = max / 1000;
if (min_msec != 0)
msleep(min_msec);
else if (max_msec != 0)
msleep(max_msec);
else
msleep(1);
}
#endif
typedef struct
{
struct mem_cgroup *new_memcg;
struct mem_cgroup *old_memcg;
} uvm_memcg_context_t;
// cgroup support requires set_active_memcg(). set_active_memcg() is an
// inline function that requires the int_active_memcg per-cpu symbol when called
// from interrupt context. int_active_memcg is only exported by commit
// c74d40e8b5e2a on >= 5.14 kernels.
#if NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg
#define UVM_CGROUP_ACCOUNTING_SUPPORTED() 1
#define NV_UVM_GFP_FLAGS_ACCOUNT (NV_UVM_GFP_FLAGS | __GFP_ACCOUNT)
// Begin a Cgroup accounting context.
// All sysmem page allocations done with NV_UVM_GFP_FLAGS_ACCOUNT will be
// charged to the mm's memory control group.
//
// If mm is NULL, the accounting context will not be switched. Please note
// that in this case, any allocations which include NV_UVM_GFP_FLAGS_ACCOUNT
// will be charged to the currently active context.
//
// Locking: uvm_memcg_context_t does not maintain its own locking. Callers must
// ensure that concurrent calls do not operate on the same context.
void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm);
// End the Cgroup accounting context started with uvm_memcg_context_start().
// After this call, the previously active memory control group will be restored.
//
// Locking: Callers must ensure that concurrent calls do not operate on the same
// context.
void uvm_memcg_context_end(uvm_memcg_context_t *context);
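// Usage sketch (illustrative only; not taken from the original file):
//
//     uvm_memcg_context_t ctx;
//     uvm_memcg_context_start(&ctx, mm);
//     page = alloc_page(NV_UVM_GFP_FLAGS_ACCOUNT);   // charged to mm's memcg
//     uvm_memcg_context_end(&ctx);
//
// Allocations made without __GFP_ACCOUNT inside the context are not charged.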
#else // !NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg
#define UVM_CGROUP_ACCOUNTING_SUPPORTED() 0
#define NV_UVM_GFP_FLAGS_ACCOUNT (NV_UVM_GFP_FLAGS)
static inline void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm)
{
return;
}
static inline void uvm_memcg_context_end(uvm_memcg_context_t *context)
{
return;
}
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg
// Commit 1dff8083a024650c75a9c961c38082473ceae8cf (v4.7).
//
// Archs with CONFIG_MMU should have their own page.h, and can't include
// asm-generic/page.h. However, x86, powerpc, and arm64 don't define the page_to_virt()
// macro in their version of page.h.
#include <linux/mm.h>
#ifndef page_to_virt
#include <asm/page.h>
#define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x)))
#endif
#endif // _UVM_LINUX_H

View File

@@ -0,0 +1,42 @@
/*******************************************************************************
Copyright (c) 2013 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _UVM_LINUX_IOCTL_H
#define _UVM_LINUX_IOCTL_H
#include "uvm_ioctl.h"
// This ioctl must be the first operation performed on the UVM file descriptor
// after opening it. Until this ioctl is made, the UVM file descriptor is
// inoperable: all other ioctls will return NV_ERR_ILLEGAL_ACTION and mmap will
// return EBADFD.
#define UVM_INITIALIZE 0x30000001
typedef struct
{
NvU64 flags NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_INITIALIZE_PARAMS;
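// Illustrative user-space call sequence (a sketch, not part of the original
// header; the device path and the omitted error handling are assumptions):
//
//     int fd = open("/dev/nvidia-uvm", O_RDWR);
//     UVM_INITIALIZE_PARAMS params = {0};
//     ioctl(fd, UVM_INITIALIZE, &params);      // must be the first ioctl
//     // params.rmStatus holds the NV_STATUS result of the initialization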
#define UVM_DEINITIALIZE 0x30000002
#endif // _UVM_LINUX_IOCTL_H

View File

@@ -0,0 +1,380 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_lock.h"
#include "uvm_thread_context.h"
#include "uvm_kvmalloc.h"
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 26);
switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ISR);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_MMAP_LOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACES_LIST);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_READ_ACQUIRE_WRITE_RELEASE_LOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_EXT_RANGE_TREE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_API);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_GPUS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_BLOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_PMA);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_ROOT_CHUNK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHANNEL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_EVENTS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_TOOLS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_SEMA_POOL_TRACKER);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_LEAF);
UVM_ENUM_STRING_DEFAULT();
}
}
bool __uvm_record_lock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
bool correct = true;
uvm_lock_order_t conflicting_order;
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
uvm_lock_flags_t mode_flags = (flags & UVM_LOCK_FLAGS_MODE_MASK);
bool trylock = (flags & UVM_LOCK_FLAGS_TRYLOCK);
UVM_ASSERT(mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE || mode_flags == UVM_LOCK_FLAGS_MODE_SHARED);
if (!uvm_context) {
UVM_ERR_PRINT("Failed to acquire the thread context when recording lock of %s\n",
uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->skip_lock_tracking > 0)
return true;
if (lock_order == UVM_LOCK_ORDER_INVALID) {
UVM_ERR_PRINT("Acquiring a lock (0x%llx) with an invalid lock order\n", (NvU64)lock);
return false;
}
// TODO: Bug 1799173: Hack in special rules for the RM locks so we don't add
// any new invalid uses while we figure out a better way to handle
// these dependencies.
if (lock_order == UVM_LOCK_ORDER_RM_GPUS) {
if (test_bit(UVM_LOCK_ORDER_MMAP_LOCK, uvm_context->acquired_lock_orders)) {
UVM_ERR_PRINT("Acquiring RM GPU lock with mmap_lock held\n");
correct = false;
}
if (test_bit(UVM_LOCK_ORDER_VA_SPACE, uvm_context->exclusive_acquired_lock_orders)) {
UVM_ERR_PRINT("Acquiring RM GPU lock with VA space lock held in write mode\n");
correct = false;
}
else if (test_bit(UVM_LOCK_ORDER_VA_SPACE, uvm_context->acquired_lock_orders) &&
!test_bit(UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS, uvm_context->acquired_lock_orders)) {
UVM_ERR_PRINT("Acquiring RM GPU lock with the VA space lock held in read mode, but without the VA space writer serialization lock held\n");
correct = false;
}
}
conflicting_order = find_next_bit(uvm_context->acquired_lock_orders, UVM_LOCK_ORDER_COUNT, lock_order);
if (conflicting_order != UVM_LOCK_ORDER_COUNT) {
if (trylock) {
// If the lock attempt is a trylock, i.e. non-blocking, then
// out-of-order lock acquisition is acceptable. Record it
// to enable __uvm_record_unlock() to skip enforcing in-order
// lock release for this lock order.
__set_bit(lock_order, uvm_context->out_of_order_acquired_lock_orders);
} else {
correct = false;
// Equivalent order is not necessarily incorrect. However, it is not yet supported,
// and is therefore treated as an error case.
UVM_ERR_PRINT("Already acquired equivalent or deeper lock %s when trying to acquire %s\n",
uvm_lock_order_to_string(conflicting_order),
uvm_lock_order_to_string(lock_order));
}
}
__set_bit(lock_order, uvm_context->acquired_lock_orders);
if (mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
__set_bit(lock_order, uvm_context->exclusive_acquired_lock_orders);
uvm_context->acquired[lock_order] = lock;
return correct;
}
bool __uvm_record_unlock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
bool correct = true;
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
uvm_lock_flags_t mode_flags = (flags & UVM_LOCK_FLAGS_MODE_MASK);
bool exclusive = (mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE);
bool out_of_order = (flags & UVM_LOCK_FLAGS_OUT_OF_ORDER);
UVM_ASSERT(mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE || mode_flags == UVM_LOCK_FLAGS_MODE_SHARED);
if (!uvm_context) {
UVM_ERR_PRINT("Failed to acquire the thread context when recording unlock of %s\n",
uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->skip_lock_tracking > 0)
return true;
if (lock_order == UVM_LOCK_ORDER_INVALID) {
UVM_ERR_PRINT("Releasing a lock (0x%llx) with an invalid lock order\n", (NvU64)lock);
return false;
}
// Releasing a lock out of order is not incorrect, but often points to
// issues. Consider it an error by default, unless the lock was
// legally acquired out-of-order via trylock, in which case out-of-order
// lock release is expected. But also give an option to opt out of
// enforcing in-order lock release, if needed.
if (!__test_and_clear_bit(lock_order, uvm_context->out_of_order_acquired_lock_orders) && !out_of_order) {
uvm_lock_order_t deeper_order = find_next_bit(uvm_context->acquired_lock_orders,
UVM_LOCK_ORDER_COUNT, lock_order + 1);
if (deeper_order != UVM_LOCK_ORDER_COUNT) {
correct = false;
UVM_ERR_PRINT("Releasing lock %s while still holding %s\n",
uvm_lock_order_to_string(lock_order),
uvm_lock_order_to_string(deeper_order));
}
}
if (!__test_and_clear_bit(lock_order, uvm_context->acquired_lock_orders)) {
correct = false;
UVM_ERR_PRINT("Releasing lock %s that's not held\n", uvm_lock_order_to_string(lock_order));
}
else if (uvm_context->acquired[lock_order] != lock) {
correct = false;
UVM_ERR_PRINT("Releasing a different instance of lock %s than held, held 0x%llx releasing 0x%llx\n",
uvm_lock_order_to_string(lock_order),
(NvU64)uvm_context->acquired[lock_order],
(NvU64)lock);
}
else if (!!__test_and_clear_bit(lock_order, uvm_context->exclusive_acquired_lock_orders) != exclusive) {
correct = false;
UVM_ERR_PRINT("Releasing lock %s as %s while it was acquired as %s\n",
uvm_lock_order_to_string(lock_order),
exclusive ? "exclusive" : "shared", exclusive ? "shared" : "exclusive");
}
uvm_context->acquired[lock_order] = NULL;
return correct;
}
bool __uvm_record_downgrade(void *lock, uvm_lock_order_t lock_order)
{
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
if (!uvm_context) {
UVM_ERR_PRINT("Failed to acquire the thread context when recording downgrade of %s\n",
uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->skip_lock_tracking > 0)
return true;
if (!__uvm_check_locked(lock, lock_order, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)) {
UVM_ERR_PRINT("Lock %s is not held in exclusive mode: downgrading failed\n",
uvm_lock_order_to_string(lock_order));
return false;
}
clear_bit(lock_order, uvm_context->exclusive_acquired_lock_orders);
return true;
}
bool __uvm_check_locked(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
uvm_lock_flags_t mode_flags = (flags & UVM_LOCK_FLAGS_MODE_MASK);
bool exclusive = (mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE);
if (!uvm_context) {
UVM_ERR_PRINT("Failed to acquire the thread context when checking that lock %s is locked\n",
uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->skip_lock_tracking > 0)
return true;
if (!test_bit(lock_order, uvm_context->acquired_lock_orders)) {
UVM_ERR_PRINT("No lock with order %s acquired at all\n", uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->acquired[lock_order] != lock) {
UVM_ERR_PRINT("Different instance of lock %s acquired, 0x%llx != 0x%llx\n",
uvm_lock_order_to_string(lock_order),
(NvU64)lock,
(NvU64)uvm_context->acquired[lock_order]);
return false;
}
if (mode_flags != UVM_LOCK_FLAGS_MODE_ANY &&
!!test_bit(lock_order, uvm_context->exclusive_acquired_lock_orders) != exclusive) {
UVM_ERR_PRINT("Lock %s acquired in %s mode instead of %s mode\n",
uvm_lock_order_to_string(lock_order),
exclusive ? "shared" : "exclusive", exclusive ? "exclusive" : "shared");
return false;
}
return true;
}
bool __uvm_locking_initialized(void)
{
return uvm_thread_context_global_initialized();
}
bool __uvm_check_lockable_order(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
uvm_lock_order_t conflicting_order;
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
bool trylock = (flags & UVM_LOCK_FLAGS_TRYLOCK);
if (!uvm_context)
return true;
if (uvm_context->skip_lock_tracking > 0)
return true;
if (lock_order == UVM_LOCK_ORDER_INVALID) {
UVM_ERR_PRINT("Checking for an invalid lock order\n");
return false;
}
if (!trylock) {
conflicting_order = find_next_bit(uvm_context->acquired_lock_orders, UVM_LOCK_ORDER_COUNT, lock_order);
if (conflicting_order != UVM_LOCK_ORDER_COUNT) {
UVM_ERR_PRINT("Acquired equivalent or deeper lock %s when checking that %s is lockable\n",
uvm_lock_order_to_string(conflicting_order),
uvm_lock_order_to_string(lock_order));
return false;
}
}
return true;
}
bool __uvm_check_unlocked_order(uvm_lock_order_t lock_order)
{
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
if (!uvm_context)
return true;
if (uvm_context->skip_lock_tracking > 0)
return true;
if (lock_order == UVM_LOCK_ORDER_INVALID) {
UVM_ERR_PRINT("Checking for an invalid lock order\n");
return false;
}
if (test_bit(lock_order, uvm_context->acquired_lock_orders)) {
UVM_ERR_PRINT("Lock order %s acquired\n", uvm_lock_order_to_string(lock_order));
return false;
}
return true;
}
bool __uvm_check_all_unlocked(uvm_thread_context_lock_t *uvm_context)
{
uvm_lock_order_t lock_order;
NvU32 still_locked_count;
if (!uvm_context)
return true;
still_locked_count = bitmap_weight(uvm_context->acquired_lock_orders, UVM_LOCK_ORDER_COUNT);
if (still_locked_count == 0)
return true;
UVM_ERR_PRINT("Still %u acquired lock(s):\n", still_locked_count);
for_each_set_bit(lock_order, uvm_context->acquired_lock_orders, UVM_LOCK_ORDER_COUNT) {
UVM_ERR_PRINT(" Lock %s, instance 0x%llx\n",
uvm_lock_order_to_string(lock_order),
(NvU64)uvm_context->acquired[lock_order]);
}
return false;
}
bool __uvm_thread_check_all_unlocked()
{
return __uvm_check_all_unlocked(uvm_thread_context_lock_get());
}
NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_order_t lock_order)
{
// TODO: Bug 1772140: Notably bit locks currently do not work on memory
// allocated through vmalloc() (including big allocations created with
// uvm_kvmalloc()). The problem is the bit_waitqueue() helper used by the
// kernel internally that uses virt_to_page().
// To prevent us from using kmalloc() for a huge allocation, warn if the
// allocation size gets bigger than what we are comfortable with for
// kmalloc() in uvm_kvmalloc().
size_t size = sizeof(unsigned long) * BITS_TO_LONGS(count);
WARN_ON_ONCE(size > UVM_KMALLOC_THRESHOLD);
bit_locks->bits = kzalloc(size, NV_UVM_GFP_FLAGS);
if (!bit_locks->bits)
return NV_ERR_NO_MEMORY;
#if UVM_IS_DEBUG()
uvm_locking_assert_initialized();
bit_locks->lock_order = lock_order;
#endif
return NV_OK;
}
void uvm_bit_locks_deinit(uvm_bit_locks_t *bit_locks)
{
kfree(bit_locks->bits);
memset(bit_locks, 0, sizeof(*bit_locks));
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,460 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_test.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_thread_context.h"
#define UVM_LOCK_ORDER_FIRST (UVM_LOCK_ORDER_INVALID + 1)
#define UVM_LOCK_ORDER_SECOND (UVM_LOCK_ORDER_INVALID + 2)
static bool fake_lock(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
// Just use the lock_order as the void * handle for the lock
return __uvm_record_lock((void*)(long)lock_order, lock_order, flags);
}
static bool fake_unlock_common(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
// Just use the lock_order as the void * handle for the lock
return __uvm_record_unlock((void*)(long)lock_order, lock_order, flags);
}
static bool fake_unlock(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
return fake_unlock_common(lock_order, flags);
}
static bool fake_unlock_out_of_order(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
return fake_unlock_common(lock_order, flags | UVM_LOCK_FLAGS_OUT_OF_ORDER);
}
static bool fake_downgrade(uvm_lock_order_t lock_order)
{
// Just use the lock_order as the void * handle for the lock
return __uvm_record_downgrade((void*)(long)lock_order, lock_order);
}
static bool fake_check_locked(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
return __uvm_check_locked((void*)(long)lock_order, lock_order, flags);
}
// TODO: Bug 1799173: The lock asserts verify that the RM GPU lock isn't taken
// with the VA space lock in exclusive mode, and that the RM GPU lock
// isn't taken with mmap_lock held in any mode. Hack around this in the
// test to enable the checks until we figure out something better.
static bool skip_lock(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
uvm_lock_flags_t mode_flags = (flags & UVM_LOCK_FLAGS_MODE_MASK);
if (lock_order == UVM_LOCK_ORDER_RM_GPUS)
return mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE;
return lock_order == UVM_LOCK_ORDER_MMAP_LOCK;
}
static NV_STATUS test_all_locks_from(uvm_lock_order_t from_lock_order)
{
NvU32 exclusive;
uvm_lock_flags_t flags;
NvU32 out_of_order;
NvU32 lock_order;
TEST_CHECK_RET(from_lock_order != UVM_LOCK_ORDER_INVALID);
for (out_of_order = 0; out_of_order < 2; ++out_of_order) {
for (exclusive = 0; exclusive < 2; ++exclusive) {
flags = exclusive ? UVM_LOCK_FLAGS_MODE_EXCLUSIVE : UVM_LOCK_FLAGS_MODE_SHARED;
if (out_of_order)
flags |= UVM_LOCK_FLAGS_OUT_OF_ORDER;
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
TEST_CHECK_RET(__uvm_check_unlocked_order(lock_order));
TEST_CHECK_RET(__uvm_check_lockable_order(lock_order, flags));
}
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_lock(lock_order, flags));
}
if (!skip_lock(from_lock_order, flags)) {
TEST_CHECK_RET(!__uvm_check_unlocked_order(from_lock_order));
TEST_CHECK_RET(!__uvm_check_lockable_order(from_lock_order, flags));
}
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_check_locked(lock_order, flags));
}
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_check_locked(lock_order, UVM_LOCK_FLAGS_MODE_ANY));
}
if (out_of_order == 0) {
for (lock_order = UVM_LOCK_ORDER_COUNT - 1; lock_order != from_lock_order - 1; --lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_unlock(lock_order, flags));
}
}
else {
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_unlock_out_of_order(lock_order, flags));
}
}
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(__uvm_check_unlocked_order(lock_order));
TEST_CHECK_RET(__uvm_check_lockable_order(lock_order, flags));
}
}
}
return NV_OK;
}
static NV_STATUS test_all_locks(void)
{
TEST_CHECK_RET(test_all_locks_from(UVM_LOCK_ORDER_FIRST) == NV_OK);
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_locking_first_as_shared_then_test_higher_order_locks(void)
{
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(test_all_locks_from(UVM_LOCK_ORDER_FIRST + 1) == NV_OK);
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_locking_second_as_exclusive_then_test_higher_order_locks(void)
{
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(test_all_locks_from(UVM_LOCK_ORDER_SECOND + 1) == NV_OK);
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_without_locking(void)
{
// Unlocking a lock w/o locking any lock at all
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_different_lock_order_than_locked(void)
{
// Unlocking a different lock than locked
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!__uvm_thread_check_all_unlocked());
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_different_lock_instance_than_locked(void)
{
// Unlocking a different instance of a lock than locked
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!__uvm_record_unlock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_with_different_mode_than_locked(void)
{
// Unlocking with different mode
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_in_different_order_than_locked(void)
{
// Unlocking in different order than locked
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock_out_of_order(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
// Unlocking in different order than locked (not necessarily incorrect, but
// commonly pointing to issues)
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_locking_out_of_order(void)
{
// Locking in wrong order
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_locking_same_order_twice(void)
{
// Locking the same order twice (lock tracking doesn't support this case although
// it's not necessarily incorrect)
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_locked_when_no_locks_held(void)
{
// Nothing locked
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_ANY));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_exclusive_when_locked_as_shared(void)
{
// Expecting exclusive while locked as shared
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_shared_when_locked_as_exclusive(void)
{
// Expecting shared while locked as exclusive
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_locked_when_different_instance_held(void)
{
// Wrong instance of a lock held
TEST_CHECK_RET(__uvm_record_lock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_record_unlock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_all_unlocked_when_lock_held(void)
{
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!__uvm_thread_check_all_unlocked());
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_downgrading(void)
{
// Lock downgrade
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_ANY));
TEST_CHECK_RET(fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_ANY));
// Can't downgrade twice
TEST_CHECK_RET(!fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_ANY));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_downgrading_without_locking(void)
{
// Downgrading a lock w/o locking any lock at all
TEST_CHECK_RET(!fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_downgrading_when_different_instance_held(void)
{
// Wrong instance of lock to downgrade
TEST_CHECK_RET(__uvm_record_lock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(__uvm_record_unlock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_downgrading_when_locked_as_shared(void)
{
// Downgrading a lock that was acquired as shared
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_try_locking_out_of_order(void)
{
// Try-locking in wrong order
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS run_all_lock_tests(void)
{
// The test needs all locks to be released initially
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
TEST_CHECK_RET(test_all_locks() == NV_OK);
TEST_CHECK_RET(test_locking_first_as_shared_then_test_higher_order_locks() == NV_OK);
TEST_CHECK_RET(test_locking_second_as_exclusive_then_test_higher_order_locks() == NV_OK);
TEST_CHECK_RET(test_unlocking_without_locking() == NV_OK);
TEST_CHECK_RET(test_unlocking_different_lock_order_than_locked() == NV_OK);
TEST_CHECK_RET(test_unlocking_different_lock_instance_than_locked() == NV_OK);
TEST_CHECK_RET(test_unlocking_with_different_mode_than_locked() == NV_OK);
TEST_CHECK_RET(test_unlocking_in_different_order_than_locked() == NV_OK);
TEST_CHECK_RET(test_locking_out_of_order() == NV_OK);
TEST_CHECK_RET(test_locking_same_order_twice() == NV_OK);
TEST_CHECK_RET(test_checking_locked_when_no_locks_held() == NV_OK);
TEST_CHECK_RET(test_checking_exclusive_when_locked_as_shared() == NV_OK);
TEST_CHECK_RET(test_checking_shared_when_locked_as_exclusive() == NV_OK);
TEST_CHECK_RET(test_checking_locked_when_different_instance_held() == NV_OK);
TEST_CHECK_RET(test_checking_all_unlocked_when_lock_held() == NV_OK);
TEST_CHECK_RET(test_downgrading() == NV_OK);
TEST_CHECK_RET(test_downgrading_without_locking() == NV_OK);
TEST_CHECK_RET(test_downgrading_when_different_instance_held() == NV_OK);
TEST_CHECK_RET(test_downgrading_when_locked_as_shared() == NV_OK);
TEST_CHECK_RET(test_try_locking_out_of_order() == NV_OK);
return NV_OK;
}
NV_STATUS uvm_test_lock_sanity(UVM_TEST_LOCK_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_thread_context_wrapper_t thread_context_wrapper_backup;
// The global PM lock is acquired by the top-level UVM ioctl() entry point
// and still held here, which confuses the (pre-existing) test logic that
// assumes everything is unlocked at the beginning. Clearing the thread
// context data resolves the issue, but the original state needs to be saved
// and restored before exiting the test to avoid problems in the top-level
// code.
uvm_thread_context_save(&thread_context_wrapper_backup.context);
status = run_all_lock_tests();
uvm_thread_context_restore(&thread_context_wrapper_backup.context);
return status;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,151 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_MAP_EXTERNAL_H__
#define __UVM_MAP_EXTERNAL_H__
#include "uvm_forward_decl.h"
#include "uvm_va_range.h"
#include "uvm_tracker.h"
#include "nv_uvm_types.h"
#include "uvm_types.h"
typedef struct
{
NvU64 map_offset;
UvmGpuMappingType mapping_type;
UvmGpuCachingType caching_type;
UvmGpuFormatType format_type;
UvmGpuFormatElementBits element_bits;
UvmGpuCompressionType compression_type;
} uvm_map_rm_params_t;
static uvm_ext_gpu_range_tree_t *uvm_ext_gpu_range_tree(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
{
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL);
return &va_range->external.gpu_ranges[uvm_id_gpu_index(gpu->id)];
}
// Returns the first external map (if any) in the gpu's range tree.
// va_range should be of type UVM_VA_RANGE_TYPE_EXTERNAL.
// The caller must hold the range tree lock.
static uvm_ext_gpu_map_t *uvm_ext_gpu_map_iter_first(uvm_va_range_t *va_range, uvm_gpu_t *gpu, NvU64 start, NvU64 end)
{
uvm_ext_gpu_range_tree_t *range_tree;
uvm_range_tree_node_t *node;
UVM_ASSERT(start >= va_range->node.start);
UVM_ASSERT(end <= va_range->node.end);
range_tree = uvm_ext_gpu_range_tree(va_range, gpu);
node = uvm_range_tree_iter_first(&range_tree->tree, start, end);
return uvm_ext_gpu_map_container(node);
}
// Returns the external map following the provided map (if any) in address order from
// the gpu's range tree. va_range should be of type UVM_VA_RANGE_TYPE_EXTERNAL.
// The caller must hold the range tree lock.
static uvm_ext_gpu_map_t *uvm_ext_gpu_map_iter_next(uvm_va_range_t *va_range, uvm_ext_gpu_map_t *ext_gpu_map, NvU64 end)
{
uvm_ext_gpu_range_tree_t *range_tree;
uvm_range_tree_node_t *node;
if (!ext_gpu_map)
return NULL;
UVM_ASSERT(end <= va_range->node.end);
range_tree = uvm_ext_gpu_range_tree(va_range, ext_gpu_map->gpu);
node = uvm_range_tree_iter_next(&range_tree->tree, &ext_gpu_map->node, end);
return uvm_ext_gpu_map_container(node);
}
// The four iterators below require that the caller hold the gpu's range tree
// lock.
#define uvm_ext_gpu_map_for_each_in(ext_gpu_map, va_range, gpu, start, end) \
for ((ext_gpu_map) = uvm_ext_gpu_map_iter_first((va_range), (gpu), (start), (end)); \
(ext_gpu_map); \
(ext_gpu_map) = uvm_ext_gpu_map_iter_next((va_range), (ext_gpu_map), (end)))
#define uvm_ext_gpu_map_for_each_in_safe(ext_gpu_map, ext_gpu_map_next, va_range, gpu, start, end) \
for ((ext_gpu_map) = uvm_ext_gpu_map_iter_first((va_range), (gpu), (start), (end)), \
(ext_gpu_map_next) = uvm_ext_gpu_map_iter_next((va_range), (ext_gpu_map), (end)); \
(ext_gpu_map); \
(ext_gpu_map) = (ext_gpu_map_next), \
(ext_gpu_map_next) = uvm_ext_gpu_map_iter_next((va_range), (ext_gpu_map), (end)))
#define uvm_ext_gpu_map_for_each(ext_gpu_map, va_range, gpu) \
uvm_ext_gpu_map_for_each_in(ext_gpu_map, va_range, gpu, (va_range)->node.start, (va_range)->node.end)
#define uvm_ext_gpu_map_for_each_safe(ext_gpu_map, ext_gpu_map_next, va_range, gpu) \
uvm_ext_gpu_map_for_each_in_safe(ext_gpu_map, \
ext_gpu_map_next, \
va_range, \
gpu, \
(va_range)->node.start, \
(va_range)->node.end)
// User-facing APIs (uvm_api_map_external_allocation, uvm_api_free) are
// declared in uvm_api.h.
// Queries RM for the PTEs appropriate to the VA range and mem_info, allocates
// page tables for the VA range, and writes the PTEs.
//
// va_range must have type UVM_VA_RANGE_TYPE_EXTERNAL or
// UVM_VA_RANGE_TYPE_CHANNEL. The allocation descriptor given to RM is looked up
// depending on the type of the va_range.
// For va_ranges of type UVM_VA_RANGE_TYPE_CHANNEL, the descriptor is looked up
// from the va_range. In these cases, the ext_gpu_map parameter should be NULL.
// For va_ranges of type UVM_VA_RANGE_TYPE_EXTERNAL, it is looked up from the
// ext_gpu_map parameter.
//
// This does not wait for the PTE writes to complete. The work is added to
// the output tracker.
NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,
uvm_gpu_t *mapping_gpu,
const UvmGpuMemoryInfo *mem_info,
const uvm_map_rm_params_t *map_rm_params,
uvm_ext_gpu_map_t *ext_gpu_map,
uvm_tracker_t *out_tracker);
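//
// Illustrative call sketch (not from the original header): map an RM
// allocation and wait for the PTE writes via the output tracker. The
// UvmGpu*Default enum values, mem_info, and ext_gpu_map are assumptions made
// for the example; uvm_tracker_wait() and uvm_tracker_deinit() come from
// uvm_tracker.h, included above.
//
//     uvm_map_rm_params_t map_rm_params = {
//         .map_offset       = 0,
//         .mapping_type     = UvmGpuMappingTypeDefault,
//         .caching_type     = UvmGpuCachingTypeDefault,
//         .format_type      = UvmGpuFormatTypeDefault,
//         .element_bits     = UvmGpuFormatElementBitsDefault,
//         .compression_type = UvmGpuCompressionTypeDefault,
//     };
//     uvm_tracker_t tracker = UVM_TRACKER_INIT();
//     NV_STATUS status = uvm_va_range_map_rm_allocation(va_range,
//                                                       mapping_gpu,
//                                                       &mem_info,
//                                                       &map_rm_params,
//                                                       ext_gpu_map,
//                                                       &tracker);
//     if (status == NV_OK)
//         status = uvm_tracker_wait(&tracker);
//     uvm_tracker_deinit(&tracker);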
// Removes and frees the external mapping ext_gpu_map, made on its mapping GPU
// within va_range. If deferred_free_list is NULL, the RM handle is freed
// immediately by this function. Otherwise the GPU which owns the allocation (if
// any) is retained and the handle is added to the list for later processing by
// uvm_deferred_free_object_list.
//
// If the mapping is a sparse mapping, the mapping is removed and freed.
// However, since sparse mappings do not have RM handles, nothing is added to
// the deferred_free_list (if not NULL) and the GPU is not retained.
//
// The caller must hold the range tree lock for the mapping gpu and is
// responsible for making sure that the mapping gpu is retained across those
// calls.
void uvm_ext_gpu_map_destroy(uvm_va_range_t *va_range,
uvm_ext_gpu_map_t *ext_gpu_map,
struct list_head *deferred_free_list);
// Deferred free function which frees the RM handle and the object itself.
void uvm_ext_gpu_map_free(uvm_ext_gpu_map_t *ext_gpu_map);
#endif // __UVM_MAP_EXTERNAL_H__

View File

@@ -0,0 +1,72 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_mem.h"
void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->tlb_batch.va_invalidate_supported = false;
// 128 GB should be enough for all current RM allocations and leaves enough
// space for UVM internal mappings.
// A single top level PDE covers 64 or 128 MB on Maxwell so 128 GB is fine to use.
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_base = 768ull * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
// We don't have a compelling use case in UVM-Lite for direct peer
// migrations between GPUs, so don't bother setting them up.
parent_gpu->peer_copy_mode = UVM_GPU_PEER_COPY_MODE_UNSUPPORTED;
parent_gpu->max_channel_va = 1ULL << 40;
parent_gpu->max_host_va = 1ULL << 40;
// Maxwell can only map sysmem with 4K pages
parent_gpu->can_map_sysmem_with_large_pages = false;
// Maxwell cannot place GPFIFO in vidmem
parent_gpu->gpfifo_in_vidmem_supported = false;
parent_gpu->replayable_faults_supported = false;
parent_gpu->non_replayable_faults_supported = false;
parent_gpu->access_counters_supported = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;
parent_gpu->sparse_mappings_supported = false;
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->smc.supported = false;
parent_gpu->plc_supported = false;
}

View File

@@ -0,0 +1,64 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_gpu.h"
#include "uvm_hal.h"
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "enable_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "disable_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
UVM_ASSERT_MSG(false, "clear_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
}
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_size is not supported on GPU: %s.\n", parent_gpu->name);
return 0;
}
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n", parent_gpu->name);
return false;
}
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
}

View File

@@ -0,0 +1,377 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_push.h"
#include "clb06f.h"
#include "clb0b5.h"
void uvm_hal_maxwell_ce_init(uvm_push_t *push)
{
// Notably this sends SET_OBJECT with the CE class on subchannel 0 instead
// of the HW-recommended subchannel 4 (subchannel 4 is recommended to match
// CE usage on GRCE). For the UVM driver, using subchannel 0 has the added
// benefit of verifying that we ended up on the right PBDMA, as SET_OBJECT
// with the CE class on subchannel 0 would fail on GRCE.
NV_PUSH_1U(B06F, SET_OBJECT, uvm_push_get_gpu(push)->parent->rm_info.ceClass);
}
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset_out)
{
NV_PUSH_2U(B0B5, OFFSET_OUT_UPPER, HWVALUE(B0B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),
OFFSET_OUT_LOWER, HWVALUE(B0B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out)
{
NV_PUSH_4U(B0B5, OFFSET_IN_UPPER, HWVALUE(B0B5, OFFSET_IN_UPPER, UPPER, NvOffset_HI32(offset_in)),
OFFSET_IN_LOWER, HWVALUE(B0B5, OFFSET_IN_LOWER, VALUE, NvOffset_LO32(offset_in)),
OFFSET_OUT_UPPER, HWVALUE(B0B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),
OFFSET_OUT_LOWER, HWVALUE(B0B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}
// Perform an appropriate membar before a semaphore operation. Returns whether
// the semaphore operation should include a flush.
static bool maxwell_membar_before_semaphore(uvm_push_t *push)
{
uvm_gpu_t *gpu;
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
// No MEMBAR requested, don't use a flush.
return false;
}
if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
// By default do a MEMBAR SYS and for that we can just use flush on the
// semaphore operation.
return true;
}
// MEMBAR GPU requested, do it on the HOST and skip the CE flush as CE
// doesn't have this capability.
gpu = uvm_push_get_gpu(push);
gpu->parent->host_hal->wait_for_idle(push);
gpu->parent->host_hal->membar_gpu(push);
return false;
}
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 flush_value;
bool use_flush;
use_flush = maxwell_membar_before_semaphore(push);
if (use_flush)
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NV_PUSH_3U(B0B5, SET_SEMAPHORE_A, HWVALUE(B0B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(B0B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, payload);
NV_PUSH_1U(B0B5, LAUNCH_DMA, flush_value |
HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE));
}
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 flush_value;
bool use_flush;
use_flush = maxwell_membar_before_semaphore(push);
if (use_flush)
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NV_PUSH_3U(B0B5, SET_SEMAPHORE_A, HWVALUE(B0B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(B0B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, payload);
NV_PUSH_1U(B0B5, LAUNCH_DMA, flush_value |
HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_REDUCTION, INC) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_SIGN, UNSIGNED) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_ENABLE, TRUE));
}
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
{
NvU32 flush_value;
bool use_flush;
use_flush = maxwell_membar_before_semaphore(push);
if (use_flush)
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NV_PUSH_3U(B0B5, SET_SEMAPHORE_A, HWVALUE(B0B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(B0B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, 0xdeadbeef);
NV_PUSH_1U(B0B5, LAUNCH_DMA, flush_value |
HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_FOUR_WORD_SEMAPHORE));
}
static void maxwell_membar_after_transfer(uvm_push_t *push)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
return;
// Flush on transfers only works when paired with a semaphore release. Use a
// host WFI + MEMBAR.
// http://nvbugs/1709888
gpu->parent->host_hal->wait_for_idle(push);
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
gpu->parent->host_hal->membar_gpu(push);
else
gpu->parent->host_hal->membar_sys(push);
}
static NvU32 ce_aperture(uvm_aperture_t aperture)
{
BUILD_BUG_ON(HWCONST(B0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
HWCONST(B0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
BUILD_BUG_ON(HWCONST(B0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
HWCONST(B0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
UVM_ASSERT_MSG(aperture == UVM_APERTURE_VID || aperture == UVM_APERTURE_SYS, "aperture 0x%x\n", aperture);
if (aperture == UVM_APERTURE_SYS)
return HWCONST(B0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
else
return HWCONST(B0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
// Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE
// flags
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU32 launch_dma_src_dst_type = 0;
if (src.is_virtual)
launch_dma_src_dst_type |= HWCONST(B0B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(B0B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
if (dst.is_virtual)
launch_dma_src_dst_type |= HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
if (!src.is_virtual && !dst.is_virtual) {
NV_PUSH_2U(B0B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture),
SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
else if (!src.is_virtual) {
NV_PUSH_1U(B0B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture));
}
else if (!dst.is_virtual) {
NV_PUSH_1U(B0B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
return launch_dma_src_dst_type;
}
// Noop, since DISABLE_PLC doesn't exist in Maxwell.
NvU32 uvm_hal_maxwell_ce_plc_mode(void)
{
return 0;
}
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size)
{
// If >4GB copies ever become an important use case, this function should
// use multi-line transfers so we don't have to iterate (bug 1766588).
static const size_t max_single_copy_size = 0xFFFFFFFF;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 pipelined_value;
NvU32 launch_dma_src_dst_type;
NvU32 launch_dma_plc_mode;
bool first_operation = true;
UVM_ASSERT_MSG(gpu->parent->ce_hal->memcopy_validate(push, dst, src),
"Memcopy validation failed in channel %s, GPU %s",
push->channel->name,
uvm_gpu_name(gpu));
gpu->parent->ce_hal->memcopy_patch_src(push, &src);
launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
do {
NvU32 copy_this_time = (NvU32)min(size, max_single_copy_size);
if (first_operation && uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
else
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
gpu->parent->ce_hal->offset_in_out(push, src.address, dst.address);
NV_PUSH_1U(B0B5, LINE_LENGTH_IN, copy_this_time);
NV_PUSH_1U(B0B5, LAUNCH_DMA,
HWCONST(B0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
HWCONST(B0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
HWCONST(B0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
HWCONST(B0B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) |
HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
launch_dma_src_dst_type |
launch_dma_plc_mode |
pipelined_value);
dst.address += copy_this_time;
src.address += copy_this_time;
size -= copy_this_time;
first_operation = false;
} while (size > 0);
maxwell_membar_after_transfer(push);
}
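// Worked chunking example (illustrative, not from the original source): for a
// 6 GiB copy, size = 0x180000000 bytes, so the loop above issues two
// transfers: copy #1 with copy_this_time = 0xFFFFFFFF (the only one that can
// honor UVM_PUSH_FLAG_CE_NEXT_PIPELINED), then copy #2 with
// copy_this_time = 0x80000001, followed by a single membar after the loop.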
void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, size_t size)
{
uvm_hal_maxwell_ce_memcopy(push, uvm_gpu_address_virtual(dst_va), uvm_gpu_address_virtual(src_va), size);
}
// Push SET_DST_PHYS mode if needed and return LAUNCH_DMA_DST_TYPE flags
static NvU32 memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst)
{
if (dst.is_virtual)
return HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
NV_PUSH_1U(B0B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
return HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
}
static void memset_common(uvm_push_t *push, uvm_gpu_address_t dst, size_t size, size_t memset_element_size)
{
// If >4GB memsets ever become an important use case, this function should
// use multi-line transfers so we don't have to iterate (bug 1766588).
static const size_t max_single_memset_size = 0xFFFFFFFF;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 pipelined_value;
NvU32 launch_dma_dst_type;
NvU32 launch_dma_plc_mode;
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_validate(push, dst, memset_element_size),
"Memset validation failed in channel %s, GPU %s",
push->channel->name,
uvm_gpu_name(gpu));
launch_dma_dst_type = memset_push_phys_mode(push, dst);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
else
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
do {
NvU32 memset_this_time = (NvU32)min(size, max_single_memset_size);
gpu->parent->ce_hal->offset_out(push, dst.address);
NV_PUSH_1U(B0B5, LINE_LENGTH_IN, memset_this_time);
NV_PUSH_1U(B0B5, LAUNCH_DMA,
HWCONST(B0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
HWCONST(B0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
HWCONST(B0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
HWCONST(B0B5, LAUNCH_DMA, REMAP_ENABLE, TRUE) |
HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
launch_dma_dst_type |
launch_dma_plc_mode |
pipelined_value);
dst.address += memset_this_time * memset_element_size;
size -= memset_this_time;
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
} while (size > 0);
maxwell_membar_after_transfer(push);
}
void uvm_hal_maxwell_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size)
{
NV_PUSH_2U(B0B5, SET_REMAP_CONST_B, (NvU32)value,
SET_REMAP_COMPONENTS,
HWCONST(B0B5, SET_REMAP_COMPONENTS, DST_X, CONST_B) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, ONE) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, ONE));
memset_common(push, dst, size, 1);
}
void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size)
{
UVM_ASSERT_MSG(size % 4 == 0, "size: %zd\n", size);
size /= 4;
NV_PUSH_2U(B0B5, SET_REMAP_CONST_B, value,
SET_REMAP_COMPONENTS,
HWCONST(B0B5, SET_REMAP_COMPONENTS, DST_X, CONST_B) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, ONE));
memset_common(push, dst, size, 4);
}
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size)
{
UVM_ASSERT_MSG(size % 8 == 0, "size: %zd\n", size);
size /= 8;
NV_PUSH_3U(B0B5, SET_REMAP_CONST_A, (NvU32)value,
SET_REMAP_CONST_B, (NvU32)(value >> 32),
SET_REMAP_COMPONENTS,
HWCONST(B0B5, SET_REMAP_COMPONENTS, DST_X, CONST_A) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, DST_Y, CONST_B) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, TWO));
memset_common(push, dst, size, 8);
}
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size)
{
uvm_hal_maxwell_ce_memset_4(push, uvm_gpu_address_virtual(dst_va), value, size);
}

View File

@@ -0,0 +1,95 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_gpu.h"
#include "uvm_hal.h"
void uvm_hal_maxwell_enable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "enable_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_disable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "disable_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_clear_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
UVM_ASSERT_MSG(false, "clear_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
}
NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "fault_buffer_read_put is not supported on GPU: %s.\n", parent_gpu->name);
return 0;
}
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "fault_buffer_read_get is not supported on GPU: %s.\n", parent_gpu->name);
return 0;
}
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "fault_buffer_write_get is not supported on GPU: %s.\n", parent_gpu->name);
}
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type)
{
UVM_ASSERT_MSG(false, "fault_buffer_get_ve_id is not supported on Maxwell GPUs.\n");
return 0;
}
void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "fault_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
}
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "fault_buffer_entry_is_valid is not supported on GPU: %s.\n", parent_gpu->name);
return false;
}
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "fault_buffer_entry_clear_valid is not supported on GPU: %s.\n", parent_gpu->name);
}
NvU32 uvm_hal_maxwell_fault_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "fault_buffer_entry_size is not supported on GPU: %s.\n", parent_gpu->name);
return 0;
}
void uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void *fault_packet,
uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "fault_buffer_parse_non_replayable_entry is not supported on GPU: %s.\n", parent_gpu->name);
}

View File

@@ -0,0 +1,326 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_hal_types.h"
#include "uvm_hal.h"
#include "uvm_push.h"
#include "cla16f.h"
#include "clb06f.h"
void uvm_hal_maxwell_host_wait_for_idle(uvm_push_t *push)
{
NV_PUSH_1U(A16F, WFI, 0);
}
void uvm_hal_maxwell_host_membar_sys(uvm_push_t *push)
{
NV_PUSH_1U(A16F, MEM_OP_B,
HWCONST(A16F, MEM_OP_B, OPERATION, SYSMEMBAR_FLUSH));
}
void uvm_hal_maxwell_host_tlb_invalidate_all_a16f(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 target;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
// Only Pascal+ supports invalidating down from a specific depth.
(void)depth;
(void)membar;
if (pdb.aperture == UVM_APERTURE_VID)
target = HWCONST(A16F, MEM_OP_A, TLB_INVALIDATE_TARGET, VID_MEM);
else
target = HWCONST(A16F, MEM_OP_A, TLB_INVALIDATE_TARGET, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
NV_PUSH_2U(A16F, MEM_OP_A, target |
HWVALUE(A16F, MEM_OP_A, TLB_INVALIDATE_ADDR, pdb.address),
MEM_OP_B, HWCONST(A16F, MEM_OP_B, OPERATION, MMU_TLB_INVALIDATE) |
HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_PDB, ONE) |
HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_GPC, ENABLE));
}
void uvm_hal_maxwell_host_tlb_invalidate_all_b06f(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 target;
NvU32 pdb_lo;
NvU32 pdb_hi;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
// Only Pascal+ supports invalidating down from a specific depth.
(void)depth;
(void)membar;
if (pdb.aperture == UVM_APERTURE_VID)
target = HWCONST(B06F, MEM_OP_C, TLB_INVALIDATE_TARGET, VID_MEM);
else
target = HWCONST(B06F, MEM_OP_C, TLB_INVALIDATE_TARGET, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(B06F, MEM_OP_C, TLB_INVALIDATE_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(B06F, MEM_OP_C, TLB_INVALIDATE_ADDR_LO);
NV_PUSH_2U(B06F, MEM_OP_C, target |
HWCONST(B06F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWCONST(B06F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWVALUE(B06F, MEM_OP_C, TLB_INVALIDATE_ADDR_LO, pdb_lo),
MEM_OP_D, HWCONST(B06F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(B06F, MEM_OP_D, TLB_INVALIDATE_ADDR_HI, pdb_hi));
}
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar)
{
// No per VA invalidate on Maxwell, redirect to invalidate all.
uvm_push_get_gpu(push)->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
}
void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 target_pdb = 0;
NvU32 invalidate_gpc_value;
// Only Pascal+ supports invalidating down from a specific depth. We
// invalidate everything instead.
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
target_pdb = HWCONST(A16F, MEM_OP_A, TLB_INVALIDATE_TARGET, VID_MEM);
else
target_pdb = HWCONST(A16F, MEM_OP_A, TLB_INVALIDATE_TARGET, SYS_MEM_COHERENT);
target_pdb |= HWVALUE(A16F, MEM_OP_A, TLB_INVALIDATE_ADDR, pdb.address);
if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_GPC, DISABLE);
else
invalidate_gpc_value = HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_GPC, ENABLE);
NV_PUSH_2U(A16F, MEM_OP_A, target_pdb,
MEM_OP_B, HWCONST(A16F, MEM_OP_B, OPERATION, MMU_TLB_INVALIDATE) |
HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_PDB, ONE) |
invalidate_gpc_value);
}
void uvm_hal_maxwell_host_noop(uvm_push_t *push, NvU32 size)
{
UVM_ASSERT_MSG(size % 4 == 0, "size %u\n", size);
if (size == 0)
return;
// size is in bytes so divide by the method size (4 bytes)
size /= 4;
while (size > 0) {
// noop_this_time includes the NOP method itself and hence can be
// up to UVM_METHOD_COUNT_MAX + 1.
NvU32 noop_this_time = min(UVM_METHOD_COUNT_MAX + 1, size);
// -1 for the NOP method itself.
NV_PUSH_NU_NONINC(A16F, NOP, noop_this_time - 1);
size -= noop_this_time;
}
}
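// Worked example (illustrative, not from the original source): for
// size = 24 bytes, the division by 4 leaves 6 methods. A single
// NV_PUSH_NU_NONINC(A16F, NOP, 5) then emits the NOP header plus 5 data
// words, accounting for all 6 methods in one iteration.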
void uvm_hal_maxwell_host_interrupt(uvm_push_t *push)
{
NV_PUSH_1U(A16F, NON_STALL_INTERRUPT, 0);
}
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 sem_lo;
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
uvm_hal_wfi_membar(push, uvm_push_get_and_reset_membar_flag(push));
NV_PUSH_4U(A16F, SEMAPHOREA, HWVALUE(A16F, SEMAPHOREA, OFFSET_UPPER, NvOffset_HI32(gpu_va)),
SEMAPHOREB, HWVALUE(A16F, SEMAPHOREB, OFFSET_LOWER, sem_lo),
SEMAPHOREC, payload,
SEMAPHORED, HWCONST(A16F, SEMAPHORED, OPERATION, RELEASE) |
HWCONST(A16F, SEMAPHORED, RELEASE_SIZE, 4BYTE) |
HWCONST(A16F, SEMAPHORED, RELEASE_WFI, DIS));
}
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 sem_lo;
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
NV_PUSH_4U(A16F, SEMAPHOREA, HWVALUE(A16F, SEMAPHOREA, OFFSET_UPPER, NvOffset_HI32(gpu_va)),
SEMAPHOREB, HWVALUE(A16F, SEMAPHOREB, OFFSET_LOWER, sem_lo),
SEMAPHOREC, payload,
SEMAPHORED, HWCONST(A16F, SEMAPHORED, ACQUIRE_SWITCH, ENABLED) |
HWCONST(A16F, SEMAPHORED, OPERATION, ACQ_GEQ));
}
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
{
NvU32 sem_lo;
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
uvm_hal_wfi_membar(push, uvm_push_get_and_reset_membar_flag(push));
NV_PUSH_4U(A16F, SEMAPHOREA, HWVALUE(A16F, SEMAPHOREA, OFFSET_UPPER, NvOffset_HI32(gpu_va)),
SEMAPHOREB, HWVALUE(A16F, SEMAPHOREB, OFFSET_LOWER, sem_lo),
SEMAPHOREC, 0xdeadbeef,
SEMAPHORED, HWCONST(A16F, SEMAPHORED, OPERATION, RELEASE) |
HWCONST(A16F, SEMAPHORED, RELEASE_SIZE, 16BYTE) |
HWCONST(A16F, SEMAPHORED, RELEASE_WFI, DIS));
}
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length)
{
NvU64 fifo_entry_value;
UVM_ASSERT(!uvm_global_is_suspended());
UVM_ASSERT_MSG(pushbuffer_va % 4 == 0, "pushbuffer va unaligned: %llu\n", pushbuffer_va);
UVM_ASSERT_MSG(pushbuffer_length % 4 == 0, "pushbuffer length unaligned: %u\n", pushbuffer_length);
fifo_entry_value = HWVALUE(A16F, GP_ENTRY0, GET, NvU64_LO32(pushbuffer_va) >> 2);
fifo_entry_value |= (NvU64)(HWVALUE(A16F, GP_ENTRY1, GET_HI, NvU64_HI32(pushbuffer_va)) |
HWVALUE(A16F, GP_ENTRY1, LENGTH, pushbuffer_length >> 2) |
HWCONST(A16F, GP_ENTRY1, PRIV, KERNEL)) << 32;
*fifo_entry = fifo_entry_value;
}
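// Worked encoding example (illustrative, not from the original source): for
// pushbuffer_va = 0x120000100 and pushbuffer_length = 0x80, the code above
// sets GP_ENTRY0 GET = 0x20000100 >> 2, and GP_ENTRY1 gets
// GET_HI = 0x1, LENGTH = 0x80 >> 2 = 0x20 and PRIV = KERNEL.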
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put)
{
UVM_GPU_WRITE_ONCE(*channel->channel_info.gpPut, gpu_put);
}
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push)
{
}
void uvm_hal_maxwell_replay_faults_unsupported(uvm_push_t *push, uvm_fault_replay_type_t type)
{
UVM_ASSERT_MSG(false, "host replay_faults called on Maxwell GPU\n");
}
void uvm_hal_maxwell_cancel_faults_global_unsupported(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr)
{
UVM_ASSERT_MSG(false, "host cancel_faults_global called on Maxwell GPU\n");
}
void uvm_hal_maxwell_cancel_faults_targeted_unsupported(uvm_push_t *push,
uvm_gpu_phys_address_t instance_ptr,
NvU32 gpc_id,
NvU32 client_id)
{
UVM_ASSERT_MSG(false, "host cancel_faults_targeted called on Maxwell GPU\n");
}
void uvm_hal_maxwell_cancel_faults_va_unsupported(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode)
{
UVM_ASSERT_MSG(false, "host cancel_faults_va called on Maxwell GPU\n");
}
void uvm_hal_maxwell_host_clear_faulted_channel_sw_method_unsupported(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "host clear_faulted_channel_sw_method called on Maxwell GPU\n");
}
void uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "host clear_faulted_channel_method called on Maxwell GPU\n");
}
void uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "host clear_faulted_channel_register called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push)
{
UVM_ASSERT_MSG(false, "host access_counter_clear_all called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type)
{
UVM_ASSERT_MSG(false, "host access_counter_clear_type called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "host access_counter_clear_targeted called on Maxwell GPU\n");
}
NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu)
{
NvU32 time0;
NvU32 time1_first, time1_second;
// When reading the TIME, TIME_1 should be read first, followed by TIME_0,
// then a second reading of TIME_1 should be done. If the two readings of
// TIME_1 do not match, this process should be repeated.
//
// Doing that will catch the 4-second wrap-around of TIME_0.
do {
time1_first = UVM_GPU_READ_ONCE(*gpu->time.time1_register);
rmb();
time0 = UVM_GPU_READ_ONCE(*gpu->time.time0_register);
rmb();
time1_second = UVM_GPU_READ_ONCE(*gpu->time.time1_register);
} while (time1_second != time1_first);
return (((NvU64)time1_first) << 32) + time0;
}

Some files were not shown because too many files have changed in this diff