535.104.05

535.98
535.86.10
2026-01-27 19:49:47 +00:00 · 2023-08-22 15:09:37 +02:00 · 2023-08-08 18:28:38 +02:00 · 2023-07-31 18:17:14 +02:00 · 2023-07-18 16:00:22 +02:00 · 2023-07-10 15:58:02 +02:00
1726 changed files with 311399 additions and 106776 deletions
--- a/.github/ISSUE_TEMPLATE/10_functional_bug.yml
+++ b/.github/ISSUE_TEMPLATE/10_functional_bug.yml
@@ -1,5 +1,8 @@
 name: Report a functional bug 🐛
-description: Functional bugs affect operation or stability of the driver and/or hardware.
+description: |
+  Functional bugs affect operation or stability of the driver or hardware.
+
+  Bugs with the closed source driver must be reported on the forums (see link on New Issue page below).
 labels:
  - "bug"
 body:
@@ -18,14 +21,12 @@ body:
    description: "Which open-gpu-kernel-modules version are you running? Be as specific as possible: SHA is best when built from specific commit."
  validations:
    required: true
- type: dropdown
+- type: checkboxes
  id: sw_driver_proprietary
  attributes:
-    label: "Does this happen with the proprietary driver (of the same version) as well?"
+    label: "Please confirm this issue does not happen with the proprietary driver (of the same version). This issue tracker is only for bugs specific to the open kernel driver."
    options:
-    - "Yes"
-    - "No"
-    - "I cannot test this"
+    - label: "I confirm that this does not happen with the proprietary driver package."
  validations:
    required: true
 - type: input
@@ -42,6 +43,14 @@ body:
    description: "Which kernel are you running? (output of `uname -a`, say if you built it yourself)"
  validations:
    required: true
+- type: checkboxes
+  id: sw_host_kernel_stable
+  attributes:
+    label: "Please confirm you are running a stable release kernel (e.g. not a -rc). We do not accept bug reports for unreleased kernels."
+    options:
+    - label: "I am running on a stable kernel release."
+  validations:
+    required: true
 - type: input
  id: hw_gpu_type
  attributes:
@@ -78,7 +87,10 @@ body:
  id: bug_report_gz
  attributes:
    label: nvidia-bug-report.log.gz
-    description: "Please reproduce the problem, after that run `nvidia-bug-report.sh`, and attach the resulting nvidia-bug-report.log.gz here."
+    description: |
+      Please reproduce the problem, after that run `nvidia-bug-report.sh`, and attach the resulting nvidia-bug-report.log.gz here.
+
+      Reports without this file will be closed.
    placeholder: You can usually just drag & drop the file into this textbox.
  validations:
    required: true
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,14 +1,14 @@
 blank_issues_enabled: false
 contact_links:
+  - name: Report a bug with the proprietary driver
+    url: https://forums.developer.nvidia.com/c/gpu-graphics/linux/148
+    about: Bugs that aren't specific to the open source driver in this repository must be reported with the linked forums instead.
  - name: Report a cosmetic issue
    url: https://github.com/NVIDIA/open-gpu-kernel-modules/discussions/categories/general
    about: We are not currently accepting cosmetic-only changes such as whitespace, typos, or simple renames. You can still discuss and collect them on the boards.
  - name: Ask a question
    url: https://github.com/NVIDIA/open-gpu-kernel-modules/discussions/categories/q-a
    about: Unsure of what to click, where to go, what the process for your thing is? We're happy to help. Click to visit the discussion board and say hello!
-  - name: Report a bug with the proprietary driver
-    url: https://forums.developer.nvidia.com/c/gpu-graphics/linux/148
-    about: Bugs that aren't specific to the open source driver in this repository should be reported with the linked forums instead. If you are unsure on what kind of bug you have, feel free to open a thread in Discussions. We're here to help!
  - name: Suggest a feature
    url: https://github.com/NVIDIA/open-gpu-kernel-modules/discussions/categories/ideas
    about: Please do not open Issues for feature requests; instead, suggest and discuss new features on the Github discussion board. If you have a feature you worked on and want to PR it, please also open a discussion before doing so.
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,14 +1,48 @@
 # Changelog

-## Release 525 Entries
+## Release 535 Entries

-### [525.147.05] 2023-10-31
+### [535.104.05] 2023-08-22

-### [525.125.06] 2023-06-26
+### [535.98] 2023-08-08
+
+### [535.86.10] 2023-07-31
+
+### [535.86.05] 2023-07-18
+
+### [535.54.03] 2023-06-14
+
+### [535.43.02] 2023-05-30

 #### Fixed

- Fix nvidia_p2p_get_pages(): Fix double-free in register-callback error path, [#557](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/557) by @BrendanCunningham
+- Fixed console restore with traditional VGA consoles.
+
+#### Added
+
+- Added support for Run Time D3 (RTD3) on Ampere and later GPUs.
+- Added support for G-Sync on desktop GPUs.
+
+## Release 530 Entries
+
+### [530.41.03] 2023-03-23
+
+### [530.30.02] 2023-02-28
+
+#### Changed
+
+- GSP firmware is now distributed as `gsp_tu10x.bin` and `gsp_ga10x.bin` to better reflect the GPU architectures supported by each firmware file in this release.
+    - The .run installer will continue to install firmware to /lib/firmware/nvidia/<version> and the nvidia.ko kernel module will load the appropriate firmware for each GPU at runtime.
+  
+#### Fixed
+
+- Add support for resizable BAR on Linux when NVreg_EnableResizableBar=1 module param is set. [#3](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/3) by @sjkelly
+
+#### Added
+
+- Support for power management features like Suspend, Hibernate and Resume.
+
+## Release 525 Entries

 ### [525.116.04] 2023-05-09

--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source

 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 525.147.05.
+version 535.104.05.


 ## How to Build
@@ -17,7 +17,7 @@ as root:

 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-525.147.05 driver release.  This can be achieved by installing
+535.104.05 driver release.  This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option.  E.g.,

@@ -162,12 +162,25 @@ for the target kernel.
 - `src/nvidia/`                 The OS-agnostic code for nvidia.ko
 - `src/nvidia-modeset/`         The OS-agnostic code for nvidia-modeset.ko
 - `src/common/`                 Utility code used by one or more of nvidia.ko and nvidia-modeset.ko
+- `nouveau/`                    Tools for integration with the Nouveau device driver
+
+
+## Nouveau device driver integration
+
+The Python script in the 'nouveau' directory is used to extract some of the
+firmware binary images (and related data) encoded in the source code and
+store them as distinct files.  These files are used by the Nouveau device
+driver to load and communicate with the GSP firmware.
+
+The layout of the binary files is described in nouveau_firmware_layout.ods,
+which is an OpenDocument Spreadsheet file, compatible with most spreadsheet
+software applications.


 ## Compatible GPUs

 The open-gpu-kernel-modules can be used on any Turing or later GPU
-(see the table below). However, in the 525.147.05 release,
+(see the table below). However, in the 535.104.05 release,
 GeForce and Workstation support is still considered alpha-quality.

 To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -175,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
 parameter to 1. For more details, see the NVIDIA GPU driver end user
 README here:

-https://us.download.nvidia.com/XFree86/Linux-x86_64/525.147.05/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/535.104.05/README/kernel_open.html

 In the below table, if three IDs are listed, the first is the PCI Device 
 ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@@ -645,7 +658,6 @@ Subsystem Device ID.
 | NVIDIA A100-SXM4-80GB                           | 20B2 10DE 147F |
 | NVIDIA A100-SXM4-80GB                           | 20B2 10DE 1622 |
 | NVIDIA A100-SXM4-80GB                           | 20B2 10DE 1623 |
-| NVIDIA PG509-210                                | 20B2 10DE 1625 |
 | NVIDIA A100-SXM-64GB                            | 20B3 10DE 14A7 |
 | NVIDIA A100-SXM-64GB                            | 20B3 10DE 14A8 |
 | NVIDIA A100 80GB PCIe                           | 20B5 10DE 1533 |
@@ -653,7 +665,7 @@ Subsystem Device ID.
 | NVIDIA PG506-232                                | 20B6 10DE 1492 |
 | NVIDIA A30                                      | 20B7 10DE 1532 |
 | NVIDIA A30                                      | 20B7 10DE 1804 |
-| NVIDIA A30                                      | 20B7 10DE 1852 |
+| NVIDIA A800-SXM4-40GB                           | 20BD 10DE 17F4 |
 | NVIDIA A100-PCIE-40GB                           | 20F1 10DE 145F |
 | NVIDIA A800-SXM4-80GB                           | 20F3 10DE 179B |
 | NVIDIA A800-SXM4-80GB                           | 20F3 10DE 179C |
@@ -665,6 +677,10 @@ Subsystem Device ID.
 | NVIDIA A800-SXM4-80GB                           | 20F3 10DE 17A2 |
 | NVIDIA A800 80GB PCIe                           | 20F5 10DE 1799 |
 | NVIDIA A800 80GB PCIe LC                        | 20F5 10DE 179A |
+| NVIDIA A800 40GB Active                         | 20F6 1028 180A |
+| NVIDIA A800 40GB Active                         | 20F6 103C 180A |
+| NVIDIA A800 40GB Active                         | 20F6 10DE 180A |
+| NVIDIA A800 40GB Active                         | 20F6 17AA 180A |
 | NVIDIA GeForce GTX 1660 Ti                      | 2182           |
 | NVIDIA GeForce GTX 1660                         | 2184           |
 | NVIDIA GeForce GTX 1650 SUPER                   | 2187           |
@@ -723,6 +739,7 @@ Subsystem Device ID.
 | NVIDIA A10                                      | 2236 10DE 1482 |
 | NVIDIA A10G                                     | 2237 10DE 152F |
 | NVIDIA A10M                                     | 2238 10DE 1677 |
+| NVIDIA H100 NVL                                 | 2321 10DE 1839 |
 | NVIDIA H800 PCIe                                | 2322 10DE 17A4 |
 | NVIDIA H800                                     | 2324 10DE 17A6 |
 | NVIDIA H800                                     | 2324 10DE 17A8 |
@@ -730,6 +747,7 @@ Subsystem Device ID.
 | NVIDIA H100 80GB HBM3                           | 2330 10DE 16C1 |
 | NVIDIA H100 PCIe                                | 2331 10DE 1626 |
 | NVIDIA H100                                     | 2339 10DE 17FC |
+| NVIDIA H800 NVL                                 | 233A 10DE 183A |
 | NVIDIA GeForce RTX 3060 Ti                      | 2414           |
 | NVIDIA GeForce RTX 3080 Ti Laptop GPU           | 2420           |
 | NVIDIA RTX A5500 Laptop GPU                     | 2438           |
@@ -818,8 +836,14 @@ Subsystem Device ID.
 | NVIDIA RTX 6000 Ada Generation                  | 26B1 103C 16A1 |
 | NVIDIA RTX 6000 Ada Generation                  | 26B1 10DE 16A1 |
 | NVIDIA RTX 6000 Ada Generation                  | 26B1 17AA 16A1 |
+| NVIDIA RTX 5000 Ada Generation                  | 26B2 1028 17FA |
+| NVIDIA RTX 5000 Ada Generation                  | 26B2 103C 17FA |
+| NVIDIA RTX 5000 Ada Generation                  | 26B2 10DE 17FA |
+| NVIDIA RTX 5000 Ada Generation                  | 26B2 17AA 17FA |
 | NVIDIA L40                                      | 26B5 10DE 169D |
 | NVIDIA L40                                      | 26B5 10DE 17DA |
+| NVIDIA L40S                                     | 26B9 10DE 1851 |
+| NVIDIA L40S                                     | 26B9 10DE 18CF |
 | NVIDIA GeForce RTX 4080                         | 2704           |
 | NVIDIA GeForce RTX 4090 Laptop GPU              | 2717           |
 | NVIDIA RTX 5000 Ada Generation Laptop GPU       | 2730           |
@@ -832,11 +856,16 @@ Subsystem Device ID.
 | NVIDIA RTX 4000 SFF Ada Generation              | 27B0 103C 16FA |
 | NVIDIA RTX 4000 SFF Ada Generation              | 27B0 10DE 16FA |
 | NVIDIA RTX 4000 SFF Ada Generation              | 27B0 17AA 16FA |
+| NVIDIA RTX 4000 Ada Generation                  | 27B2 1028 181B |
+| NVIDIA RTX 4000 Ada Generation                  | 27B2 103C 181B |
+| NVIDIA RTX 4000 Ada Generation                  | 27B2 10DE 181B |
+| NVIDIA RTX 4000 Ada Generation                  | 27B2 17AA 181B |
 | NVIDIA L4                                       | 27B8 10DE 16CA |
 | NVIDIA L4                                       | 27B8 10DE 16EE |
 | NVIDIA RTX 4000 Ada Generation Laptop GPU       | 27BA           |
 | NVIDIA RTX 3500 Ada Generation Laptop GPU       | 27BB           |
 | NVIDIA GeForce RTX 4080 Laptop GPU              | 27E0           |
+| NVIDIA RTX 3500 Ada Generation Embedded GPU     | 27FB           |
 | NVIDIA GeForce RTX 4060 Ti                      | 2803           |
 | NVIDIA GeForce RTX 4060 Ti                      | 2805           |
 | NVIDIA GeForce RTX 4070 Laptop GPU              | 2820           |
--- a/kernel-open/Kbuild
+++ b/kernel-open/Kbuild
@@ -70,9 +70,13 @@ $(foreach _module, $(NV_KERNEL_MODULES), \

 EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
-EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
+EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"525.147.05\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.104.05\"
+
+ifneq ($(SYSSRCHOST1X),)
+ EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
+endif

 EXTRA_CFLAGS += -Wno-unused-function

@@ -87,7 +91,8 @@ ifeq ($(ARCH),arm64)
 endif

 ifeq ($(NV_BUILD_TYPE),debug)
- EXTRA_CFLAGS += -g -gsplit-dwarf
+ EXTRA_CFLAGS += -g
+ EXTRA_CFLAGS += $(call cc-option,-gsplit-dwarf,)
 endif

 EXTRA_CFLAGS += -ffreestanding
@@ -212,8 +217,10 @@ NV_HEADER_PRESENCE_TESTS = \
 drm/drm_auth.h \
 drm/drm_gem.h \
 drm/drm_crtc.h \
+ drm/drm_color_mgmt.h \
 drm/drm_atomic.h \
 drm/drm_atomic_helper.h \
+ drm/drm_atomic_state_helper.h \
 drm/drm_encoder.h \
 drm/drm_atomic_uapi.h \
 drm/drm_drv.h \
@@ -237,7 +244,6 @@ NV_HEADER_PRESENCE_TESTS = \
 linux/efi.h \
 linux/kconfig.h \
 linux/platform/tegra/mc_utils.h \
- linux/semaphore.h \
 linux/printk.h \
 linux/ratelimit.h \
 linux/prio_tree.h \
@@ -259,6 +265,7 @@ NV_HEADER_PRESENCE_TESTS = \
 linux/platform/tegra/dce/dce-client-ipc.h \
 linux/nvhost.h \
 linux/nvhost_t194.h \
+ linux/host1x-next.h \
 asm/book3s/64/hash-64k.h \
 asm/set_memory.h \
 asm/prom.h \
@@ -292,8 +299,11 @@ NV_HEADER_PRESENCE_TESTS = \
 linux/iosys-map.h \
 asm/coco.h \
 linux/vfio_pci_core.h \
+ linux/mdev.h \
 soc/tegra/bpmp-abi.h \
- soc/tegra/bpmp.h
+ soc/tegra/bpmp.h \
+ linux/cc_platform.h \
+ asm/cpufeature.h

 # Filename to store the define for the header in $(1); this is only consumed by
 # the rule below that concatenates all of these together.
--- a/kernel-open/common/inc/nv-firmware-registry.h
+++ b/kernel-open/common/inc/nv-firmware-registry.h
@@ -0,0 +1,83 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+//
+// This file holds GPU firmware related registry key definitions that are
+// shared between Windows and Unix
+//
+
+#ifndef NV_FIRMWARE_REGISTRY_H
+#define NV_FIRMWARE_REGISTRY_H
+
+//
+// Registry key that when enabled, will enable use of GPU firmware.
+//
+// Possible mode values:
+//  0 - Do not enable GPU firmware
+//  1 - Enable GPU firmware
+//  2 - (Default) Use the default enablement policy for GPU firmware
+//
+// Setting this to anything other than 2 will alter driver firmware-
+// enablement policies, possibly disabling GPU firmware where it would
+// have otherwise been enabled by default.
+//
+// Policy bits:
+//
+// POLICY_ALLOW_FALLBACK:
+//  As the normal behavior is to fail GPU initialization if this registry
+//  entry is set in such a way that results in an invalid configuration, if
+//  instead the user would like the driver to automatically try to fallback
+//  to initializing the failing GPU with firmware disabled, then this bit can
+//  be set (ex: 0x11 means try to enable GPU firmware but fall back if needed).
+//  Note that this can result in a mixed mode configuration (ex: GPU0 has
+//  firmware enabled, but GPU1 does not).
+//
+#define NV_REG_STR_ENABLE_GPU_FIRMWARE                   "EnableGpuFirmware"
+
+#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_MASK              0x0000000F
+#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_DISABLED          0x00000000
+#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_ENABLED           0x00000001
+#define NV_REG_ENABLE_GPU_FIRMWARE_MODE_DEFAULT           0x00000002
+
+#define NV_REG_ENABLE_GPU_FIRMWARE_POLICY_MASK            0x000000F0
+#define NV_REG_ENABLE_GPU_FIRMWARE_POLICY_ALLOW_FALLBACK  0x00000010
+
+#define NV_REG_ENABLE_GPU_FIRMWARE_DEFAULT_VALUE          0x00000012
+
+//
+// Registry key that when enabled, will send GPU firmware logs
+// to the system log, when possible.
+//
+// Possible values:
+//  0 - Do not send GPU firmware logs to the system log
+//  1 - Enable sending of GPU firmware logs to the system log
+//  2 - (Default) Enable sending of GPU firmware logs to the system log for
+//      the debug kernel driver build only
+//
+#define NV_REG_STR_ENABLE_GPU_FIRMWARE_LOGS                "EnableGpuFirmwareLogs"
+
+#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_DISABLE            0x00000000
+#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_ENABLE             0x00000001
+#define NV_REG_ENABLE_GPU_FIRMWARE_LOGS_ENABLE_ON_DEBUG    0x00000002
+
+#endif  // NV_FIRMWARE_REGISTRY_H
--- a/kernel-open/common/inc/nv-firmware.h
+++ b/kernel-open/common/inc/nv-firmware.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -81,12 +81,12 @@ static inline const char *nv_firmware_path(
    {
        switch (fw_chip_family)
        {
-            case NV_FIRMWARE_CHIP_FAMILY_AD10X:
-                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ad10x.bin");
-
            case NV_FIRMWARE_CHIP_FAMILY_GH100:  // fall through
+            case NV_FIRMWARE_CHIP_FAMILY_AD10X:  // fall through
+            case NV_FIRMWARE_CHIP_FAMILY_GA10X:
+                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ga10x.bin");
+
            case NV_FIRMWARE_CHIP_FAMILY_GA100:  // fall through
-            case NV_FIRMWARE_CHIP_FAMILY_GA10X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU11X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU10X:
                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");
@@ -100,12 +100,12 @@ static inline const char *nv_firmware_path(
    {
        switch (fw_chip_family)
        {
-            case NV_FIRMWARE_CHIP_FAMILY_AD10X:
-                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ad10x.bin");
-
            case NV_FIRMWARE_CHIP_FAMILY_GH100:  // fall through
+            case NV_FIRMWARE_CHIP_FAMILY_AD10X:  // fall through
+            case NV_FIRMWARE_CHIP_FAMILY_GA10X:
+                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ga10x.bin");
+
            case NV_FIRMWARE_CHIP_FAMILY_GA100:  // fall through
-            case NV_FIRMWARE_CHIP_FAMILY_GA10X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU11X:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU10X:
                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");
@@ -125,7 +125,7 @@ static inline const char *nv_firmware_path(
 // which will then be invoked (at the top-level) for each
 // gsp_*.bin (but not gsp_log_*.bin)
 #if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
-NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ad10x.bin")
+NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ga10x.bin")
 NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
 #endif  // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)

--- a/kernel-open/common/inc/nv-hypervisor.h
+++ b/kernel-open/common/inc/nv-hypervisor.h
@@ -41,6 +41,7 @@ typedef enum _HYPERVISOR_TYPE
 #define CMD_VGPU_VFIO_INJECT_INTERRUPT        1
 #define CMD_VGPU_VFIO_REGISTER_MDEV           2
 #define CMD_VGPU_VFIO_PRESENT                 3
+#define CMD_VFIO_PCI_CORE_PRESENT             4

 #define MAX_VF_COUNT_PER_GPU 64

@@ -91,30 +92,6 @@ typedef enum VGPU_DEVICE_STATE_E
    NV_VGPU_DEV_IN_USE = 2
 } VGPU_DEVICE_STATE;

-typedef enum _VMBUS_CMD_TYPE
-{
-    VMBUS_CMD_TYPE_INVALID    = 0,
-    VMBUS_CMD_TYPE_SETUP      = 1,
-    VMBUS_CMD_TYPE_SENDPACKET = 2,
-    VMBUS_CMD_TYPE_CLEANUP    = 3,
-} VMBUS_CMD_TYPE;
-
-typedef struct
-{
-    NvU32 request_id;
-    NvU32 page_count;
-    NvU64 *pPfns;
-    void *buffer;
-    NvU32 bufferlen;
-} vmbus_send_packet_cmd_params;
-
-
-typedef struct
-{
-    NvU32 override_sint;
-    NvU8 *nv_guid;
-} vmbus_setup_cmd_params;
-
 /*
 * Function prototypes
 */
--- a/kernel-open/common/inc/nv-ioctl-numa.h
+++ b/kernel-open/common/inc/nv-ioctl-numa.h
@@ -62,6 +62,7 @@ typedef struct nv_ioctl_numa_info
    uint64_t memblock_size __aligned(8);
    uint64_t numa_mem_addr __aligned(8);
    uint64_t numa_mem_size __aligned(8);
+    uint8_t  use_auto_online;
    nv_offline_addresses_t offline_addresses __aligned(8);
 } nv_ioctl_numa_info_t;

--- a/kernel-open/common/inc/nv-ioctl.h
+++ b/kernel-open/common/inc/nv-ioctl.h
@@ -104,7 +104,7 @@ typedef struct nv_ioctl_rm_api_version

 #define NV_RM_API_VERSION_CMD_STRICT         0
 #define NV_RM_API_VERSION_CMD_RELAXED       '1'
-#define NV_RM_API_VERSION_CMD_OVERRIDE      '2'
+#define NV_RM_API_VERSION_CMD_QUERY         '2'

 #define NV_RM_API_VERSION_REPLY_UNRECOGNIZED 0
 #define NV_RM_API_VERSION_REPLY_RECOGNIZED   1
--- a/kernel-open/common/inc/nv-kthread-q.h
+++ b/kernel-open/common/inc/nv-kthread-q.h
@@ -28,15 +28,10 @@
 #include <linux/list.h>             // list
 #include <linux/sched.h>            // task_struct
 #include <linux/numa.h>             // NUMA_NO_NODE
+#include <linux/semaphore.h>

 #include "conftest.h"

-#if defined(NV_LINUX_SEMAPHORE_H_PRESENT)
-    #include <linux/semaphore.h>
-#else
-    #include <asm/semaphore.h>
-#endif
-
 ////////////////////////////////////////////////////////////////////////////////
 // nv_kthread_q:
 //
--- a/kernel-open/common/inc/nv-linux.h
+++ b/kernel-open/common/inc/nv-linux.h
@@ -211,6 +211,7 @@
 #include <linux/highmem.h>

 #include <linux/nodemask.h>
+#include <linux/memory.h>

 #include <linux/workqueue.h>        /* workqueue                        */
 #include "nv-kthread-q.h"           /* kthread based queue              */
@@ -510,7 +511,11 @@ static inline void nv_vfree(void *ptr, NvU64 size)

 static inline void *nv_ioremap(NvU64 phys, NvU64 size)
 {
+#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
+    void *ptr = ioremap_driver_hardened(phys, size);
+#else
    void *ptr = ioremap(phys, size);
+#endif
    if (ptr)
        NV_MEMDBG_ADD(ptr, size);
    return ptr;
@@ -523,11 +528,11 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)

 static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
 {
-#if defined(NV_IOREMAP_CACHE_PRESENT)
-    void *ptr = ioremap_cache(phys, size);
-    if (ptr)
-        NV_MEMDBG_ADD(ptr, size);
-    return ptr;
+    void *ptr = NULL;
+#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
+    ptr = ioremap_cache_shared(phys, size);
+#elif defined(NV_IOREMAP_CACHE_PRESENT)
+    ptr = ioremap_cache(phys, size);
 #elif defined(NVCPU_PPC64LE)
    //
    // ioremap_cache() has been only implemented correctly for ppc64le with
@@ -542,25 +547,32 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
    // (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
    // support on power.
    //
-    void *ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
-    if (ptr)
-        NV_MEMDBG_ADD(ptr, size);
-    return ptr;
+    ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
 #else
    return nv_ioremap(phys, size);
 #endif
+
+    if (ptr)
+        NV_MEMDBG_ADD(ptr, size);
+
+    return ptr;
 }

 static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
 {
-#if defined(NV_IOREMAP_WC_PRESENT)
-    void *ptr = ioremap_wc(phys, size);
-    if (ptr)
-        NV_MEMDBG_ADD(ptr, size);
-    return ptr;
+    void *ptr = NULL;
+#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
+    ptr = ioremap_driver_hardened_wc(phys, size);
+#elif defined(NV_IOREMAP_WC_PRESENT)
+    ptr = ioremap_wc(phys, size);
 #else
    return nv_ioremap_nocache(phys, size);
 #endif
+
+    if (ptr)
+        NV_MEMDBG_ADD(ptr, size);
+
+    return ptr;
 }

 static inline void nv_iounmap(void *ptr, NvU64 size)
@@ -633,6 +645,26 @@ static NvBool nv_numa_node_has_memory(int node_id)
        free_pages(ptr, order);                      \
    }

+static inline pgprot_t nv_sme_clr(pgprot_t prot)
+{
+#if defined(__sme_clr)
+    return __pgprot(__sme_clr(pgprot_val(prot)));
+#else
+    return prot;
+#endif // __sme_clr
+}
+
+static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
+{
+    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
+
+#if defined(pgprot_decrypted)
+    return pgprot_decrypted(prot);
+#else
+    return nv_sme_clr(prot);
+#endif // pgprot_decrypted
+}
+
 #if defined(PAGE_KERNEL_NOENC)
 #if defined(__pgprot_mask)
 #define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot_mask(__PAGE_KERNEL_NOCACHE)
@@ -654,7 +686,8 @@ static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
 #if defined(PAGE_KERNEL_NOENC)
    if (unencrypted)
    {
-        prot = cached ? PAGE_KERNEL_NOENC : NV_PAGE_KERNEL_NOCACHE_NOENC;
+        prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
+                        nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
    }
    else
 #endif
@@ -939,26 +972,6 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
    return ret;
 }

-static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
-{
-    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
-#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
-    /*
-     * When AMD memory encryption is enabled, device memory mappings with the
-     * C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
-     *
-     * If cc_mkdec() is present, then pgprot_decrypted() can't be used.
-     */
-#if defined(NV_CC_MKDEC_PRESENT)
-    prot =  __pgprot(__sme_clr(pgprot_val(vm_prot)));
-#else
-    prot = pgprot_decrypted(prot);
-#endif
-#endif
-
-    return prot;
-}
-
 static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
    NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
 {
@@ -1182,7 +1195,7 @@ typedef struct nv_alloc_s {
        NvBool zeroed      : 1;
        NvBool aliased     : 1;
        NvBool user        : 1;
-        NvBool node0       : 1;
+        NvBool node        : 1;
        NvBool peer_io     : 1;
        NvBool physical    : 1;
        NvBool unencrypted : 1;
@@ -1196,6 +1209,7 @@ typedef struct nv_alloc_s {
    unsigned int   pid;
    struct page  **user_pages;
    NvU64         guest_id;             /* id of guest VM */
+    NvS32         node_id;              /* Node id for memory allocation when node is set in flags */
    void          *import_priv;
    struct sg_table *import_sgt;
 } nv_alloc_t;
@@ -1308,7 +1322,7 @@ nv_dma_maps_swiotlb(struct device *dev)
     * SEV memory encryption") forces SWIOTLB to be enabled when AMD SEV 
     * is active in all cases.
     */
-    if (os_sev_enabled)
+    if (os_cc_enabled)
        swiotlb_in_use = NV_TRUE;
 #endif

@@ -1436,6 +1450,35 @@ struct nv_dma_device {
    NvBool nvlink;
 };

+/* Properties of the coherent link */
+typedef struct coherent_link_info_s {
+    /* Physical Address of the GPU memory in SOC AMAP. In the case of
+     * baremetal OS environment it is System Physical Address(SPA) and in the case
+     * of virtualized OS environment it is Intermediate Physical Address(IPA) */
+    NvU64 gpu_mem_pa;
+    /* Bitmap of NUMA node ids, corresponding to the reserved PXMs,
+     * available for adding GPU memory to the kernel as system RAM */
+    DECLARE_BITMAP(free_node_bitmap, MAX_NUMNODES);
+} coherent_link_info_t;
+
+#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
+/*
+ * acpi data storage structure
+ *
+ * This structure retains the pointer to the device,
+ * and any other baggage we want to carry along
+ *
+ */
+typedef struct
+{
+    nvidia_stack_t *sp;
+    struct acpi_device *device;
+    struct acpi_handle *handle;
+    void *notifier_data;
+    int notify_handler_installed;
+} nv_acpi_t;
+#endif
+
 /* linux-specific version of old nv_state_t */
 /* this is a general os-specific state structure. the first element *must* be
   the general state structure, for the generic unix-based code */
@@ -1451,6 +1494,13 @@ typedef struct nv_linux_state_s {
    /* IBM-NPU info associated with this GPU */
    nv_ibmnpu_info_t *npu;

+    /* coherent link information */
+     coherent_link_info_t coherent_link_info;
+
+    /* Dedicated queue to be used for removing FB memory which is onlined
+     * to kernel as a NUMA node. Refer to Bug 3879845. */
+    nv_kthread_q_t remove_numa_memory_q;
+
    /* NUMA node information for the platforms where GPU memory is presented
     * as a NUMA node to the kernel */
    struct {
@@ -1461,6 +1511,7 @@ typedef struct nv_linux_state_s {
        /* NUMA online/offline status for platforms that support GPU memory as
         * NUMA node */
        atomic_t status;
+        NvBool use_auto_online;
    } numa_info;

    nvidia_stack_t *sp[NV_DEV_STACK_COUNT];
@@ -1530,8 +1581,13 @@ typedef struct nv_linux_state_s {
    /* Per-device notifier block for ACPI events */
    struct notifier_block acpi_nb;

+#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
+    nv_acpi_t* nv_acpi_object;
+#endif
+
    /* Lock serializing ISRs for different SOC vectors */
    nv_spinlock_t soc_isr_lock;
+    void *soc_bh_mutex;

    struct nv_timer snapshot_timer;
    nv_spinlock_t snapshot_timer_lock;
@@ -1577,24 +1633,6 @@ extern struct rw_semaphore nv_system_pm_lock;

 extern NvBool nv_ats_supported;

-#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
-/*
- * acpi data storage structure
- *
- * This structure retains the pointer to the device,
- * and any other baggage we want to carry along
- *
- */
-typedef struct
-{
-    nvidia_stack_t *sp;
-    struct acpi_device *device;
-    struct acpi_handle *handle;
-    int notify_handler_installed;
-} nv_acpi_t;
-
-#endif
-
 /*
 * file-private data
 * hide a pointer to our data structures in a file-private ptr
@@ -1744,6 +1782,7 @@ static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)

 extern NvU32 NVreg_EnableUserNUMAManagement;
 extern NvU32 NVreg_RegisterPCIDriver;
+extern NvU32 NVreg_EnableResizableBar;

 extern NvU32 num_probed_nv_devices;
 extern NvU32 num_nv_devices;
@@ -1938,6 +1977,36 @@ static inline int nv_set_numa_status(nv_linux_state_t *nvl, int status)
    return 0;
 }

+static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
+{
+    return nvl->numa_info.use_auto_online; /* plain read of the per-device numa_info flag; nvl is not NULL-checked */
+}
+
+typedef struct {   /* argument block consumed by offline_numa_memory_callback() */
+    NvU64 base;    /* forwarded to offline_and_remove_memory(), immediately before size */
+    NvU64 size;    /* forwarded to offline_and_remove_memory() as its last argument */
+    NvU32 nodeId;  /* forwarded as first argument only when NV_REMOVE_MEMORY_HAS_NID_ARG is defined */
+    int ret;       /* written with the return value of offline_and_remove_memory() */
+} remove_numa_memory_info_t;
+
+static void offline_numa_memory_callback
+(
+    void *args /* cast to remove_numa_memory_info_t *; its ret field is the only output */
+)
+{
+#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
+    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
+#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
+    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
+                                               pNumaInfo->base,
+                                               pNumaInfo->size);
+#else /* !NV_REMOVE_MEMORY_HAS_NID_ARG: two-argument form, node id not passed */
+    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
+                                               pNumaInfo->size);
+#endif /* NV_REMOVE_MEMORY_HAS_NID_ARG */
+#endif /* NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT; when undefined, args is untouched and ret is never written */
+}
+
 typedef enum
 {
    NV_NUMA_STATUS_DISABLED             = 0,
--- a/kernel-open/common/inc/nv-lock.h
+++ b/kernel-open/common/inc/nv-lock.h
@@ -29,17 +29,12 @@
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
 #include <linux/sched.h> /* signal_pending, cond_resched */
+#include <linux/semaphore.h>

 #if defined(NV_LINUX_SCHED_SIGNAL_H_PRESENT)
 #include <linux/sched/signal.h>     /* signal_pending for kernels >= 4.11 */
 #endif

-#if defined(NV_LINUX_SEMAPHORE_H_PRESENT)
-#include <linux/semaphore.h>
-#else
-#include <asm/semaphore.h>
-#endif
-
 #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
 typedef raw_spinlock_t            nv_spinlock_t;
 #define NV_SPIN_LOCK_INIT(lock)   raw_spin_lock_init(lock)
@@ -62,20 +57,7 @@ typedef spinlock_t                nv_spinlock_t;
 #define NV_SPIN_UNLOCK_WAIT(lock) spin_unlock_wait(lock)
 #endif

-#if defined(NV_CONFIG_PREEMPT_RT)
-#define NV_INIT_SEMA(sema, val) sema_init(sema,val)
-#else
-#if !defined(__SEMAPHORE_INITIALIZER) && defined(__COMPAT_SEMAPHORE_INITIALIZER)
-#define __SEMAPHORE_INITIALIZER __COMPAT_SEMAPHORE_INITIALIZER
-#endif
-#define NV_INIT_SEMA(sema, val)                    \
-    {                                              \
-        struct semaphore __sema =                  \
-            __SEMAPHORE_INITIALIZER(*(sema), val); \
-        *(sema) = __sema;                          \
-    }
-#endif
-#define NV_INIT_MUTEX(mutex) NV_INIT_SEMA(mutex, 1)
+#define NV_INIT_MUTEX(mutex) sema_init(mutex, 1)

 static inline int nv_down_read_interruptible(struct rw_semaphore *lock)
 {
--- a/kernel-open/common/inc/nv-pci.h
+++ b/kernel-open/common/inc/nv-pci.h
@@ -27,6 +27,9 @@
 #include <linux/pci.h>
 #include "nv-linux.h"

+#define NV_GPU_BAR1 1
+#define NV_GPU_BAR3 3
+
 int nv_pci_register_driver(void);
 void nv_pci_unregister_driver(void);
 int nv_pci_count_devices(void);
--- a/kernel-open/common/inc/nv.h
+++ b/kernel-open/common/inc/nv.h
@@ -315,6 +315,7 @@ typedef enum
    NV_SOC_IRQ_DPAUX_TYPE,
    NV_SOC_IRQ_GPIO_TYPE,
    NV_SOC_IRQ_HDACODEC_TYPE,
+    NV_SOC_IRQ_TCPC2DISP_TYPE,
    NV_SOC_IRQ_INVALID_TYPE
 } nv_soc_irq_type_t;

@@ -329,6 +330,7 @@ typedef struct nv_soc_irq_info_s {
        NvU32 gpio_num;
        NvU32 dpaux_instance;
    } irq_data;
+    NvS32 ref_count;
 } nv_soc_irq_info_t;

 #define NV_MAX_SOC_IRQS              6
@@ -345,6 +347,12 @@ typedef struct nv_soc_irq_info_s {
 /* DMA-capable device data, defined by kernel interface layer */
 typedef struct nv_dma_device nv_dma_device_t;

+typedef struct nv_phys_addr_range
+{
+    NvU64 addr; /* presumably the starting physical address of the range -- TODO confirm against users */
+    NvU64 len;  /* presumably the length of the range; units are not shown here -- confirm */
+} nv_phys_addr_range_t;
+
 typedef struct nv_state_t
 {
    void  *priv;                    /* private data */
@@ -384,9 +392,11 @@ typedef struct nv_state_t
    NvS32 current_soc_irq;
    NvU32 num_soc_irqs;
    NvU32 hdacodec_irq;
+    NvU32 tcpc2disp_irq;
    NvU8 *soc_dcb_blob;
    NvU32 soc_dcb_size;
    NvU32 disp_sw_soc_chip_id;
+    NvBool soc_is_dpalt_mode_supported;

    NvU32 igpu_stall_irq[NV_IGPU_MAX_STALL_IRQS];
    NvU32 igpu_nonstall_irq;
@@ -462,6 +472,9 @@ typedef struct nv_state_t
    /* Bool to check if ISO iommu enabled */
    NvBool iso_iommu_present;

+    /* Bool to check if NISO iommu enabled */
+    NvBool niso_iommu_present;
+
    /* Bool to check if dma-buf is supported */
    NvBool dma_buf_supported;

@@ -473,6 +486,8 @@ typedef struct nv_state_t
    /* Bool to check if the device received a shutdown notification */
    NvBool is_shutdown;

+    /* Bool to check if the GPU has a coherent sysmem link */
+    NvBool coherent;
 } nv_state_t;

 // These define need to be in sync with defines in system.h
@@ -507,6 +522,8 @@ struct nv_file_private_t
 typedef struct gpuSession                           *nvgpuSessionHandle_t;
 typedef struct gpuDevice                            *nvgpuDeviceHandle_t;
 typedef struct gpuAddressSpace                      *nvgpuAddressSpaceHandle_t;
+typedef struct gpuTsg                               *nvgpuTsgHandle_t;
+typedef struct UvmGpuTsgAllocParams_tag              nvgpuTsgAllocParams_t;
 typedef struct gpuChannel                           *nvgpuChannelHandle_t;
 typedef struct UvmGpuChannelInfo_tag                *nvgpuChannelInfo_t;
 typedef struct UvmGpuChannelAllocParams_tag          nvgpuChannelAllocParams_t;
@@ -533,7 +550,7 @@ typedef struct UvmGpuPagingChannelAllocParams_tag    nvgpuPagingChannelAllocPara
 typedef struct UvmGpuPagingChannel_tag              *nvgpuPagingChannelHandle_t;
 typedef struct UvmGpuPagingChannelInfo_tag          *nvgpuPagingChannelInfo_t;
 typedef enum   UvmPmaGpuMemoryType_tag               nvgpuGpuMemoryType_t;
-typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
+typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU64, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
 typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);

 /*
@@ -601,6 +618,8 @@ typedef enum
 #define NV_SOC_IS_ISO_IOMMU_PRESENT(nv)     \
        ((nv)->iso_iommu_present)

+#define NV_SOC_IS_NISO_IOMMU_PRESENT(nv)     \
+        ((nv)->niso_iommu_present)
 /*
 * GPU add/remove events
 */
@@ -655,7 +674,8 @@ static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)

 static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
 {
-    return  ((nv->fb) && (offset >= nv->fb->cpu_address) &&
+    return  ((nv->fb) && (nv->fb->size != 0) &&
+             (offset >= nv->fb->cpu_address) &&
             ((offset + (length - 1)) >= offset) &&
             ((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
 }
@@ -745,7 +765,7 @@ nv_state_t*  NV_API_CALL  nv_get_ctl_state       (void);
 void   NV_API_CALL  nv_set_dma_address_size      (nv_state_t *, NvU32 );

 NV_STATUS  NV_API_CALL  nv_alias_pages           (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
-NV_STATUS  NV_API_CALL  nv_alloc_pages           (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvU64 *, void **);
+NV_STATUS  NV_API_CALL  nv_alloc_pages           (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
 NV_STATUS  NV_API_CALL  nv_free_pages            (nv_state_t *, NvU32, NvBool, NvU32, void *);

 NV_STATUS  NV_API_CALL  nv_register_user_pages   (nv_state_t *, NvU64, NvU64 *, void *, void **);
@@ -814,6 +834,7 @@ nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
 void               NV_API_CALL nv_put_file_private(void *);

 NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
+NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);

 NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
 NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
@@ -944,6 +965,8 @@ NV_STATUS  NV_API_CALL  rm_perform_version_check  (nvidia_stack_t *, void *, NvU

 void       NV_API_CALL  rm_power_source_change_event        (nvidia_stack_t *, NvU32);

+void       NV_API_CALL  rm_request_dnotifier_state          (nvidia_stack_t *, nv_state_t *);
+
 void       NV_API_CALL  rm_disable_gpu_state_persistence    (nvidia_stack_t *sp, nv_state_t *);
 NV_STATUS  NV_API_CALL  rm_p2p_init_mapping       (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
 NV_STATUS  NV_API_CALL  rm_p2p_destroy_mapping    (nvidia_stack_t *, NvU64);
@@ -953,12 +976,12 @@ NV_STATUS  NV_API_CALL  rm_p2p_get_pages_persistent (nvidia_stack_t *,  NvU64, N
 NV_STATUS  NV_API_CALL  rm_p2p_register_callback  (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
 NV_STATUS  NV_API_CALL  rm_p2p_put_pages          (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
 NV_STATUS  NV_API_CALL  rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *);
-NV_STATUS  NV_API_CALL  rm_p2p_dma_map_pages      (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU32, NvU32, NvU64 *, void **);
-NV_STATUS  NV_API_CALL  rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *);
+NV_STATUS  NV_API_CALL  rm_p2p_dma_map_pages      (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
+NV_STATUS  NV_API_CALL  rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
 void       NV_API_CALL  rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
-NV_STATUS  NV_API_CALL  rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, NvU64 *);
-NV_STATUS  NV_API_CALL  rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64);
-NV_STATUS  NV_API_CALL  rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **);
+NV_STATUS  NV_API_CALL  rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, void *, nv_phys_addr_range_t **, NvU32 *);
+void       NV_API_CALL  rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, nv_phys_addr_range_t **, NvU32);
+NV_STATUS  NV_API_CALL  rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
 void       NV_API_CALL  rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
 NV_STATUS  NV_API_CALL  rm_log_gpu_crash          (nv_stack_t *, nv_state_t *);

@@ -991,6 +1014,7 @@ const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *,
 const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);

 void       NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
+void       NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);

 NvBool     NV_API_CALL rm_is_altstack_in_use(void);

--- a/kernel-open/common/inc/nv_uvm_interface.h
+++ b/kernel-open/common/inc/nv_uvm_interface.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -327,7 +327,7 @@ NV_STATUS nvUvmInterfaceGetPmaObject(uvmGpuDeviceHandle device,

 // Mirrors pmaEvictPagesCb_t, see its documentation in pma.h.
 typedef NV_STATUS (*uvmPmaEvictPagesCallback)(void *callbackData,
-                                              NvU32 pageSize,
+                                              NvU64 pageSize,
                                              NvU64 *pPages,
                                              NvU32 count,
                                              NvU64 physBegin,
@@ -390,7 +390,7 @@ void nvUvmInterfacePmaUnregisterEvictionCallbacks(void *pPma);
 */
 NV_STATUS nvUvmInterfacePmaAllocPages(void *pPma,
                                      NvLength pageCount,
-                                      NvU32 pageSize,
+                                      NvU64 pageSize,
                                      UvmPmaAllocationOptions *pPmaAllocOptions,
                                      NvU64 *pPages);

@@ -419,7 +419,7 @@ NV_STATUS nvUvmInterfacePmaAllocPages(void *pPma,
 NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
                                    NvU64 *pPages,
                                    NvLength pageCount,
-                                    NvU32 pageSize,
+                                    NvU64 pageSize,
                                    NvU32 flags);

 /*******************************************************************************
@@ -447,7 +447,7 @@ NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
 NV_STATUS nvUvmInterfacePmaUnpinPages(void *pPma,
                                      NvU64 *pPages,
                                      NvLength pageCount,
-                                      NvU32 pageSize);
+                                      NvU64 pageSize);

 /*******************************************************************************
    nvUvmInterfaceMemoryFree
@@ -488,7 +488,7 @@ void nvUvmInterfaceMemoryFree(uvmGpuAddressSpaceHandle vaSpace,
 void nvUvmInterfacePmaFreePages(void *pPma,
                                NvU64 *pPages,
                                NvLength pageCount,
-                                NvU32 pageSize,
+                                NvU64 pageSize,
                                NvU32 flags);

 /*******************************************************************************
@@ -507,7 +507,7 @@ void nvUvmInterfacePmaFreePages(void *pPma,
 NV_STATUS nvUvmInterfaceMemoryCpuMap(uvmGpuAddressSpaceHandle vaSpace,
                                     UvmGpuPointer gpuPointer,
                                     NvLength length, void **cpuPtr,
-                                     NvU32 pageSize);
+                                     NvU64 pageSize);

 /*******************************************************************************
    uvmGpuMemoryCpuUnmap
@@ -517,16 +517,59 @@ NV_STATUS nvUvmInterfaceMemoryCpuMap(uvmGpuAddressSpaceHandle vaSpace,
 void nvUvmInterfaceMemoryCpuUnMap(uvmGpuAddressSpaceHandle vaSpace,
                                  void *cpuPtr);

+/*******************************************************************************
+    nvUvmInterfaceTsgAllocate
+
+    This function allocates a Time-Slice Group (TSG).
+
+    allocParams must contain an engineIndex as TSGs need to be bound to an
+    engine type at allocation time. The possible values are [0,
+    UVM_COPY_ENGINE_COUNT_MAX) for CE engine type. Notably only the copy engines
+    that have UvmGpuCopyEngineCaps::supported set to true can be allocated.
+
+    Note that TSG is not supported on all GPU architectures for all engine
+    types, e.g., pre-Volta GPUs only support TSG for the GR/Compute engine type.
+    On devices that do not support HW TSGs on the requested engine, this API is
+    still required, i.e., a TSG handle is required in
+    nvUvmInterfaceChannelAllocate(), due to information stored in it necessary
+    for channel allocation. However, when HW TSGs aren't supported, a TSG handle
+    is essentially a "fake" TSG with no HW scheduling impact.
+
+    tsg is filled with the address of the corresponding TSG handle.
+
+    Arguments:
+        vaSpace[IN]      - VA space linked to a client and a device under which
+                           the TSG is allocated.
+        allocParams[IN]  - structure with allocation settings.
+        tsg[OUT]         - pointer to the new TSG handle.
+
+    Error codes:
+      NV_ERR_GENERIC
+      NV_ERR_INVALID_ARGUMENT
+      NV_ERR_NO_MEMORY
+      NV_ERR_NOT_SUPPORTED
+*/
+NV_STATUS nvUvmInterfaceTsgAllocate(uvmGpuAddressSpaceHandle vaSpace,
+                                    const UvmGpuTsgAllocParams *allocParams,
+                                    uvmGpuTsgHandle *tsg);
+
+/*******************************************************************************
+    nvUvmInterfaceTsgDestroy
+
+    This function destroys a given TSG.
+
+    Arguments:
+        tsg[IN]         - TSG handle
+*/
+void nvUvmInterfaceTsgDestroy(uvmGpuTsgHandle tsg);
+
 /*******************************************************************************
    nvUvmInterfaceChannelAllocate

-    This function will allocate a channel bound to a copy engine
+    This function will allocate a channel bound to a copy engine (CE) or a SEC2
+    engine.

-    allocParams must contain an engineIndex as channels need to be bound to an
-    engine type at allocation time. The possible values are [0,
-    UVM_COPY_ENGINE_COUNT_MAX), but notably only the copy engines that have
-    UvmGpuCopyEngineCaps::supported set to true can be allocated. This struct
-    also contains information relative to GPFIFO and GPPut.
+    allocParams contains information relative to GPFIFO and GPPut.

    channel is filled with the address of the corresponding channel handle.

@@ -536,17 +579,18 @@ void nvUvmInterfaceMemoryCpuUnMap(uvmGpuAddressSpaceHandle vaSpace,
    Host channel submission doorbell.

    Arguments:
-        vaSpace[IN]      - VA space linked to a client and a device under which
-                           the channel will be allocated
+        tsg[IN]          - Time-Slice Group that the channel will be a member of.
        allocParams[IN]  - structure with allocation settings
        channel[OUT]     - pointer to the new channel handle
        channelInfo[OUT] - structure filled with channel information

    Error codes:
      NV_ERR_GENERIC
+      NV_ERR_INVALID_ARGUMENT
      NV_ERR_NO_MEMORY
+      NV_ERR_NOT_SUPPORTED
 */
-NV_STATUS nvUvmInterfaceChannelAllocate(uvmGpuAddressSpaceHandle vaSpace,
+NV_STATUS nvUvmInterfaceChannelAllocate(const uvmGpuTsgHandle tsg,
                                        const UvmGpuChannelAllocParams *allocParams,
                                        uvmGpuChannelHandle *channel,
                                        UvmGpuChannelInfo *channelInfo);
@@ -554,7 +598,7 @@ NV_STATUS nvUvmInterfaceChannelAllocate(uvmGpuAddressSpaceHandle vaSpace,
 /*******************************************************************************
    nvUvmInterfaceChannelDestroy

-    This function destroys a given channel
+    This function destroys a given channel.

    Arguments:
        channel[IN]     - channel handle
@@ -575,7 +619,7 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
      NV_ERR_NO_MEMORY
 */
 NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
-                                  UvmGpuCaps * caps);
+                                  UvmGpuCaps *caps);

 /*******************************************************************************
    nvUvmInterfaceQueryCopyEnginesCaps
@@ -921,6 +965,23 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
                                               void *pFaultBuffer,
                                               NvU32 *numFaults);

+/*******************************************************************************
+    nvUvmInterfaceFlushReplayableFaultBuffer
+
+    This function sends an RPC to GSP in order to flush the HW replayable fault buffer.
+
+    NOTES:
+    - This function DOES NOT acquire the RM API or GPU locks. That is because
+    it is called during fault servicing, which could produce deadlocks.
+
+    Arguments:
+        device[IN]        - Device handle associated with the gpu
+
+    Error codes:
+      NV_ERR_INVALID_ARGUMENT
+*/
+NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
+
 /*******************************************************************************
    nvUvmInterfaceInitAccessCntrInfo

@@ -929,13 +990,15 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
    Arguments:
        device[IN]           - Device handle associated with the gpu
        pAccessCntrInfo[OUT] - Information provided by RM for access counter handling
+        accessCntrIndex[IN]  - Access counter index

    Error codes:
      NV_ERR_GENERIC
      NV_ERR_INVALID_ARGUMENT
 */
 NV_STATUS nvUvmInterfaceInitAccessCntrInfo(uvmGpuDeviceHandle device,
-                                           UvmGpuAccessCntrInfo *pAccessCntrInfo);
+                                           UvmGpuAccessCntrInfo *pAccessCntrInfo,
+                                           NvU32 accessCntrIndex);

 /*******************************************************************************
    nvUvmInterfaceDestroyAccessCntrInfo
@@ -1054,11 +1117,13 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
        hMemory[IN]                     -  Memory handle.
        offset [IN]                     -  Offset from the beginning of the allocation
                                           where PTE mappings should begin.
-                                           Should be aligned with pagesize associated
+                                           Should be aligned with mappingPagesize
+                                           in gpuExternalMappingInfo associated
                                           with the allocation.
        size [IN]                       -  Length of the allocation for which PTEs
                                           should be built.
-                                           Should be aligned with pagesize associated
+                                           Should be aligned with mappingPagesize
+                                           in gpuExternalMappingInfo associated
                                           with the allocation.
                                           size = 0 will be interpreted as the total size
                                           of the allocation.
@@ -1383,4 +1448,243 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
                                                char *methodStream,
                                                NvU32 methodStreamSize);

+/*******************************************************************************
+    CSL Interface and Locking
+
+    The following functions do not acquire the RM API or GPU locks and must not be called
+    concurrently with the same UvmCslContext parameter in different threads. The caller must
+    guarantee this exclusion.
+
+    * nvUvmInterfaceCslRotateIv
+    * nvUvmInterfaceCslEncrypt
+    * nvUvmInterfaceCslDecrypt
+    * nvUvmInterfaceCslSign
+    * nvUvmInterfaceCslQueryMessagePool
+    * nvUvmInterfaceCslIncrementIv
+*/
+
+/*******************************************************************************
+    nvUvmInterfaceCslInitContext
+
+    Allocates and initializes a CSL context for a given secure channel.
+
+    The lifetime of the context is the same as the lifetime of the secure channel
+    it is paired with.
+
+    Arguments:
+        uvmCslContext[IN/OUT] - The CSL context.
+        channel[IN]           - Handle to a secure channel.
+
+    Error codes:
+      NV_ERR_INVALID_STATE   - The system is not operating in Confidential Compute mode.
+      NV_ERR_INVALID_CHANNEL - The associated channel is not a secure channel.
+      NV_ERR_IN_USE          - The context has already been initialized.
+*/
+NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
+                                       uvmGpuChannelHandle channel);
+
+/*******************************************************************************
+    nvUvmInterfaceDeinitCslContext
+
+    Securely deinitializes and clears the contents of a context.
+
+    If the context is already deinitialized, the function returns immediately.
+
+    Arguments:
+        uvmCslContext[IN] - The CSL context.
+*/
+void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
+
+/*******************************************************************************
+    nvUvmInterfaceCslRotateIv
+
+    Rotates the IV for a given channel and operation.
+
+    This function will rotate the IV on both the CPU and the GPU.
+    Outstanding messages that have been encrypted by the GPU should first be
+    decrypted before calling this function with operation equal to
+    UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
+    encrypted by the CPU should first be decrypted before calling this function
+    with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
+    the channel must be idle before calling this function. This function can be
+    called regardless of the value of the IV's message counter.
+
+    See "CSL Interface and Locking" for locking requirements.
+    This function does not perform dynamic memory allocation.
+
+    Arguments:
+        uvmCslContext[IN/OUT] - The CSL context.
+        operation[IN]         - Either
+                                - UVM_CSL_OPERATION_ENCRYPT
+                                - UVM_CSL_OPERATION_DECRYPT
+
+    Error codes:
+      NV_ERR_INSUFFICIENT_RESOURCES - The rotate operation would cause a counter
+                                      to overflow.
+      NV_ERR_INVALID_ARGUMENT       - Invalid value for operation.
+*/
+NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
+                                    UvmCslOperation operation);
+
+/*******************************************************************************
+    nvUvmInterfaceCslEncrypt
+
+    Encrypts data and produces an authentication tag.
+
+    Auth, input, and output buffers must not overlap. If they do then calling
+    this function produces undefined behavior. Performance is typically
+    maximized when the input and output buffers are 16-byte aligned. This is
+    the natural alignment for an AES block.
+    The encryptIV can be obtained from nvUvmInterfaceCslIncrementIv.
+    However, it is optional. If it is NULL, the next IV in line will be used.
+
+    See "CSL Interface and Locking" for locking requirements.
+    This function does not perform dynamic memory allocation.
+
+    Arguments:
+        uvmCslContext[IN/OUT] - The CSL context.
+        bufferSize[IN]        - Size of the input and output buffers in
+                                units of bytes. Value can range from 1 byte
+                                to (2^32) - 1 bytes.
+        inputBuffer[IN]       - Address of plaintext input buffer.
+        encryptIv[IN/OUT]     - IV to use for encryption. Can be NULL.
+        outputBuffer[OUT]     - Address of ciphertext output buffer.
+        authTagBuffer[OUT]    - Address of authentication tag buffer.
+                                Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
+
+    Error codes:
+      NV_ERR_INVALID_ARGUMENT       - The size of the data is 0 bytes.
+                                    - The encryptIv has already been used.
+*/
+NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
+                                   NvU32 bufferSize,
+                                   NvU8 const *inputBuffer,
+                                   UvmCslIv *encryptIv,
+                                   NvU8 *outputBuffer,
+                                   NvU8 *authTagBuffer);
+
+/*******************************************************************************
+    nvUvmInterfaceCslDecrypt
+
+    Verifies the authentication tag and decrypts data.
+
+    Auth, input, and output buffers must not overlap. If they do then calling
+    this function produces undefined behavior. Performance is typically
+    maximized when the input and output buffers are 16-byte aligned. This is
+    the natural alignment for an AES block.
+
+    See "CSL Interface and Locking" for locking requirements.
+    This function does not perform dynamic memory allocation.
+
+    Arguments:
+        uvmCslContext[IN/OUT] - The CSL context.
+        bufferSize[IN]        - Size of the input and output buffers in units of bytes.
+                                Value can range from 1 byte to (2^32) - 1 bytes.
+        decryptIv[IN]         - IV used to decrypt the ciphertext. Its value can either be given by
+                                nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
+                                internal counter is used.
+        inputBuffer[IN]       - Address of ciphertext input buffer.
+        outputBuffer[OUT]     - Address of plaintext output buffer.
+        addAuthData[IN]       - Address of the plaintext additional authenticated data used to
+                                calculate the authentication tag. Can be NULL.
+        addAuthDataSize[IN]   - Size of the additional authenticated data in units of bytes.
+                                Value can range from 1 byte to (2^32) - 1 bytes.
+                                This parameter is ignored if addAuthData is NULL.
+        authTagBuffer[IN]     - Address of authentication tag buffer.
+                                Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
+
+    Error codes:
+      NV_ERR_INSUFFICIENT_RESOURCES - The decryption operation would cause a
+                                      counter overflow to occur.
+      NV_ERR_INVALID_ARGUMENT       - The size of the data is 0 bytes.
+      NV_ERR_INVALID_DATA           - Verification of the authentication tag fails.
+*/
+NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
+                                   NvU32 bufferSize,
+                                   NvU8 const *inputBuffer,
+                                   UvmCslIv const *decryptIv,
+                                   NvU8 *outputBuffer,
+                                   NvU8 const *addAuthData,
+                                   NvU32 addAuthDataSize,
+                                   NvU8 const *authTagBuffer);
+
+/*******************************************************************************
+    nvUvmInterfaceCslSign
+
+    Generates an authentication tag for secure work launch.
+
+    Auth and input buffers must not overlap. If they do then calling this function produces
+    undefined behavior.
+
+    See "CSL Interface and Locking" for locking requirements.
+    This function does not perform dynamic memory allocation.
+
+    Arguments:
+        uvmCslContext[IN/OUT] - The CSL context.
+        bufferSize[IN]        - Size of the input buffer in units of bytes.
+                                Value can range from 1 byte to (2^32) - 1 bytes.
+        inputBuffer[IN]       - Address of plaintext input buffer.
+        authTagBuffer[OUT]    - Address of authentication tag buffer.
+                                Its size is UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES.
+
+    Error codes:
+      NV_ERR_INSUFFICIENT_RESOURCES - The signing operation would cause a counter overflow to occur.
+      NV_ERR_INVALID_ARGUMENT       - The size of the data is 0 bytes.
+*/
+NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
+                                NvU32 bufferSize,
+                                NvU8 const *inputBuffer,
+                                NvU8 *authTagBuffer);
+
+/*******************************************************************************
+    nvUvmInterfaceCslQueryMessagePool
+
+    Returns the number of messages that can be encrypted before the message counter will overflow.
+
+    See "CSL Interface and Locking" for locking requirements.
+    This function does not perform dynamic memory allocation.
+
+    Arguments:
+        uvmCslContext[IN/OUT] - The CSL context.
+        operation[IN]         - Either UVM_CSL_OPERATION_ENCRYPT or UVM_CSL_OPERATION_DECRYPT.
+        messageNum[OUT]       - Number of messages left before overflow.
+
+    Error codes:
+      NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
+*/
+NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
+                                            UvmCslOperation operation,
+                                            NvU64 *messageNum);
+
+/*******************************************************************************
+    nvUvmInterfaceCslIncrementIv
+
+    Increments the message counter by the specified amount.
+
+    If iv is non-NULL then the incremented value is returned.
+    If operation is UVM_CSL_OPERATION_ENCRYPT then the returned IV's "freshness" bit is set and
+    can be used in nvUvmInterfaceCslEncrypt. If operation is UVM_CSL_OPERATION_DECRYPT then
+    the returned IV can be used in nvUvmInterfaceCslDecrypt.
+
+    See "CSL Interface and Locking" for locking requirements.
+    This function does not perform dynamic memory allocation.
+
+    Arguments:
+        uvmCslContext[IN/OUT] - The CSL context.
+        operation[IN]         - Either
+                                - UVM_CSL_OPERATION_ENCRYPT
+                                - UVM_CSL_OPERATION_DECRYPT
+        increment[IN]         - The amount by which the IV is incremented. Can be 0.
+        iv[OUT]               - If non-NULL, a buffer to store the incremented IV.
+
+    Error codes:
+      NV_ERR_INVALID_ARGUMENT       - The value of the operation parameter is illegal.
+      NV_ERR_INSUFFICIENT_RESOURCES - Incrementing the message counter would result
+                                      in an overflow.
+*/
+NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
+                                       UvmCslOperation operation,
+                                       NvU64 increment,
+                                       UvmCslIv *iv);
+
 #endif // _NV_UVM_INTERFACE_H_
--- a/kernel-open/common/inc/nv_uvm_types.h
+++ b/kernel-open/common/inc/nv_uvm_types.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -92,6 +92,7 @@ typedef unsigned long long UvmGpuPointer;
 typedef struct uvmGpuSession_tag       *uvmGpuSessionHandle;       // gpuSessionHandle
 typedef struct uvmGpuDevice_tag        *uvmGpuDeviceHandle;        // gpuDeviceHandle
 typedef struct uvmGpuAddressSpace_tag  *uvmGpuAddressSpaceHandle;  // gpuAddressSpaceHandle
+typedef struct uvmGpuTsg_tag           *uvmGpuTsgHandle;           // gpuTsgHandle
 typedef struct uvmGpuChannel_tag       *uvmGpuChannelHandle;       // gpuChannelHandle
 typedef struct uvmGpuCopyEngine_tag    *uvmGpuCopyEngineHandle;    // gpuObjectHandle

@@ -110,7 +111,7 @@ typedef struct UvmGpuMemoryInfo_tag
    NvBool deviceDescendant;

    // Out: Page size associated with the phys alloc.
-    NvU32 pageSize;
+    NvU64 pageSize;

    // Out: Set to TRUE, if the allocation is contiguous.
    NvBool contig;
@@ -280,6 +281,16 @@ typedef struct UvmGpuChannelInfo_tag
    // to kick off the new work.
    //
    volatile NvU32    *pWorkSubmissionToken;
+
+    // GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
+    // so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
+    NvU64             gpFifoGpuVa;
+    NvU64             gpPutGpuVa;
+    NvU64             gpGetGpuVa;
+    // GPU VA of work submission offset is needed in Confidential Computing
+    // so CE channels can ring doorbell of other channels as required for
+    // WLC/LCIC work submission
+    NvU64             workSubmissionOffsetGpuVa;
 } UvmGpuChannelInfo;

 typedef enum
@@ -292,6 +303,17 @@ typedef enum
    UVM_BUFFER_LOCATION_VID  = 2,
 } UVM_BUFFER_LOCATION;

+typedef struct UvmGpuTsgAllocParams_tag
+{
+    // Interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
+    NvU32 engineType;
+
+    // Index of the engine the TSG is bound to.
+    // Ignored if engineType is anything other than
+    // UVM_GPU_CHANNEL_ENGINE_TYPE_CE.
+    NvU32 engineIndex;
+} UvmGpuTsgAllocParams;
+
 typedef struct UvmGpuChannelAllocParams_tag
 {
    NvU32 numGpFifoEntries;
@@ -299,13 +321,6 @@ typedef struct UvmGpuChannelAllocParams_tag
    // The next two fields store UVM_BUFFER_LOCATION values
    NvU32 gpFifoLoc;
    NvU32 gpPutLoc;
-
-    // Index of the engine the channel will be bound to
-    // ignored if engineType is anything other than UVM_GPU_CHANNEL_ENGINE_TYPE_CE
-    NvU32 engineIndex;
-
-    // interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
-    NvU32 engineType;
 } UvmGpuChannelAllocParams;

 typedef struct UvmGpuPagingChannelAllocParams_tag
@@ -376,40 +391,16 @@ typedef enum

 typedef struct UvmGpuCaps_tag
 {
-    NvU32    sysmemLink;            // UVM_LINK_TYPE
-    NvU32    sysmemLinkRateMBps;    // See UvmGpuP2PCapsParams::totalLinkLineRateMBps
+    // If numaEnabled is NV_TRUE, then the system address of allocated GPU
+    // memory can be converted to struct pages. See
+    // UvmGpuInfo::systemMemoryWindowStart.
    NvBool   numaEnabled;
    NvU32    numaNodeId;
-
-    // On ATS systems, GPUs connected to different CPU sockets can have peer
-    // traffic. They are called indirect peers. However, indirect peers are
-    // mapped using sysmem aperture. In order to disambiguate the location of a
-    // specific memory address, each GPU maps its memory to a different window
-    // in the System Physical Address (SPA) space. The following fields contain
-    // the base + size of such window for the GPU. systemMemoryWindowSize
-    // different than 0 indicates that the window is valid.
-    //
-    // - If the window is valid, then we can map GPU memory to the CPU as
-    // cache-coherent by adding the GPU address to the window start.
-    // - If numaEnabled is NV_TRUE, then we can also convert the system
-    // addresses of allocated GPU memory to struct pages.
-    //
-    // TODO: Bug 1986868: fix window start computation for SIMICS
-    NvU64    systemMemoryWindowStart;
-    NvU64    systemMemoryWindowSize;
-
-    // This tells if the GPU is connected to NVSwitch. On systems with NVSwitch
-    // all GPUs are connected to it. If connectedToSwitch is NV_TRUE,
-    // nvswitchMemoryWindowStart tells the base address for the GPU in the
-    // NVSwitch address space. It is used when creating PTEs of memory mappings
-    // to NVSwitch peers.
-    NvBool   connectedToSwitch;
-    NvU64    nvswitchMemoryWindowStart;
 } UvmGpuCaps;

 typedef struct UvmGpuAddressSpaceInfo_tag
 {
-    NvU32           bigPageSize;
+    NvU64           bigPageSize;

    NvBool          atsEnabled;

@@ -430,12 +421,14 @@ typedef struct UvmGpuAddressSpaceInfo_tag
 typedef struct UvmGpuAllocInfo_tag
 {
    NvU64   gpuPhysOffset;          // Returns gpuPhysOffset if contiguous requested
-    NvU32   pageSize;               // default is RM big page size - 64K or 128 K" else use 4K or 2M
+    NvU64   pageSize;               // default is RM big page size - 64K or 128K; else use 4K or 2M
    NvU64   alignment;              // Virtual alignment
    NvBool  bContiguousPhysAlloc;   // Flag to request contiguous physical allocation
    NvBool  bMemGrowsDown;          // Causes RM to reserve physical heap from top of FB
    NvBool  bPersistentVidmem;      // Causes RM to allocate persistent video memory
    NvHandle hPhysHandle;           // Handle for phys allocation either provided or retrieved
+    NvBool   bUnprotected;            // Allocation to be made in unprotected memory whenever
+                                      // SEV or GPU CC modes are enabled. Ignored otherwise
 } UvmGpuAllocInfo;

 typedef enum
@@ -516,6 +509,13 @@ typedef struct UvmGpuExternalMappingInfo_tag
    // In: Size of the buffer to store PTEs (in bytes).
    NvU64 pteBufferSize;

+    // In: Page size for mapping
+    //     If this field is passed as 0, the page size
+    //     of the allocation is used for mapping.
+    //     nvUvmInterfaceGetExternalAllocPtes must pass
+    //     this field as zero.
+    NvU64 mappingPageSize;
+
    // In: Pointer to a buffer to store PTEs.
    // Out: The interface will fill the buffer with PTEs
    NvU64 *pteBuffer;
@@ -566,8 +566,11 @@ typedef struct UvmPlatformInfo_tag
    // Out: ATS (Address Translation Services) is supported
    NvBool atsSupported;

-    // Out: AMD SEV (Secure Encrypted Virtualization) is enabled
-    NvBool sevEnabled;
+    // Out: True if HW trusted execution, such as AMD's SEV-SNP or Intel's TDX,
+    // is enabled in the VM, indicating that Confidential Computing must be
+    // also enabled in the GPU(s); these two security features are either both
+    // enabled, or both disabled.
+    NvBool confComputingEnabled;
 } UvmPlatformInfo;

 typedef struct UvmGpuClientInfo_tag
@@ -577,6 +580,20 @@ typedef struct UvmGpuClientInfo_tag
    NvHandle hSmcPartRef;
 } UvmGpuClientInfo;

+typedef enum
+{
+    UVM_GPU_CONF_COMPUTE_MODE_NONE,
+    UVM_GPU_CONF_COMPUTE_MODE_APM,
+    UVM_GPU_CONF_COMPUTE_MODE_HCC,
+    UVM_GPU_CONF_COMPUTE_MODE_COUNT
+} UvmGpuConfComputeMode;
+
+typedef struct UvmGpuConfComputeCaps_tag
+{
+    // Out: GPU's confidential compute mode
+    UvmGpuConfComputeMode mode;
+} UvmGpuConfComputeCaps;
+
 #define UVM_GPU_NAME_LENGTH 0x40

 typedef struct UvmGpuInfo_tag
@@ -641,6 +658,31 @@ typedef struct UvmGpuInfo_tag

    UvmGpuClientInfo smcUserClientInfo;

+    // Confidential Compute capabilities of this GPU
+    UvmGpuConfComputeCaps gpuConfComputeCaps;
+
+    // UVM_LINK_TYPE
+    NvU32 sysmemLink;
+
+    // See UvmGpuP2PCapsParams::totalLinkLineRateMBps
+    NvU32 sysmemLinkRateMBps;
+
+    // On coherent systems each GPU maps its memory to a window in the System
+    // Physical Address (SPA) space. The following fields describe that window.
+    //
+    // systemMemoryWindowSize > 0 indicates that the window is valid, meaning
+    // that GPU memory can be mapped by the CPU as cache-coherent by adding the
+    // GPU address to the window start.
+    NvU64 systemMemoryWindowStart;
+    NvU64 systemMemoryWindowSize;
+
+    // This tells if the GPU is connected to NVSwitch. On systems with NVSwitch
+    // all GPUs are connected to it. If connectedToSwitch is NV_TRUE,
+    // nvswitchMemoryWindowStart tells the base address for the GPU in the
+    // NVSwitch address space. It is used when creating PTEs of memory mappings
+    // to NVSwitch peers.
+    NvBool connectedToSwitch;
+    NvU64 nvswitchMemoryWindowStart;
 } UvmGpuInfo;

 typedef struct UvmGpuFbInfo_tag
@@ -683,6 +725,9 @@ typedef struct UvmPmaStatistics_tag
    volatile NvU64 numPages2m;                // PMA-wide 2MB pages count across all regions
    volatile NvU64 numFreePages64k;           // PMA-wide free 64KB page count across all regions
    volatile NvU64 numFreePages2m;            // PMA-wide free 2MB pages count across all regions
+    volatile NvU64 numPages2mProtected;       // PMA-wide 2MB pages count in protected memory
+    volatile NvU64 numFreePages64kProtected;  // PMA-wide free 64KB page count in protected memory
+    volatile NvU64 numFreePages2mProtected;   // PMA-wide free 2MB pages count in protected memory
 } UvmPmaStatistics;

 /*******************************************************************************
@@ -790,24 +835,92 @@ struct UvmOpsUvmEvents
 #endif
 };

+#define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
+#define UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES 16
+
+typedef union UvmFaultMetadataPacket_tag
+{
+    struct {
+        NvU8   authTag[UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES];
+        NvBool valid;
+    };
+    // padding to 32Bytes
+    NvU8 _padding[32];
+} UvmFaultMetadataPacket;
+
+// This struct shall not be accessed nor modified directly by UVM as it is
+// entirely managed by the RM layer
+typedef struct UvmCslContext_tag
+{
+    struct ccslContext_t *ctx;
+    void *nvidia_stack;
+} UvmCslContext;
+
 typedef struct UvmGpuFaultInfo_tag
 {
    struct
    {
-        // Register mappings obtained from RM
+        // Fault buffer GET register mapping.
+        //
+        // When Confidential Computing is enabled, GET refers to the shadow
+        // buffer (see bufferAddress below), and not to the actual HW buffer.
+        // In this setup, writes of GET (by UVM) do not result in re-evaluation
+        // of any interrupt condition.
        volatile NvU32* pFaultBufferGet;
+
+        // Fault buffer PUT register mapping.
+        //
+        // When Confidential Computing is enabled, PUT refers to the shadow
+        // buffer (see bufferAddress below), and not to the actual HW buffer.
+        // In this setup, writes of PUT (by GSP-RM) do not result in
+        // re-evaluation of any interrupt condition.
        volatile NvU32* pFaultBufferPut;
-        // Note: this variable is deprecated since buffer overflow is not a separate
-        // register from future chips.
+
+        // Note: this variable is deprecated since buffer overflow is not a
+        // separate register from future chips.
        volatile NvU32* pFaultBufferInfo;
+
+        // Register mapping used to clear a replayable fault interrupt in
+        // Turing+ GPUs.
        volatile NvU32* pPmcIntr;
+
+        // Register mapping used to enable replayable fault interrupts.
        volatile NvU32* pPmcIntrEnSet;
+
+        // Register mapping used to disable replayable fault interrupts.
        volatile NvU32* pPmcIntrEnClear;
+
+        // Register used to enable, or disable, faults on prefetches.
        volatile NvU32* pPrefetchCtrl;
+
+        // Replayable fault interrupt mask identifier.
        NvU32 replayableFaultMask;
-        // fault buffer cpu mapping and size
-        void* bufferAddress;
+
+        // Fault buffer CPU mapping
+        void*  bufferAddress;
+        // Notes on bufferAddress:
+        // When Confidential Computing is disabled, the mapping points to the
+        // actual HW fault buffer.
+        //
+        // When Confidential Computing is enabled, the mapping points to a
+        // copy of the HW fault buffer. This "shadow buffer" is maintained
+        // by GSP-RM.
+
+        // Size, in bytes, of the fault buffer pointed to by bufferAddress.
        NvU32  bufferSize;
+        // Mapping pointing to the start of the fault buffer metadata containing
+        // a 16Byte authentication tag and a valid byte. Always NULL when
+        // Confidential Computing is disabled.
+        UvmFaultMetadataPacket *bufferMetadata;
+
+        // CSL context used for performing decryption of replayable faults when
+        // Confidential Computing is enabled.
+        UvmCslContext cslCtx;
+
+        // Indicates whether UVM owns the replayable fault buffer.
+        // The value of this field is always NV_TRUE when Confidential Computing
+        // is disabled.
+        NvBool bUvmOwnsHwFaultBuffer;
    } replayable;
    struct
    {
@@ -826,8 +939,20 @@ typedef struct UvmGpuFaultInfo_tag

        // Preallocated stack for functions called from the UVM isr bottom half
        void *isr_bh_sp;
+
+        // Used only when Hopper Confidential Compute is enabled
+        // Register mappings obtained from RM
+        volatile NvU32* pFaultBufferPut;
+
+        // Used only when Hopper Confidential Compute is enabled
+        // Cached get index of the non-replayable shadow buffer
+        NvU32 shadowBufferGet;
+
+        // See replayable.bufferMetadata
+        UvmFaultMetadataPacket  *shadowBufferMetadata;
    } nonReplayable;
    NvHandle faultBufferHandle;
+    struct Device *pDevice;
 } UvmGpuFaultInfo;

 struct Device;
@@ -863,12 +988,6 @@ typedef struct UvmGpuAccessCntrInfo_tag
    void* bufferAddress;
    NvU32  bufferSize;
    NvHandle accessCntrBufferHandle;
-
-    // The Notification address in the access counter notification msg does not
-    // contain the correct upper bits 63-47 for GPA-based notifications. RM
-    // provides us with the correct offset to be added.
-    // See Bug 1803015
-    NvU64 baseDmaSysmemAddr;
 } UvmGpuAccessCntrInfo;

 typedef enum
@@ -911,6 +1030,7 @@ typedef enum UvmPmaGpuMemoryType_tag
 } UVM_PMA_GPU_MEMORY_TYPE;

 typedef UvmGpuChannelInfo gpuChannelInfo;
+typedef UvmGpuTsgAllocParams gpuTsgAllocParams;
 typedef UvmGpuChannelAllocParams gpuChannelAllocParams;
 typedef UvmGpuCaps gpuCaps;
 typedef UvmGpuCopyEngineCaps gpuCeCaps;
@@ -935,4 +1055,16 @@ typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
 typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
 typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;

+typedef struct UvmCslIv
+{
+    NvU8 iv[12];
+    NvU8 fresh;
+} UvmCslIv;
+
+typedef enum UvmCslOperation
+{
+    UVM_CSL_OPERATION_ENCRYPT,
+    UVM_CSL_OPERATION_DECRYPT
+} UvmCslOperation;
+
 #endif // _NV_UVM_TYPES_H_
--- a/kernel-open/common/inc/nvkms-kapi.h
+++ b/kernel-open/common/inc/nvkms-kapi.h
@@ -165,8 +165,6 @@ struct NvKmsKapiConnectorInfo {

    NvU32 physicalIndex;

-    NvU32 headMask;
-
    NvKmsConnectorSignalFormat signalFormat;
    NvKmsConnectorType         type;

@@ -194,6 +192,7 @@ struct NvKmsKapiStaticDisplayInfo {
    NvU32  numPossibleClones;
    NvKmsKapiDisplay possibleCloneHandles[NVKMS_KAPI_MAX_CLONE_DISPLAYS];

+    NvU32 headMask;
 };

 struct NvKmsKapiSyncpt {
@@ -520,14 +519,23 @@ struct NvKmsKapiFunctionsTable {
    );

    /*!
-     * Revoke modeset permissions previously granted. This currently applies for all
-     * previous grant requests for this device.
+     * Revoke permissions previously granted. Only one (dispIndex, head,
+     * display) is currently supported.
     *
-     * \param [in]  device                  A device returned by allocateDevice().
+     * \param [in]  device     A device returned by allocateDevice().
+     *
+     * \param [in]  head       head of display.
+     *
+     * \param [in]  display    The display to revoke.
     *
     * \return NV_TRUE on success, NV_FALSE on failure.
     */
-    NvBool (*revokePermissions)(struct NvKmsKapiDevice *device);
+    NvBool (*revokePermissions)
+    (
+        struct NvKmsKapiDevice *device,
+        NvU32 head,
+        NvKmsKapiDisplay display
+    );

    /*!
     * Registers for notification, via
@@ -1065,6 +1073,21 @@ struct NvKmsKapiFunctionsTable {
        NvU64 *pPages
    );

+     /*!
+     * Check if this memory object can be scanned out for display.
+     *
+     * \param [in]  device  A device allocated using allocateDevice().
+     *
+     * \param [in]  memory  The memory object to check for display support.
+     *
+     * \return NV_TRUE if this memory can be displayed, NV_FALSE if not.
+     */
+    NvBool (*isMemoryValidForDisplay)
+    (
+        const struct NvKmsKapiDevice *device,
+        const struct NvKmsKapiMemory *memory
+    );
+
    /*
     * Import SGT as a memory handle.
     *
--- a/kernel-open/common/inc/nvlimits.h
+++ b/kernel-open/common/inc/nvlimits.h
@@ -25,7 +25,7 @@

 //
 // This file was generated with FINN, an NVIDIA coding tool.
-// Source file: nvlimits.finn
+// Source file:      nvlimits.finn
 //


--- a/kernel-open/common/inc/nvstatuscodes.h
+++ b/kernel-open/common/inc/nvstatuscodes.h
@@ -149,6 +149,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_TRAINING_ERROR,           0x00000077, "Nvlink Train
 NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR,      0x00000078, "Nvlink Configuration Error")
 NV_STATUS_CODE(NV_ERR_RISCV_ERROR,                     0x00000079, "Generic RISC-V assert or halt")
 NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT,      0x0000007A, "Fabric Manager is not loaded")
+NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED,               0x0000007B, "Semaphore Surface value already >= requested wait value")

 // Warnings:
 NV_STATUS_CODE(NV_WARN_HOT_SWITCH,                     0x00010001, "WARNING Hot switch")
--- a/kernel-open/common/inc/nvtypes.h
+++ b/kernel-open/common/inc/nvtypes.h
@@ -513,6 +513,12 @@ typedef struct
 // place to re-locate these from nvos.h which cannot be included by a number
 // of builds that need them

+#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+    #define NV_ATTRIBUTE_UNUSED __attribute__((__unused__))
+#else
+    #define NV_ATTRIBUTE_UNUSED
+#endif
+
 #if defined(_MSC_VER)

    #if _MSC_VER >= 1310
@@ -536,8 +542,6 @@ typedef struct

    #define NV_FORCERESULTCHECK

-    #define NV_ATTRIBUTE_UNUSED
-
    #define NV_FORMAT_PRINTF(_f, _a)

 #else // ! defined(_MSC_VER)
@@ -635,12 +639,6 @@ typedef struct
        #define NV_FORCERESULTCHECK
    #endif

-    #if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
-        #define NV_ATTRIBUTE_UNUSED __attribute__((__unused__))
-    #else
-        #define NV_ATTRIBUTE_UNUSED
-    #endif
-
    /*
     * Functions decorated with NV_FORMAT_PRINTF(f, a) have a format string at
     * parameter number 'f' and variadic arguments start at parameter number 'a'.
--- a/kernel-open/common/inc/os-interface.h
+++ b/kernel-open/common/inc/os-interface.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -181,7 +181,6 @@ NV_STATUS   NV_API_CALL  os_put_page                 (NvU64 address);
 NvU32       NV_API_CALL  os_get_page_refcount        (NvU64 address);
 NvU32       NV_API_CALL  os_count_tail_pages         (NvU64 address);
 void        NV_API_CALL  os_free_pages_phys          (NvU64, NvU32);
-NV_STATUS   NV_API_CALL  os_call_nv_vmbus            (NvU32, void *);
 NV_STATUS   NV_API_CALL  os_open_temporary_file      (void **);
 void        NV_API_CALL  os_close_file               (void *);
 NV_STATUS   NV_API_CALL  os_write_file               (void *, NvU8 *, NvU64, NvU64);
@@ -189,7 +188,7 @@ NV_STATUS   NV_API_CALL  os_read_file                (void *, NvU8 *, NvU64, NvU
 NV_STATUS   NV_API_CALL  os_open_readonly_file       (const char *, void **);
 NV_STATUS   NV_API_CALL  os_open_and_read_file       (const char *, NvU8 *, NvU64);
 NvBool      NV_API_CALL  os_is_nvswitch_present      (void);
-void        NV_API_CALL  os_get_random_bytes         (NvU8 *, NvU16);
+NV_STATUS   NV_API_CALL  os_get_random_bytes         (NvU8 *, NvU16);
 NV_STATUS   NV_API_CALL  os_alloc_wait_queue         (os_wait_queue **);
 void        NV_API_CALL  os_free_wait_queue          (os_wait_queue *);
 void        NV_API_CALL  os_wait_uninterruptible     (os_wait_queue *);
@@ -208,12 +207,15 @@ enum os_pci_req_atomics_type {
    OS_INTF_PCIE_REQ_ATOMICS_128BIT
 };
 NV_STATUS   NV_API_CALL  os_enable_pci_req_atomics   (void *, enum os_pci_req_atomics_type);
+NV_STATUS   NV_API_CALL  os_numa_add_gpu_memory      (void *, NvU64, NvU64, NvU32 *);
+NV_STATUS   NV_API_CALL  os_numa_remove_gpu_memory   (void *, NvU64, NvU64, NvU32); 
+NV_STATUS   NV_API_CALL  os_offline_page_at_address(NvU64 address);

 extern NvU32 os_page_size;
 extern NvU64 os_page_mask;
 extern NvU8  os_page_shift;
-extern NvU32 os_sev_status;
-extern NvBool os_sev_enabled;
+extern NvBool os_cc_enabled;
+extern NvBool os_cc_tdx_enabled;
 extern NvBool os_dma_buf_enabled;

 /*
--- a/kernel-open/common/inc/rm-gpu-ops.h
+++ b/kernel-open/common/inc/rm-gpu-ops.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -56,7 +56,9 @@ NV_STATUS  NV_API_CALL  rm_gpu_ops_get_p2p_caps(nvidia_stack_t *, nvgpuDeviceHan

 NV_STATUS  NV_API_CALL  rm_gpu_ops_memory_cpu_map(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, NvLength, void **, NvU32);
 NV_STATUS  NV_API_CALL  rm_gpu_ops_memory_cpu_ummap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, void*);
-NV_STATUS  NV_API_CALL  rm_gpu_ops_channel_allocate(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, const nvgpuChannelAllocParams_t *, nvgpuChannelHandle_t *, nvgpuChannelInfo_t);
+NV_STATUS  NV_API_CALL  rm_gpu_ops_tsg_allocate(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, const nvgpuTsgAllocParams_t *, nvgpuTsgHandle_t *);
+NV_STATUS  NV_API_CALL  rm_gpu_ops_tsg_destroy(nvidia_stack_t *, nvgpuTsgHandle_t);
+NV_STATUS  NV_API_CALL  rm_gpu_ops_channel_allocate(nvidia_stack_t *, const nvgpuTsgHandle_t, const nvgpuChannelAllocParams_t *, nvgpuChannelHandle_t *, nvgpuChannelInfo_t);
 NV_STATUS  NV_API_CALL  rm_gpu_ops_channel_destroy(nvidia_stack_t *, nvgpuChannelHandle_t);
 NV_STATUS  NV_API_CALL rm_gpu_ops_memory_free(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64);
 NV_STATUS  NV_API_CALL rm_gpu_ops_query_caps(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuCaps_t);
@@ -74,8 +76,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
 NV_STATUS  NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
 NV_STATUS  NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
 NV_STATUS  NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
+NV_STATUS  NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
 NV_STATUS  NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
-NV_STATUS  NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
+NV_STATUS  NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
 NV_STATUS  NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
 NV_STATUS  NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
 NV_STATUS  NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
@@ -98,4 +101,14 @@ NV_STATUS  NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
 void       NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
 NV_STATUS  NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);

+NV_STATUS  NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
+NV_STATUS  NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
+NV_STATUS  NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
+NV_STATUS  NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
+NV_STATUS  NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
+NV_STATUS  NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
+NV_STATUS  NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
+NV_STATUS  NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
+NV_STATUS  NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
+
 #endif
--- a/kernel-open/conftest.sh
+++ b/kernel-open/conftest.sh
--- a/kernel-open/nvidia-drm/nvidia-drm-connector.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-connector.c
@@ -27,6 +27,7 @@
 #include "nvidia-drm-helper.h"
 #include "nvidia-drm-priv.h"
 #include "nvidia-drm-connector.h"
+#include "nvidia-drm-crtc.h"
 #include "nvidia-drm-utils.h"
 #include "nvidia-drm-encoder.h"

@@ -207,6 +208,11 @@ done:

    nv_drm_free(pDetectParams);

+    if (status == connector_status_disconnected &&
+        nv_connector->modeset_permission_filep) {
+        nv_drm_connector_revoke_permissions(dev, nv_connector);
+    }
+
    return status;
 }

@@ -372,6 +378,8 @@ nv_drm_connector_new(struct drm_device *dev,
    nv_connector->physicalIndex = physicalIndex;
    nv_connector->type     = type;
    nv_connector->internal = internal;
+    nv_connector->modeset_permission_filep = NULL;
+    nv_connector->modeset_permission_crtc = NULL;

    strcpy(nv_connector->dpAddress, dpAddress);

@@ -474,4 +482,26 @@ done:
    return connector;
 }

+/*
+ * Revoke the permissions on this connector.
+ */
+bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
+                                         struct nv_drm_connector* nv_connector)
+{
+    struct nv_drm_device *nv_dev = to_nv_device(dev);
+    bool ret = true;
+
+    if (nv_connector->modeset_permission_crtc) {
+        if (nv_connector->nv_detected_encoder) {
+            ret = nvKms->revokePermissions(
+                nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
+                nv_connector->nv_detected_encoder->hDisplay);
+        }
+        nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
+        nv_connector->modeset_permission_crtc = NULL;
+    }
+    nv_connector->modeset_permission_filep = NULL;
+    return ret;
+}
+
 #endif
--- a/kernel-open/nvidia-drm/nvidia-drm-connector.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-connector.h
@@ -51,6 +51,20 @@ struct nv_drm_connector {

    atomic_t connection_status_dirty;

+    /**
+     * @modeset_permission_filep:
+     *
+     * The filep using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
+     */
+    struct drm_file *modeset_permission_filep;
+
+    /**
+     * @modeset_permission_crtc:
+     *
+     * The crtc using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
+     */
+    struct nv_drm_crtc *modeset_permission_crtc;
+
    struct drm_connector base;
 };

@@ -84,6 +98,9 @@ nv_drm_get_connector(struct drm_device *dev,
                     NvBool internal,
                     char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH]);

+bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
+                                         struct nv_drm_connector *nv_connector);
+
 #endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */

 #endif /* __NVIDIA_DRM_CONNECTOR_H__ */
--- a/kernel-open/nvidia-drm/nvidia-drm-crtc.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-crtc.c
@@ -44,6 +44,8 @@

 #if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
 #include <linux/nvhost.h>
+#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)            
+#include <linux/host1x-next.h>
 #endif

 #if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
@@ -361,6 +363,21 @@ plane_req_config_update(struct drm_plane *plane,

        if (nv_drm_plane_state->fd_user_ptr) {
            req_config->config.syncptParams.postSyncptRequested = true;
+        }           
+#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)            
+        if (plane_state->fence != NULL) {            
+            int ret = host1x_fence_extract(            
+                      plane_state->fence,            
+                      &req_config->config.syncptParams.preSyncptId,            
+                      &req_config->config.syncptParams.preSyncptValue);            
+            if (ret != 0) {            
+                return ret;            
+            }            
+            req_config->config.syncptParams.preSyncptSpecified = true;            
+        }            
+
+        if (nv_drm_plane_state->fd_user_ptr) {            
+            req_config->config.syncptParams.postSyncptRequested = true;            
        }
 #else
        return -1;
@@ -1181,6 +1198,7 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
    nv_crtc->head = head;
    INIT_LIST_HEAD(&nv_crtc->flip_list);
    spin_lock_init(&nv_crtc->flip_list_lock);
+    nv_crtc->modeset_permission_filep = NULL;

    ret = drm_crtc_init_with_planes(nv_dev->dev,
                                    &nv_crtc->base,
@@ -1329,7 +1347,7 @@ int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
        return -ENOENT;
    }

-    crtc = nv_drm_crtc_find(dev, params->crtc_id);
+    crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
    if (!crtc) {
        return -ENOENT;
    }
@@ -1357,7 +1375,7 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
        return -ENOENT;
    }

-    crtc = nv_drm_crtc_find(dev, params->crtc_id);
+    crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
    if (!crtc) {
        return -ENOENT;
    }
--- a/kernel-open/nvidia-drm/nvidia-drm-crtc.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-crtc.h
@@ -35,38 +35,9 @@

 #include <drm/drm_crtc.h>

-#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
-/* For DRM_ROTATE_* , DRM_REFLECT_* */
-#include <drm/drm_blend.h>
-#endif
-
-#if defined(NV_DRM_ROTATION_AVAILABLE)
-/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
-#include <uapi/drm/drm_mode.h>
-#endif
-
 #include "nvtypes.h"
 #include "nvkms-kapi.h"

-#if defined(NV_DRM_ROTATION_AVAILABLE)
-/*
- * 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
- * DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
- * DRM_ROTATE_* and DRM_MODE_REFLECT_*
- */
-#if !defined(DRM_MODE_ROTATE_0)
-#define DRM_MODE_ROTATE_0       DRM_ROTATE_0
-#define DRM_MODE_ROTATE_90      DRM_ROTATE_90
-#define DRM_MODE_ROTATE_180     DRM_ROTATE_180
-#define DRM_MODE_ROTATE_270     DRM_ROTATE_270
-#define DRM_MODE_REFLECT_X      DRM_REFLECT_X
-#define DRM_MODE_REFLECT_Y      DRM_REFLECT_Y
-#define DRM_MODE_ROTATE_MASK    DRM_ROTATE_MASK
-#define DRM_MODE_REFLECT_MASK   DRM_REFLECT_MASK
-#endif
-
-#endif //NV_DRM_ROTATION_AVAILABLE
-
 struct nv_drm_crtc {
    NvU32 head;

@@ -85,6 +56,13 @@ struct nv_drm_crtc {
     */
    spinlock_t flip_list_lock;

+    /**
+     * @modeset_permission_filep:
+     *
+     * The filep using this crtc with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
+     */
+    struct drm_file *modeset_permission_filep;
+
    struct drm_crtc base;
 };

--- a/kernel-open/nvidia-drm/nvidia-drm-drv.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c
@@ -30,7 +30,7 @@
 #include "nvidia-drm-connector.h"
 #include "nvidia-drm-gem.h"
 #include "nvidia-drm-crtc.h"
-#include "nvidia-drm-prime-fence.h"
+#include "nvidia-drm-fence.h"
 #include "nvidia-drm-helper.h"
 #include "nvidia-drm-gem-nvkms-memory.h"
 #include "nvidia-drm-gem-user-memory.h"
@@ -706,6 +706,16 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
    return 0;
 }

+static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
+                                         void *data, struct drm_file *filep)
+{
+    /* Check pDevice: it is only set when modeset = 1, which is a
+     * requirement for the dma_buf extension to work.
+     */
+    struct nv_drm_device *nv_dev = to_nv_device(dev);
+    return nv_dev->pDevice ? 0 : -EINVAL;
+}
+
 static
 int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
                                       void *data, struct drm_file *filep)
@@ -735,6 +745,455 @@ int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
    return 0;
 }

+#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
+static bool nv_drm_connector_is_dpy_id(struct drm_connector *connector,
+                                       NvU32 dpyId)
+{
+    struct nv_drm_connector *nv_connector = to_nv_connector(connector);
+    return nv_connector->nv_detected_encoder &&
+           nv_connector->nv_detected_encoder->hDisplay == dpyId;
+}
+
+static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
+                                                    void *data,
+                                                    struct drm_file *filep)
+{
+    struct drm_nvidia_get_dpy_id_for_connector_id_params *params = data;
+    // Importantly, drm_connector_lookup (with filep) will only return the
+    // connector if we are master, a lessee with the connector, or not master at
+    // all. It will return NULL if we are a lessee with other connectors.
+    struct drm_connector *connector =
+        nv_drm_connector_lookup(dev, filep, params->connectorId);
+    struct nv_drm_connector *nv_connector;
+    int ret = 0;
+
+    if (!connector) {
+        return -EINVAL;
+    }
+
+    nv_connector = to_nv_connector(connector);
+    if (!nv_connector) {
+        ret = -EINVAL;
+        goto done;
+    }
+
+    if (!nv_connector->nv_detected_encoder) {
+        ret = -EINVAL;
+        goto done;
+    }
+
+    params->dpyId = nv_connector->nv_detected_encoder->hDisplay;
+
+done:
+    nv_drm_connector_put(connector);
+    return ret;
+}
+
+static int nv_drm_get_connector_id_for_dpy_id_ioctl(struct drm_device *dev,
+                                                    void *data,
+                                                    struct drm_file *filep)
+{
+    struct drm_nvidia_get_connector_id_for_dpy_id_params *params = data;
+    struct drm_connector *connector;
+    int ret = -EINVAL;
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    struct drm_connector_list_iter conn_iter;
+    nv_drm_connector_list_iter_begin(dev, &conn_iter);
+#endif
+
+    /* Look up the existing connector with the same dpyId */
+    nv_drm_for_each_connector(connector, &conn_iter, dev) {
+        if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
+            params->connectorId = connector->base.id;
+            ret = 0;
+            break;
+        }
+    }
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    nv_drm_connector_list_iter_end(&conn_iter);
+#endif
+
+    return ret;
+}
+
+static NvU32 nv_drm_get_head_bit_from_connector(struct drm_connector *connector)
+{
+    struct nv_drm_connector *nv_connector = to_nv_connector(connector);
+
+    if (connector->state && connector->state->crtc) {
+        struct nv_drm_crtc *nv_crtc = to_nv_crtc(connector->state->crtc);
+        return NVBIT(nv_crtc->head);
+    } else if (nv_connector->nv_detected_encoder &&
+               nv_connector->nv_detected_encoder->base.crtc) {
+        struct nv_drm_crtc *nv_crtc =
+            to_nv_crtc(nv_connector->nv_detected_encoder->base.crtc);
+        return NVBIT(nv_crtc->head);
+    }
+
+    return 0;
+}
+
+static int nv_drm_grant_permission_ioctl(struct drm_device *dev, void *data,
+                                         struct drm_file *filep)
+{
+    struct drm_nvidia_grant_permissions_params *params = data;
+    struct nv_drm_device *nv_dev = to_nv_device(dev);
+    struct nv_drm_connector *target_nv_connector = NULL;
+    struct nv_drm_crtc *target_nv_crtc = NULL;
+    struct drm_connector *connector, *target_connector = NULL;
+    struct drm_crtc *crtc;
+    NvU32 head = 0, freeHeadBits, targetHeadBit, possible_crtcs;
+    int ret = 0;
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    struct drm_connector_list_iter conn_iter;
+#endif
+#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
+    struct drm_modeset_acquire_ctx ctx;
+    DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
+                               ret);
+#else
+    mutex_lock(&dev->mode_config.mutex);
+#endif
+
+    /* Get the connector for the dpyId. */
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    nv_drm_connector_list_iter_begin(dev, &conn_iter);
+#endif
+    nv_drm_for_each_connector(connector, &conn_iter, dev) {
+        if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
+            target_connector =
+                nv_drm_connector_lookup(dev, filep, connector->base.id);
+            break;
+        }
+    }
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    nv_drm_connector_list_iter_end(&conn_iter);
+#endif
+
+    // Importantly, drm_connector_lookup/drm_crtc_find (with filep) will only
+    // return the object if we are master, a lessee with the object, or not
+    // master at all. It will return NULL if we are a lessee with other objects.
+    if (!target_connector) {
+        ret = -EINVAL;
+        goto done;
+    }
+    target_nv_connector = to_nv_connector(target_connector);
+    possible_crtcs =
+        target_nv_connector->nv_detected_encoder->base.possible_crtcs;
+
+    /* Target connector must not be previously granted. */
+    if (target_nv_connector->modeset_permission_filep) {
+        ret = -EINVAL;
+        goto done;
+    }
+
+    /* Add all heads that are owned and not already granted. */
+    freeHeadBits = 0;
+    nv_drm_for_each_crtc(crtc, dev) {
+        struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
+        if (nv_drm_crtc_find(dev, filep, crtc->base.id) &&
+            !nv_crtc->modeset_permission_filep &&
+            (drm_crtc_mask(crtc) & possible_crtcs)) {
+            freeHeadBits |= NVBIT(nv_crtc->head);
+        }
+    }
+
+    targetHeadBit = nv_drm_get_head_bit_from_connector(target_connector);
+    if (targetHeadBit & freeHeadBits) {
+        /* If a crtc is already being used by this connector, use it. */
+        freeHeadBits = targetHeadBit;
+    } else {
+        /* Otherwise, remove heads that are in use by other connectors. */
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+        nv_drm_connector_list_iter_begin(dev, &conn_iter);
+#endif
+        nv_drm_for_each_connector(connector, &conn_iter, dev) {
+            freeHeadBits &= ~nv_drm_get_head_bit_from_connector(connector);
+        }
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+        nv_drm_connector_list_iter_end(&conn_iter);
+#endif
+    }
+
+    /* Fail if no heads are available. */
+    if (!freeHeadBits) {
+        ret = -EINVAL;
+        goto done;
+    }
+
+    /*
+     * Loop through the crtcs again and find one with a matching head.
+     * Record the filep that is using the crtc and the connector.
+     */
+    nv_drm_for_each_crtc(crtc, dev) {
+        struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
+        if (freeHeadBits & NVBIT(nv_crtc->head)) {
+            target_nv_crtc = nv_crtc;
+            head = nv_crtc->head;
+            break;
+        }
+    }
+
+    if (!nvKms->grantPermissions(params->fd, nv_dev->pDevice, head,
+                                 params->dpyId)) {
+        ret = -EINVAL;
+        goto done;
+    }
+
+    target_nv_connector->modeset_permission_crtc = target_nv_crtc;
+    target_nv_connector->modeset_permission_filep = filep;
+    target_nv_crtc->modeset_permission_filep = filep;
+
+done:
+    if (target_connector) {
+        nv_drm_connector_put(target_connector);
+    }
+
+#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
+    DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+#else
+    mutex_unlock(&dev->mode_config.mutex);
+#endif
+
+    return ret;
+}
+
+static bool nv_drm_revoke_connector(struct nv_drm_device *nv_dev,
+                                    struct nv_drm_connector *nv_connector)
+{
+    bool ret = true;
+    if (nv_connector->modeset_permission_crtc) {
+        if (nv_connector->nv_detected_encoder) {
+            ret = nvKms->revokePermissions(
+                nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
+                nv_connector->nv_detected_encoder->hDisplay);
+        }
+        nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
+        nv_connector->modeset_permission_crtc = NULL;
+    }
+    nv_connector->modeset_permission_filep = NULL;
+    return ret;
+}
+
+static int nv_drm_revoke_permission(struct drm_device *dev,
+                                    struct drm_file *filep, NvU32 dpyId)
+{
+    struct drm_connector *connector;
+    struct drm_crtc *crtc;
+    int ret = 0;
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    struct drm_connector_list_iter conn_iter;
+#endif
+#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
+    struct drm_modeset_acquire_ctx ctx;
+    DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
+                               ret);
+#else
+    mutex_lock(&dev->mode_config.mutex);
+#endif
+
+    /*
+     * If dpyId is nonzero, revoke only that display's resources. Otherwise
+     * the file is being closed, so revoke every resource held by that filep.
+     */
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    nv_drm_connector_list_iter_begin(dev, &conn_iter);
+#endif
+    nv_drm_for_each_connector(connector, &conn_iter, dev) {
+        struct nv_drm_connector *nv_connector = to_nv_connector(connector);
+        if (nv_connector->modeset_permission_filep == filep &&
+            (!dpyId || nv_drm_connector_is_dpy_id(connector, dpyId))) {
+            if (!nv_drm_connector_revoke_permissions(dev, nv_connector)) {
+                ret = -EINVAL;
+                // Continue trying to revoke as much as possible.
+            }
+        }
+    }
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    nv_drm_connector_list_iter_end(&conn_iter);
+#endif
+
+    nv_drm_for_each_crtc(crtc, dev) {
+        struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
+        if (nv_crtc->modeset_permission_filep == filep && !dpyId) {
+            nv_crtc->modeset_permission_filep = NULL;
+        }
+    }
+
+#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
+    DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+#else
+    mutex_unlock(&dev->mode_config.mutex);
+#endif
+
+    return ret;
+}
+
+static int nv_drm_revoke_permission_ioctl(struct drm_device *dev, void *data,
+                                          struct drm_file *filep)
+{
+    struct drm_nvidia_revoke_permissions_params *params = data;
+    if (!params->dpyId) {
+        return -EINVAL;
+    }
+    return nv_drm_revoke_permission(dev, filep, params->dpyId);
+}
+
+static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
+{
+    /*
+     * Some systems, such as Android, can reach here without initializing the
+     * device, so check for that.
+     */
+    if (dev->mode_config.num_crtc > 0 &&
+        dev->mode_config.crtc_list.next != NULL &&
+        dev->mode_config.crtc_list.prev != NULL &&
+        dev->mode_config.num_connector > 0 &&
+        dev->mode_config.connector_list.next != NULL &&
+        dev->mode_config.connector_list.prev != NULL) {
+        nv_drm_revoke_permission(dev, filep, 0);
+    }
+}
+#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
+
+#if defined(NV_DRM_MASTER_HAS_LEASES)
+static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
+                                             int lessee_id)
+{
+    int object;
+    void *entry;
+
+    while (master->lessor != NULL) {
+        master = master->lessor;
+    }
+
+    idr_for_each_entry(&master->lessee_idr, entry, object)
+    {
+        if (object == lessee_id) {
+            return entry;
+        }
+    }
+
+    return NULL;
+}
+
+static void nv_drm_get_revoked_objects(struct drm_device *dev,
+                                       struct drm_file *filep, unsigned int cmd,
+                                       unsigned long arg, int **objects,
+                                       int *objects_count)
+{
+    unsigned int ioc_size;
+    struct drm_mode_revoke_lease revoke_lease;
+    struct drm_master *lessor, *lessee;
+    void *entry;
+    int *objs;
+    int obj, obj_count, obj_i;
+
+    ioc_size = _IOC_SIZE(cmd);
+    if (ioc_size > sizeof(revoke_lease)) {
+        return;
+    }
+
+    if (copy_from_user(&revoke_lease, (void __user *)arg, ioc_size) != 0) {
+        return;
+    }
+
+    lessor = nv_drm_file_get_master(filep);
+    if (lessor == NULL) {
+        return;
+    }
+
+    mutex_lock(&dev->mode_config.idr_mutex);
+    lessee = nv_drm_find_lessee(lessor, revoke_lease.lessee_id);
+
+    if (lessee == NULL) {
+        goto done;
+    }
+
+    obj_count = 0;
+    idr_for_each_entry(&lessee->leases, entry, obj) {
+        ++obj_count;
+    }
+    if (obj_count == 0) {
+        goto done;
+    }
+
+    objs = nv_drm_calloc(obj_count, sizeof(int));
+    if (objs == NULL) {
+        goto done;
+    }
+
+    obj_i = 0;
+    idr_for_each_entry(&lessee->leases, entry, obj) {
+        objs[obj_i++] = obj;
+    }
+    *objects = objs;
+    *objects_count = obj_count;
+
+done:
+    mutex_unlock(&dev->mode_config.idr_mutex);
+    drm_master_put(&lessor);
+}
+
+static bool nv_drm_is_in_objects(int object, int *objects, int objects_count)
+{
+    int i;
+    for (i = 0; i < objects_count; ++i) {
+        if (objects[i] == object) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static void nv_drm_finish_revoking_objects(struct drm_device *dev,
+                                           struct drm_file *filep, int *objects,
+                                           int objects_count)
+{
+    struct drm_connector *connector;
+    struct drm_crtc *crtc;
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    struct drm_connector_list_iter conn_iter;
+#endif
+#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
+    int ret = 0;
+    struct drm_modeset_acquire_ctx ctx;
+    DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
+                               ret);
+#else
+    mutex_lock(&dev->mode_config.mutex);
+#endif
+
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    nv_drm_connector_list_iter_begin(dev, &conn_iter);
+#endif
+    nv_drm_for_each_connector(connector, &conn_iter, dev) {
+        struct nv_drm_connector *nv_connector = to_nv_connector(connector);
+        if (nv_connector->modeset_permission_filep &&
+            nv_drm_is_in_objects(connector->base.id, objects, objects_count)) {
+            nv_drm_connector_revoke_permissions(dev, nv_connector);
+        }
+    }
+#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
+    nv_drm_connector_list_iter_end(&conn_iter);
+#endif
+
+    nv_drm_for_each_crtc(crtc, dev) {
+        struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
+        if (nv_crtc->modeset_permission_filep &&
+            nv_drm_is_in_objects(crtc->base.id, objects, objects_count)) {
+            nv_crtc->modeset_permission_filep = NULL;
+        }
+    }
+
+#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
+    DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+#else
+    mutex_unlock(&dev->mode_config.mutex);
+#endif
+}
+#endif /* NV_DRM_MASTER_HAS_LEASES */
+
 #if defined(NV_DRM_BUS_PRESENT)

 #if defined(NV_DRM_BUS_HAS_GET_IRQ)
@@ -766,12 +1225,50 @@ static struct drm_bus nv_drm_bus = {

 #endif /* NV_DRM_BUS_PRESENT */

+/*
+ * Wrapper around drm_ioctl that hooks into upstream ioctls.
+ *
+ * Currently used to add additional handling to REVOKE_LEASE.
+ */
+static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+    long retcode;
+
+#if defined(NV_DRM_MASTER_HAS_LEASES)
+    struct drm_file *file_priv = filp->private_data;
+    struct drm_device *dev = file_priv->minor->dev;
+    int *objects = NULL;
+    int objects_count = 0;
+
+    if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE) {
+        // Save the revoked objects before revoking.
+        nv_drm_get_revoked_objects(dev, file_priv, cmd, arg, &objects,
+                                   &objects_count);
+    }
+#endif
+
+    retcode = drm_ioctl(filp, cmd, arg);
+
+#if defined(NV_DRM_MASTER_HAS_LEASES)
+    if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE && objects) {
+        if (retcode == 0) {
+            // If revoking was successful, finish revoking the objects.
+            nv_drm_finish_revoking_objects(dev, file_priv, objects,
+                                           objects_count);
+        }
+        nv_drm_free(objects);
+    }
+#endif
+
+    return retcode;
+}
+
 static const struct file_operations nv_drm_fops = {
    .owner          = THIS_MODULE,

    .open           = drm_open,
    .release        = drm_release,
-    .unlocked_ioctl = drm_ioctl,
+    .unlocked_ioctl = nv_drm_ioctl,
 #if defined(CONFIG_COMPAT)
    .compat_ioctl   = drm_compat_ioctl,
 #endif
@@ -807,11 +1304,11 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
    DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
                      nv_drm_fence_supported_ioctl,
                      DRM_RENDER_ALLOW|DRM_UNLOCKED),
-    DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_CONTEXT_CREATE,
-                      nv_drm_fence_context_create_ioctl,
+    DRM_IOCTL_DEF_DRV(NVIDIA_PRIME_FENCE_CONTEXT_CREATE,
+                      nv_drm_prime_fence_context_create_ioctl,
                      DRM_RENDER_ALLOW|DRM_UNLOCKED),
-    DRM_IOCTL_DEF_DRV(NVIDIA_GEM_FENCE_ATTACH,
-                      nv_drm_gem_fence_attach_ioctl,
+    DRM_IOCTL_DEF_DRV(NVIDIA_GEM_PRIME_FENCE_ATTACH,
+                      nv_drm_gem_prime_fence_attach_ioctl,
                      DRM_RENDER_ALLOW|DRM_UNLOCKED),
 #endif

@@ -837,6 +1334,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
    DRM_IOCTL_DEF_DRV(NVIDIA_GEM_IDENTIFY_OBJECT,
                      nv_drm_gem_identify_object_ioctl,
                      DRM_RENDER_ALLOW|DRM_UNLOCKED),
+    DRM_IOCTL_DEF_DRV(NVIDIA_DMABUF_SUPPORTED,
+                      nv_drm_dmabuf_supported_ioctl,
+                      DRM_RENDER_ALLOW|DRM_UNLOCKED),
+    DRM_IOCTL_DEF_DRV(NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID,
+                      nv_drm_get_dpy_id_for_connector_id_ioctl,
+                      DRM_RENDER_ALLOW|DRM_UNLOCKED),
+    DRM_IOCTL_DEF_DRV(NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID,
+                      nv_drm_get_connector_id_for_dpy_id_ioctl,
+                      DRM_RENDER_ALLOW|DRM_UNLOCKED),
+    DRM_IOCTL_DEF_DRV(NVIDIA_GRANT_PERMISSIONS,
+                      nv_drm_grant_permission_ioctl,
+                      DRM_UNLOCKED|DRM_MASTER),
+    DRM_IOCTL_DEF_DRV(NVIDIA_REVOKE_PERMISSIONS,
+                      nv_drm_revoke_permission_ioctl,
+                      DRM_UNLOCKED|DRM_MASTER),
 #endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
 };

@@ -894,6 +1406,9 @@ static struct drm_driver nv_drm_driver = {

    .load                   = nv_drm_load,
    .unload                 = nv_drm_unload,
+#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
+    .postclose              = nv_drm_postclose,
+#endif

    .fops                   = &nv_drm_fops,

--- a/kernel-open/nvidia-drm/nvidia-drm-encoder.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-encoder.c
@@ -205,7 +205,7 @@ nv_drm_add_encoder(struct drm_device *dev, NvKmsKapiDisplay hDisplay)
    encoder = nv_drm_encoder_new(dev,
                                 displayInfo->handle,
                                 connectorInfo->signalFormat,
-                                 get_crtc_mask(dev, connectorInfo->headMask));
+                                 get_crtc_mask(dev, displayInfo->headMask));

    if (IS_ERR(encoder)) {
        ret = PTR_ERR(encoder);
--- a/kernel-open/nvidia-drm/nvidia-drm-fb.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-fb.c
@@ -150,6 +150,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,

    for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
        if (nv_fb->nv_gem[i] != NULL) {
+            if (!nvKms->isMemoryValidForDisplay(nv_dev->pDevice,
+                                                nv_fb->nv_gem[i]->pMemory)) {
+                NV_DRM_DEV_LOG_INFO(
+                        nv_dev,
+                        "Framebuffer memory not appropriate for scanout");
+                goto fail;
+            }
+
            params.planes[i].memory = nv_fb->nv_gem[i]->pMemory;
            params.planes[i].offset = nv_fb->base.offsets[i];
            params.planes[i].pitch = nv_fb->base.pitches[i];
@@ -164,6 +172,17 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
        params.layout = (modifier & 0x10) ?
            NvKmsSurfaceMemoryLayoutBlockLinear :
            NvKmsSurfaceMemoryLayoutPitch;
+
+        // See the definition of DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D: this
+        // tests 'c', the lossless compression field of the modifier.
+        if (params.layout == NvKmsSurfaceMemoryLayoutBlockLinear &&
+            (modifier >> 23) & 0x7) {
+            NV_DRM_DEV_LOG_ERR(
+                    nv_dev,
+                    "Cannot create FB from compressible surface allocation");
+            goto fail;
+        }
+
        params.log2GobsPerBlockY = modifier & 0xf;
    } else {
        params.explicit_layout = false;
@@ -174,11 +193,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
    nv_fb->pSurface = nvKms->createSurface(nv_dev->pDevice, &params);
    if (nv_fb->pSurface == NULL) {
        NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "Failed to create NvKmsKapiSurface");
-        drm_framebuffer_cleanup(&nv_fb->base);
-        return -EINVAL;
+        goto fail;
    }

    return 0;
+
+fail:
+    drm_framebuffer_cleanup(&nv_fb->base);
+    return -EINVAL;
 }

 struct drm_framebuffer *nv_drm_internal_framebuffer_create(
--- a/kernel-open/nvidia-drm/nvidia-drm-prime-fence.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-prime-fence.c
@@ -31,17 +31,28 @@
 #include "nvidia-drm-priv.h"
 #include "nvidia-drm-ioctl.h"
 #include "nvidia-drm-gem.h"
-#include "nvidia-drm-prime-fence.h"
+#include "nvidia-drm-fence.h"
 #include "nvidia-dma-resv-helper.h"

 #if defined(NV_DRM_FENCE_AVAILABLE)

 #include "nvidia-dma-fence-helper.h"

-struct nv_drm_fence_context {
-    struct nv_drm_device *nv_dev;
+struct nv_drm_fence_context;

+struct nv_drm_fence_context_ops {
+    void (*destroy)(struct nv_drm_fence_context *nv_fence_context);
+};
+
+struct nv_drm_fence_context {
+    const struct nv_drm_fence_context_ops *ops;
+
+    struct nv_drm_device *nv_dev;
    uint32_t context;
+};
+
+struct nv_drm_prime_fence_context {
+    struct nv_drm_fence_context base;

    NvU64 fenceSemIndex; /* Index into semaphore surface */

@@ -53,10 +64,10 @@ struct nv_drm_fence_context {
    spinlock_t lock;

    /*
-     * Software signaling structures. __nv_drm_fence_context_new()
-     * allocates channel event and __nv_drm_fence_context_destroy() frees it.
-     * There are no simultaneous read/write access to 'cb', therefore it does
-     * not require spin-lock protection.
+     * Software signaling structures. __nv_drm_prime_fence_context_new()
+     * allocates the channel event and __nv_drm_prime_fence_context_destroy()
+     * frees it. There is no simultaneous read/write access to 'cb', so it
+     * does not require spin-lock protection.
     */
    struct NvKmsKapiChannelEvent *cb;

@@ -79,7 +90,7 @@ struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
 }

 static const char*
-nv_drm_gem_prime_fence_op_get_driver_name(nv_dma_fence_t *fence)
+nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
 {
    return "NVIDIA";
 }
@@ -122,7 +133,7 @@ nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
 }

 static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
-    .get_driver_name = nv_drm_gem_prime_fence_op_get_driver_name,
+    .get_driver_name = nv_drm_gem_fence_op_get_driver_name,
    .get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
    .enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
    .release = nv_drm_gem_prime_fence_op_release,
@@ -138,7 +149,7 @@ __nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
 }

 static void nv_drm_gem_prime_force_fence_signal(
-    struct nv_drm_fence_context *nv_fence_context)
+    struct nv_drm_prime_fence_context *nv_fence_context)
 {
    WARN_ON(!spin_is_locked(&nv_fence_context->lock));

@@ -158,7 +169,7 @@ static void nv_drm_gem_prime_fence_event
    NvU32 dataU32
 )
 {
-    struct nv_drm_fence_context *nv_fence_context = dataPtr;
+    struct nv_drm_prime_fence_context *nv_fence_context = dataPtr;

    spin_lock(&nv_fence_context->lock);

@@ -187,11 +198,53 @@ static void nv_drm_gem_prime_fence_event
    spin_unlock(&nv_fence_context->lock);
 }

-static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
-    struct nv_drm_device *nv_dev,
-    struct drm_nvidia_fence_context_create_params *p)
+static inline struct nv_drm_prime_fence_context*
+to_prime_fence_context(struct nv_drm_fence_context *nv_fence_context) {
+    return (struct nv_drm_prime_fence_context *)nv_fence_context;
+}
+
+static void __nv_drm_prime_fence_context_destroy(
+    struct nv_drm_fence_context *nv_fence_context)
 {
-    struct nv_drm_fence_context *nv_fence_context;
+    struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
+    struct nv_drm_prime_fence_context *nv_prime_fence_context =
+        to_prime_fence_context(nv_fence_context);
+
+    /*
+     * Free the channel event before destroying the fence context; otherwise
+     * the event callback continues to get called.
+     */
+    nvKms->freeChannelEvent(nv_dev->pDevice, nv_prime_fence_context->cb);
+
+    /* Force signal all pending fences and empty pending list */
+    spin_lock(&nv_prime_fence_context->lock);
+
+    nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
+
+    spin_unlock(&nv_prime_fence_context->lock);
+
+    /* Free nvkms resources */
+
+    nvKms->unmapMemory(nv_dev->pDevice,
+                       nv_prime_fence_context->pSemSurface,
+                       NVKMS_KAPI_MAPPING_TYPE_KERNEL,
+                       (void *) nv_prime_fence_context->pLinearAddress);
+
+    nvKms->freeMemory(nv_dev->pDevice, nv_prime_fence_context->pSemSurface);
+
+    nv_drm_free(nv_fence_context);
+}
+
+static struct nv_drm_fence_context_ops nv_drm_prime_fence_context_ops = {
+    .destroy = __nv_drm_prime_fence_context_destroy,
+};
+
+static inline struct nv_drm_prime_fence_context *
+__nv_drm_prime_fence_context_new(
+    struct nv_drm_device *nv_dev,
+    struct drm_nvidia_prime_fence_context_create_params *p)
+{
+    struct nv_drm_prime_fence_context *nv_prime_fence_context;
    struct NvKmsKapiMemory *pSemSurface;
    NvU32 *pLinearAddress;

@@ -225,9 +278,9 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
     * event for it.
     */

-    if ((nv_fence_context = nv_drm_calloc(
+    if ((nv_prime_fence_context = nv_drm_calloc(
                    1,
-                    sizeof(*nv_fence_context))) == NULL) {
+                    sizeof(*nv_prime_fence_context))) == NULL) {
        goto failed_alloc_fence_context;
    }

@@ -236,17 +289,18 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
     * to check a return value.
     */

-    *nv_fence_context = (struct nv_drm_fence_context) {
-        .nv_dev = nv_dev,
-        .context = nv_dma_fence_context_alloc(1),
+    *nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
+        .base.ops = &nv_drm_prime_fence_context_ops,
+        .base.nv_dev = nv_dev,
+        .base.context = nv_dma_fence_context_alloc(1),
        .pSemSurface = pSemSurface,
        .pLinearAddress = pLinearAddress,
        .fenceSemIndex = p->index,
    };

-    INIT_LIST_HEAD(&nv_fence_context->pending);
+    INIT_LIST_HEAD(&nv_prime_fence_context->pending);

-    spin_lock_init(&nv_fence_context->lock);
+    spin_lock_init(&nv_prime_fence_context->lock);

    /*
     * Except 'cb', the fence context should be completely initialized
@@ -256,22 +310,22 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
     * There are no simultaneous read/write access to 'cb', therefore it does
     * not require spin-lock protection.
     */
-    nv_fence_context->cb =
+    nv_prime_fence_context->cb =
        nvKms->allocateChannelEvent(nv_dev->pDevice,
                                    nv_drm_gem_prime_fence_event,
-                                    nv_fence_context,
+                                    nv_prime_fence_context,
                                    p->event_nvkms_params_ptr,
                                    p->event_nvkms_params_size);
-    if (!nv_fence_context->cb) {
+    if (!nv_prime_fence_context->cb) {
        NV_DRM_DEV_LOG_ERR(nv_dev,
                           "Failed to allocate fence signaling event");
        goto failed_to_allocate_channel_event;
    }

-    return nv_fence_context;
+    return nv_prime_fence_context;

 failed_to_allocate_channel_event:
-    nv_drm_free(nv_fence_context);
+    nv_drm_free(nv_prime_fence_context);

 failed_alloc_fence_context:

@@ -287,38 +341,8 @@ failed:
    return NULL;
 }

-static void __nv_drm_fence_context_destroy(
-    struct nv_drm_fence_context *nv_fence_context)
-{
-    struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
-
-    /*
-     * Free channel event before destroying the fence context, otherwise event
-     * callback continue to get called.
-     */
-    nvKms->freeChannelEvent(nv_dev->pDevice, nv_fence_context->cb);
-
-    /* Force signal all pending fences and empty pending list */
-    spin_lock(&nv_fence_context->lock);
-
-    nv_drm_gem_prime_force_fence_signal(nv_fence_context);
-
-    spin_unlock(&nv_fence_context->lock);
-
-    /* Free nvkms resources */
-
-    nvKms->unmapMemory(nv_dev->pDevice,
-                       nv_fence_context->pSemSurface,
-                       NVKMS_KAPI_MAPPING_TYPE_KERNEL,
-                       (void *) nv_fence_context->pLinearAddress);
-
-    nvKms->freeMemory(nv_dev->pDevice, nv_fence_context->pSemSurface);
-
-    nv_drm_free(nv_fence_context);
-}
-
-static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
-    struct nv_drm_fence_context *nv_fence_context,
+static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
+    struct nv_drm_prime_fence_context *nv_prime_fence_context,
    unsigned int seqno)
 {
    struct nv_drm_prime_fence *nv_fence;
@@ -329,14 +353,14 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
        goto out;
    }

-    spin_lock(&nv_fence_context->lock);
+    spin_lock(&nv_prime_fence_context->lock);

    /*
     * If seqno wrapped, force signal fences to make sure none of them
     * get stuck.
     */
-    if (seqno < nv_fence_context->last_seqno) {
-        nv_drm_gem_prime_force_fence_signal(nv_fence_context);
+    if (seqno < nv_prime_fence_context->last_seqno) {
+        nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
    }

    INIT_LIST_HEAD(&nv_fence->list_entry);
@@ -344,17 +368,17 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
    spin_lock_init(&nv_fence->lock);

    nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
-                      &nv_fence->lock, nv_fence_context->context,
+                      &nv_fence->lock, nv_prime_fence_context->base.context,
                      seqno);

    /* The context maintains a reference to any pending fences. */
    nv_dma_fence_get(&nv_fence->base);

-    list_add_tail(&nv_fence->list_entry, &nv_fence_context->pending);
+    list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);

-    nv_fence_context->last_seqno = seqno;
+    nv_prime_fence_context->last_seqno = seqno;

-    spin_unlock(&nv_fence_context->lock);
+    spin_unlock(&nv_prime_fence_context->lock);

 out:
    return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
@@ -388,12 +412,15 @@ static inline struct nv_drm_gem_fence_context *to_gem_fence_context(
 * because tear down sequence calls to flush all existing
 * worker thread.
 */
-static void __nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
+static void
+__nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
 {
    struct nv_drm_gem_fence_context *nv_gem_fence_context =
        to_gem_fence_context(nv_gem);
+    struct nv_drm_fence_context *nv_fence_context =
+        nv_gem_fence_context->nv_fence_context;

-    __nv_drm_fence_context_destroy(nv_gem_fence_context->nv_fence_context);
+    nv_fence_context->ops->destroy(nv_fence_context);

    nv_drm_free(nv_gem_fence_context);
 }
@@ -403,7 +430,8 @@ const struct nv_drm_gem_object_funcs nv_gem_fence_context_ops = {
 };

 static inline
-struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
+struct nv_drm_gem_fence_context *
+__nv_drm_gem_object_fence_context_lookup(
    struct drm_device *dev,
    struct drm_file *filp,
    u32 handle)
@@ -419,11 +447,13 @@ struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
    return to_gem_fence_context(nv_gem);
 }

-int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
-                                      void *data, struct drm_file *filep)
+static int
+__nv_drm_gem_fence_context_create(struct drm_device *dev,
+                                  struct nv_drm_fence_context *nv_fence_context,
+                                  u32 *handle,
+                                  struct drm_file *filep)
 {
    struct nv_drm_device *nv_dev = to_nv_device(dev);
-    struct drm_nvidia_fence_context_create_params *p = data;
    struct nv_drm_gem_fence_context *nv_gem_fence_context = NULL;

    if ((nv_gem_fence_context = nv_drm_calloc(
@@ -432,10 +462,7 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
        goto done;
    }

-    if ((nv_gem_fence_context->nv_fence_context =
-                __nv_drm_fence_context_new(nv_dev, p)) == NULL) {
-        goto fence_context_new_failed;
-    }
+    nv_gem_fence_context->nv_fence_context = nv_fence_context;

    nv_drm_gem_object_init(nv_dev,
                           &nv_gem_fence_context->base,
@@ -445,26 +472,51 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,

    return nv_drm_gem_handle_create_drop_reference(filep,
                                                   &nv_gem_fence_context->base,
-                                                   &p->handle);
-
-fence_context_new_failed:
-    nv_drm_free(nv_gem_fence_context);
+                                                   handle);

 done:
    return -ENOMEM;
 }

-int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
-                                  void *data, struct drm_file *filep)
+int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
+                                            void *data, struct drm_file *filep)
+{
+    struct nv_drm_device *nv_dev = to_nv_device(dev);
+    struct drm_nvidia_prime_fence_context_create_params *p = data;
+    struct nv_drm_prime_fence_context *nv_prime_fence_context =
+        __nv_drm_prime_fence_context_new(nv_dev, p);
+    int err;
+
+    if (!nv_prime_fence_context) {
+        goto done;
+    }
+
+    err = __nv_drm_gem_fence_context_create(dev,
+                                            &nv_prime_fence_context->base,
+                                            &p->handle,
+                                            filep);
+    if (err) {
+        __nv_drm_prime_fence_context_destroy(&nv_prime_fence_context->base);
+    }
+
+    return err;
+
+done:
+    return -ENOMEM;
+}
+
+int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
+                                        void *data, struct drm_file *filep)
 {
    int ret = -EINVAL;
    struct nv_drm_device *nv_dev = to_nv_device(dev);
-    struct drm_nvidia_gem_fence_attach_params *p = data;
+    struct drm_nvidia_gem_prime_fence_attach_params *p = data;

    struct nv_drm_gem_object *nv_gem;
    struct nv_drm_gem_fence_context *nv_gem_fence_context;

    nv_dma_fence_t *fence;
+    nv_dma_resv_t *resv;

    nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);

@@ -490,9 +542,22 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
        goto fence_context_lookup_failed;
    }

-    if (IS_ERR(fence = __nv_drm_fence_context_create_fence(
-                            nv_gem_fence_context->nv_fence_context,
-                            p->sem_thresh))) {
+    if (nv_gem_fence_context->nv_fence_context->ops !=
+        &nv_drm_prime_fence_context_ops) {
+
+        NV_DRM_DEV_LOG_ERR(
+            nv_dev,
+            "Wrong fence context type: 0x%08x",
+            p->fence_context_handle);
+
+        goto fence_context_create_fence_failed;
+    }
+
+    fence = __nv_drm_prime_fence_context_create_fence(
+                to_prime_fence_context(nv_gem_fence_context->nv_fence_context),
+                p->sem_thresh);
+
+    if (IS_ERR(fence)) {
        ret = PTR_ERR(fence);

        NV_DRM_DEV_LOG_ERR(
@@ -502,18 +567,20 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
        goto fence_context_create_fence_failed;
    }

-    nv_dma_resv_lock(&nv_gem->resv, NULL);
+    resv = nv_drm_gem_res_obj(nv_gem);

-    ret = nv_dma_resv_reserve_fences(&nv_gem->resv, 1, false);
+    nv_dma_resv_lock(resv, NULL);
+
+    ret = nv_dma_resv_reserve_fences(resv, 1, false);
    if (ret == 0) {
-        nv_dma_resv_add_excl_fence(&nv_gem->resv, fence);
+        nv_dma_resv_add_excl_fence(resv, fence);
    } else {
        NV_DRM_DEV_LOG_ERR(
            nv_dev,
            "Failed to reserve fence. Error code: %d", ret);
    }

-    nv_dma_resv_unlock(&nv_gem->resv);
+    nv_dma_resv_unlock(resv);

    /* dma_resv_add_excl_fence takes its own reference to the fence. */
    nv_dma_fence_put(fence);
--- a/kernel-open/nvidia-drm/nvidia-drm-prime-fence.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-prime-fence.h
@@ -35,11 +35,11 @@ struct drm_device;
 int nv_drm_fence_supported_ioctl(struct drm_device *dev,
                                 void *data, struct drm_file *filep);

-int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
-                                      void *data, struct drm_file *filep);
+int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
+                                            void *data, struct drm_file *filep);

-int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
-                                  void *data, struct drm_file *filep);
+int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
+                                        void *data, struct drm_file *filep);

 #endif /* NV_DRM_FENCE_AVAILABLE */

--- a/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c
@@ -131,11 +131,11 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
    const struct nv_drm_gem_object *nv_gem_src);

 static int __nv_drm_gem_nvkms_map(
-    struct nv_drm_device *nv_dev,
-    struct NvKmsKapiMemory *pMemory,
-    struct nv_drm_gem_nvkms_memory *nv_nvkms_memory,
-    uint64_t size)
+    struct nv_drm_gem_nvkms_memory *nv_nvkms_memory)
 {
+    struct nv_drm_device *nv_dev = nv_nvkms_memory->base.nv_dev;
+    struct NvKmsKapiMemory *pMemory = nv_nvkms_memory->base.pMemory;
+
    if (!nv_dev->hasVideoMemory) {
        return 0;
    }
@@ -153,7 +153,7 @@ static int __nv_drm_gem_nvkms_map(

    nv_nvkms_memory->pWriteCombinedIORemapAddress = ioremap_wc(
            (uintptr_t)nv_nvkms_memory->pPhysicalAddress,
-            size);
+            nv_nvkms_memory->base.base.size);

    if (!nv_nvkms_memory->pWriteCombinedIORemapAddress) {
        NV_DRM_DEV_LOG_INFO(
@@ -167,6 +167,22 @@ static int __nv_drm_gem_nvkms_map(
    return 0;
 }

+static void *__nv_drm_gem_nvkms_prime_vmap(
+    struct nv_drm_gem_object *nv_gem)
+{
+    struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
+        to_nv_nvkms_memory(nv_gem);
+
+    if (!nv_nvkms_memory->physically_mapped) {
+        int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
+        if (ret) {
+           return ERR_PTR(ret);
+        }
+    }
+
+    return nv_nvkms_memory->pWriteCombinedIORemapAddress;
+}
+
 static int __nv_drm_gem_map_nvkms_memory_offset(
    struct nv_drm_device *nv_dev,
    struct nv_drm_gem_object *nv_gem,
@@ -176,10 +192,7 @@ static int __nv_drm_gem_map_nvkms_memory_offset(
        to_nv_nvkms_memory(nv_gem);

    if (!nv_nvkms_memory->physically_mapped) {
-        int ret = __nv_drm_gem_nvkms_map(nv_dev,
-                                         nv_nvkms_memory->base.pMemory,
-                                         nv_nvkms_memory,
-                                         nv_nvkms_memory->base.base.size);
+        int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
        if (ret) {
           return ret;
        }
@@ -214,6 +227,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
 const struct nv_drm_gem_object_funcs nv_gem_nvkms_memory_ops = {
    .free = __nv_drm_gem_nvkms_memory_free,
    .prime_dup = __nv_drm_gem_nvkms_prime_dup,
+    .prime_vmap = __nv_drm_gem_nvkms_prime_vmap,
    .mmap = __nv_drm_gem_nvkms_mmap,
    .handle_vma_fault = __nv_drm_gem_nvkms_handle_vma_fault,
    .create_mmap_offset = __nv_drm_gem_map_nvkms_memory_offset,
@@ -314,7 +328,7 @@ int nv_drm_dumb_create(
     * to use dumb buffers for software rendering, so they're not much use
     * without a CPU mapping.
     */
-    ret = __nv_drm_gem_nvkms_map(nv_dev, pMemory, nv_nvkms_memory, args->size);
+    ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
    if (ret) {
        nv_drm_gem_object_unreference_unlocked(&nv_nvkms_memory->base);
        goto fail;
--- a/kernel-open/nvidia-drm/nvidia-drm-gem.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-gem.c
@@ -26,7 +26,7 @@

 #include "nvidia-drm-priv.h"
 #include "nvidia-drm-ioctl.h"
-#include "nvidia-drm-prime-fence.h"
+#include "nvidia-drm-fence.h"
 #include "nvidia-drm-gem.h"
 #include "nvidia-drm-gem-nvkms-memory.h"
 #include "nvidia-drm-gem-user-memory.h"
@@ -81,10 +81,13 @@ typedef struct dma_buf_map nv_sysio_map_t;
 static int nv_drm_gem_vmap(struct drm_gem_object *gem,
                           nv_sysio_map_t *map)
 {
-    map->vaddr = nv_drm_gem_prime_vmap(gem);
-    if (map->vaddr == NULL) {
+    void *vaddr = nv_drm_gem_prime_vmap(gem);
+    if (vaddr == NULL) {
        return -ENOMEM;
+    } else if (IS_ERR(vaddr)) {
+        return PTR_ERR(vaddr);
    }
+    map->vaddr = vaddr;
    map->is_iomem = true;
    return 0;
 }
@@ -132,13 +135,8 @@ void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,

    /* Initialize the gem object */

-#if defined(NV_DRM_FENCE_AVAILABLE)
+#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
    nv_dma_resv_init(&nv_gem->resv);
-
-#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
-    nv_gem->base.resv = &nv_gem->resv;
-#endif
-
 #endif

 #if !defined(NV_DRM_DRIVER_HAS_GEM_FREE_OBJECT)
@@ -212,8 +210,7 @@ void nv_drm_gem_prime_vunmap(struct drm_gem_object *gem, void *address)
 nv_dma_resv_t* nv_drm_gem_prime_res_obj(struct drm_gem_object *obj)
 {
    struct nv_drm_gem_object *nv_gem = to_nv_gem_object(obj);
-
-    return &nv_gem->resv;
+    return nv_drm_gem_res_obj(nv_gem);
 }
 #endif

--- a/kernel-open/nvidia-drm/nvidia-drm-gem.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-gem.h
@@ -45,6 +45,8 @@
 #include "nvidia-dma-resv-helper.h"
 #endif

+#include "linux/dma-buf.h"
+
 struct nv_drm_gem_object;

 struct nv_drm_gem_object_funcs {
@@ -71,7 +73,7 @@ struct nv_drm_gem_object {

    struct NvKmsKapiMemory *pMemory;

-#if defined(NV_DRM_FENCE_AVAILABLE)
+#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
    nv_dma_resv_t  resv;
 #endif
 };
@@ -177,6 +179,17 @@ static inline int nv_drm_gem_handle_create(struct drm_file *filp,
    return drm_gem_handle_create(filp, &nv_gem->base, handle);
 }

+#if defined(NV_DRM_FENCE_AVAILABLE)
+static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem)
+{
+#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
+    return nv_gem->base.resv;
+#else
+    return nv_gem->base.dma_buf ? nv_gem->base.dma_buf->resv : &nv_gem->resv;
+#endif
+}
+#endif
+
 void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
                            struct nv_drm_gem_object *nv_gem,
                            const struct nv_drm_gem_object_funcs * const ops,
--- a/kernel-open/nvidia-drm/nvidia-drm-helper.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-helper.c
@@ -28,6 +28,8 @@
 */

 #include "nvidia-drm-helper.h"
+#include "nvidia-drm-priv.h"
+#include "nvidia-drm-crtc.h"

 #include "nvmisc.h"

@@ -148,6 +150,18 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
            goto free;
    }

+#if defined(NV_DRM_ROTATION_AVAILABLE)
+    nv_drm_for_each_plane(plane, dev) {
+        plane_state = drm_atomic_get_plane_state(state, plane);
+        if (IS_ERR(plane_state)) {
+            ret = PTR_ERR(plane_state);
+            goto free;
+        }
+
+        plane_state->rotation = DRM_MODE_ROTATE_0;
+    }
+#endif
+
    nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
        ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
        if (ret < 0)
--- a/kernel-open/nvidia-drm/nvidia-drm-helper.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-helper.h
@@ -35,6 +35,35 @@
 #include <drm/drm_drv.h>
 #endif

+#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
+/* For DRM_ROTATE_* and DRM_REFLECT_* */
+#include <drm/drm_blend.h>
+#endif
+
+#if defined(NV_DRM_ROTATION_AVAILABLE)
+/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
+#include <uapi/drm/drm_mode.h>
+#endif
+
+#if defined(NV_DRM_ROTATION_AVAILABLE)
+/*
+ * Commit c2c446ad29437bb92b157423c632286608ebd3ec (2017-05-19) added
+ * DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to the UAPI and removed
+ * DRM_ROTATE_* and DRM_REFLECT_*.
+ */
+#if !defined(DRM_MODE_ROTATE_0)
+#define DRM_MODE_ROTATE_0       DRM_ROTATE_0
+#define DRM_MODE_ROTATE_90      DRM_ROTATE_90
+#define DRM_MODE_ROTATE_180     DRM_ROTATE_180
+#define DRM_MODE_ROTATE_270     DRM_ROTATE_270
+#define DRM_MODE_REFLECT_X      DRM_REFLECT_X
+#define DRM_MODE_REFLECT_Y      DRM_REFLECT_Y
+#define DRM_MODE_ROTATE_MASK    DRM_ROTATE_MASK
+#define DRM_MODE_REFLECT_MASK   DRM_REFLECT_MASK
+#endif
+
+#endif /* NV_DRM_ROTATION_AVAILABLE */
+
 /*
 * drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
 * (2017-09-26) and drm_dev_unref() is removed by
@@ -277,11 +306,33 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
    for_each_plane_in_state(__state, plane, plane_state, __i)
 #endif

-static inline struct drm_crtc *nv_drm_crtc_find(struct drm_device *dev,
-    uint32_t id)
+static inline struct drm_connector *
+nv_drm_connector_lookup(struct drm_device *dev, struct drm_file *filep,
+                        uint32_t id)
+{
+#if !defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
+    return drm_connector_find(dev, id);
+#elif defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
+    return drm_connector_lookup(dev, filep, id);
+#else
+    return drm_connector_lookup(dev, id);
+#endif
+}
+
+static inline void nv_drm_connector_put(struct drm_connector *connector)
+{
+#if defined(NV_DRM_CONNECTOR_PUT_PRESENT)
+    drm_connector_put(connector);
+#elif defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
+    drm_connector_unreference(connector);
+#endif
+}
+
+static inline struct drm_crtc *
+nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
 {
 #if defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
-    return drm_crtc_find(dev, NULL /* file_priv */, id);
+    return drm_crtc_find(dev, filep, id);
 #else
    return drm_crtc_find(dev, id);
 #endif
@@ -297,6 +348,30 @@ static inline struct drm_encoder *nv_drm_encoder_find(struct drm_device *dev,
 #endif
 }

+#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
+#include <drm/drm_auth.h>
+#endif
+#if defined(NV_DRM_DRM_FILE_H_PRESENT)
+#include <drm/drm_file.h>
+#endif
+
+/*
+ * drm_file_get_master() added by commit 56f0729a510f ("drm: protect drm_master
+ * pointers in drm_lease.c") in v5.15 (2021-07-20)
+ */
+static inline struct drm_master *nv_drm_file_get_master(struct drm_file *filep)
+{
+#if defined(NV_DRM_FILE_GET_MASTER_PRESENT)
+    return drm_file_get_master(filep);
+#else
+    if (filep->master) {
+        return drm_master_get(filep->master);
+    } else {
+        return NULL;
+    }
+#endif
+}
+
 /*
 * drm_connector_for_each_possible_encoder() is added by commit
 * 83aefbb887b59df0b3520965c3701e01deacfc52 which was Signed-off-by:
--- a/kernel-open/nvidia-drm/nvidia-drm-ioctl.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-ioctl.h
@@ -34,8 +34,8 @@
 #define DRM_NVIDIA_GEM_IMPORT_USERSPACE_MEMORY      0x02
 #define DRM_NVIDIA_GET_DEV_INFO                     0x03
 #define DRM_NVIDIA_FENCE_SUPPORTED                  0x04
-#define DRM_NVIDIA_FENCE_CONTEXT_CREATE             0x05
-#define DRM_NVIDIA_GEM_FENCE_ATTACH                 0x06
+#define DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE       0x05
+#define DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH           0x06
 #define DRM_NVIDIA_GET_CLIENT_CAPABILITY            0x08
 #define DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY          0x09
 #define DRM_NVIDIA_GEM_MAP_OFFSET                   0x0a
@@ -43,6 +43,11 @@
 #define DRM_NVIDIA_GET_CRTC_CRC32_V2                0x0c
 #define DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY         0x0d
 #define DRM_NVIDIA_GEM_IDENTIFY_OBJECT              0x0e
+#define DRM_NVIDIA_DMABUF_SUPPORTED                 0x0f
+#define DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID      0x10
+#define DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID      0x11
+#define DRM_NVIDIA_GRANT_PERMISSIONS                0x12
+#define DRM_NVIDIA_REVOKE_PERMISSIONS               0x13

 #define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY                           \
    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY),      \
@@ -65,50 +70,69 @@
 #if defined(NV_LINUX)
 #define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED                         \
    DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_SUPPORTED)
+#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED                        \
+    DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_DMABUF_SUPPORTED)
 #else
 #define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED 0
+#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED 0
 #endif

-#define DRM_IOCTL_NVIDIA_FENCE_CONTEXT_CREATE                        \
-    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_CONTEXT_CREATE),   \
-             struct drm_nvidia_fence_context_create_params)
+#define DRM_IOCTL_NVIDIA_PRIME_FENCE_CONTEXT_CREATE                     \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE),\
+             struct drm_nvidia_prime_fence_context_create_params)

-#define DRM_IOCTL_NVIDIA_GEM_FENCE_ATTACH                            \
-    DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_FENCE_ATTACH),        \
-            struct drm_nvidia_gem_fence_attach_params)
+#define DRM_IOCTL_NVIDIA_GEM_PRIME_FENCE_ATTACH                         \
+    DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH),     \
+            struct drm_nvidia_gem_prime_fence_attach_params)

-#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY                       \
-    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY),  \
+#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY                          \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY),     \
             struct drm_nvidia_get_client_capability_params)

-#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32                              \
-    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32),         \
+#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32                                 \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32),            \
             struct drm_nvidia_get_crtc_crc32_params)

-#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2                           \
-    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2),      \
+#define DRM_IOCTL_NVIDIA_GET_CRTC_CRC32_V2                              \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CRTC_CRC32_V2),         \
              struct drm_nvidia_get_crtc_crc32_v2_params)

-#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY                           \
-    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY),      \
+#define DRM_IOCTL_NVIDIA_GEM_EXPORT_NVKMS_MEMORY                        \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY),   \
              struct drm_nvidia_gem_export_nvkms_memory_params)

-#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET                              \
-    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET),         \
+#define DRM_IOCTL_NVIDIA_GEM_MAP_OFFSET                                 \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_MAP_OFFSET),            \
             struct drm_nvidia_gem_map_offset_params)

-#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY                      \
-    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY), \
+#define DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY                         \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_ALLOC_NVKMS_MEMORY),    \
              struct drm_nvidia_gem_alloc_nvkms_memory_params)

-#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY                      \
-    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY), \
+#define DRM_IOCTL_NVIDIA_GEM_EXPORT_DMABUF_MEMORY                       \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY),  \
              struct drm_nvidia_gem_export_dmabuf_memory_params)

-#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT                      \
-    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
+#define DRM_IOCTL_NVIDIA_GEM_IDENTIFY_OBJECT                            \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT),       \
              struct drm_nvidia_gem_identify_object_params)

+#define DRM_IOCTL_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID                     \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID),\
+             struct drm_nvidia_get_dpy_id_for_connector_id_params)
+
+#define DRM_IOCTL_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID                     \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID),\
+             struct drm_nvidia_get_connector_id_for_dpy_id_params)
+
+#define DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS                              \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GRANT_PERMISSIONS),         \
+             struct drm_nvidia_grant_permissions_params)
+
+#define DRM_IOCTL_NVIDIA_REVOKE_PERMISSIONS                             \
+    DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_REVOKE_PERMISSIONS),        \
+             struct drm_nvidia_revoke_permissions_params)
+
 struct drm_nvidia_gem_import_nvkms_memory_params {
    uint64_t mem_size;           /* IN */

@@ -136,7 +160,7 @@ struct drm_nvidia_get_dev_info_params {
    uint32_t sector_layout;        /* OUT */
 };

-struct drm_nvidia_fence_context_create_params {
+struct drm_nvidia_prime_fence_context_create_params {
    uint32_t handle;            /* OUT GEM handle to fence context */

    uint32_t index;             /* IN Index of semaphore to use for fencing */
@@ -151,7 +175,7 @@ struct drm_nvidia_fence_context_create_params {
    uint64_t event_nvkms_params_size; /* IN */
 };

-struct drm_nvidia_gem_fence_attach_params {
+struct drm_nvidia_gem_prime_fence_attach_params {
    uint32_t handle;                /* IN GEM handle to attach fence to */
    uint32_t fence_context_handle;  /* IN GEM handle to fence context on which fence is run on */
    uint32_t sem_thresh;            /* IN Semaphore value to reach before signal */
@@ -232,4 +256,23 @@ struct drm_nvidia_gem_identify_object_params {
    drm_nvidia_gem_object_type  object_type;    /* OUT GEM object type */
 };

+struct drm_nvidia_get_dpy_id_for_connector_id_params {
+    uint32_t connectorId; /* IN */
+    uint32_t dpyId;       /* OUT */
+};
+
+struct drm_nvidia_get_connector_id_for_dpy_id_params {
+    uint32_t dpyId;       /* IN */
+    uint32_t connectorId; /* OUT */
+};
+
+struct drm_nvidia_grant_permissions_params {
+    int32_t fd;           /* IN */
+    uint32_t dpyId;       /* IN */
+};
+
+struct drm_nvidia_revoke_permissions_params {
+    uint32_t dpyId;       /* IN */
+};
+
 #endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
--- a/kernel-open/nvidia-drm/nvidia-drm.Kbuild
+++ b/kernel-open/nvidia-drm/nvidia-drm.Kbuild
@@ -16,7 +16,7 @@ NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
 NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
 NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
 NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
-NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-prime-fence.c
+NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
 NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
 NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
 NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
@@ -126,5 +126,10 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
 NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
 NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
+NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
 NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
--- a/kernel-open/nvidia-modeset/nv-kthread-q.c
+++ b/kernel-open/nvidia-modeset/nv-kthread-q.c
@@ -301,7 +301,7 @@ static void _q_flush_function(void *args)
 static void _raw_q_flush(nv_kthread_q_t *q)
 {
    nv_kthread_q_item_t q_item;
-    DECLARE_COMPLETION(completion);
+    DECLARE_COMPLETION_ONSTACK(completion);

    nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);

--- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
+++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
@@ -78,6 +78,15 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
 static bool malloc_verbose = false;
 module_param_named(malloc_verbose, malloc_verbose, bool, 0400);

+/* This parameter is used to find the dpy override conf file */
+#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)
+
+MODULE_PARM_DESC(config_file,
+                 "Path to the nvidia-modeset configuration file "
+                 "(default: disabled)");
+static char *nvkms_conf = NULL;
+module_param_named(config_file, nvkms_conf, charp, 0400);
+
 static atomic_t nvkms_alloc_called_count;

 NvBool nvkms_output_rounding_fix(void)
@@ -1370,6 +1379,117 @@ static void nvkms_proc_exit(void)
 #endif /* CONFIG_PROC_FS */
 }

+/*************************************************************************
+ * NVKMS Config File Read
+ ************************************************************************/
+static NvBool nvkms_fs_mounted(void)
+{
+    return current->fs != NULL;
+}
+
+static size_t nvkms_config_file_open
+(
+    char *fname,
+    char ** const buff
+)
+{
+    int i = 0;
+    struct file *file;
+    struct inode *file_inode;
+    size_t file_size = 0;
+    size_t read_size = 0;
+#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
+    loff_t pos = 0;
+#endif
+
+    if (!nvkms_fs_mounted()) {
+        printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
+        return 0;
+    }
+
+    file = filp_open(fname, O_RDONLY, 0);
+    if (file == NULL || IS_ERR(file)) {
+        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to open %s\n",
+               fname);
+        return 0;
+    }
+
+    file_inode = file->f_inode;
+    if (file_inode == NULL || IS_ERR(file_inode)) {
+        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Inode is invalid\n");
+        goto done;
+    }
+    file_size = file_inode->i_size;
+    if (file_size > NVKMS_READ_FILE_MAX_SIZE) {
+        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: File exceeds maximum size\n");
+        goto done;
+    }
+
+    *buff = nvkms_alloc(file_size, NV_FALSE);
+    if (*buff == NULL) {
+        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");
+        goto done;
+    }
+
+    /*
+     * TODO: Once we have access to GPL symbols, this can be replaced with
+     * kernel_read_file for kernels >= 4.6
+     */
+    while ((read_size < file_size) && (i++ < NVKMS_READ_FILE_MAX_LOOPS)) {
+#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
+        ssize_t ret = kernel_read(file, *buff + read_size,
+                                  file_size - read_size, &pos);
+#else
+        ssize_t ret = kernel_read(file, read_size,
+                                  *buff + read_size,
+                                  file_size - read_size);
+#endif
+        if (ret <= 0) {
+            break;
+        }
+        read_size += ret;
+    }
+
+    if (read_size != file_size) {
+        printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to read %s\n",
+               fname);
+        goto done;
+    }
+
+    filp_close(file, current->files);
+    return file_size;
+
+done:
+    nvkms_free(*buff, file_size);
+    filp_close(file, current->files);
+    return 0;
+}
+
+/* Must be called with nvkms_lock held. */
+static void nvkms_read_config_file_locked(void)
+{
+    char *buffer = NULL;
+    size_t buf_size = 0;
+
+    /* Only read the config file if the config_file module parameter is set. */
+    if (!NVKMS_CONF_FILE_SPECIFIED) {
+        return;
+    }
+
+    buf_size = nvkms_config_file_open(nvkms_conf, &buffer);
+
+    if (buf_size == 0) {
+        return;
+    }
+
+    if (nvKmsReadConf(buffer, buf_size, nvkms_config_file_open)) {
+        printk(KERN_INFO NVKMS_LOG_PREFIX "Successfully read %s\n",
+               nvkms_conf);
+    }
+
+    nvkms_free(buffer, buf_size);
+}
+
 /*************************************************************************
 * NVKMS KAPI functions
 ************************************************************************/
@@ -1541,10 +1661,12 @@ static int __init nvkms_init(void)
    if (!nvKmsModuleLoad()) {
        ret = -ENOMEM;
    }
-    up(&nvkms_lock);
    if (ret != 0) {
+        up(&nvkms_lock);
        goto fail_module_load;
    }
+    nvkms_read_config_file_locked();
+    up(&nvkms_lock);

    nvkms_proc_init();

--- a/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h
+++ b/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h
@@ -40,17 +40,31 @@
 #include "nv_stdarg.h"

 enum NvKmsSyncPtOp {
+    /*
+     * Call into Tegra's kernel nvhost driver, and allocate a syncpoint that can
+     * be exclusively used by the caller. Internally, this operation will call
+     * get() to set the initial refcount of the syncpoint to 1.
+     */
    NVKMS_SYNCPT_OP_ALLOC,
-    NVKMS_SYNCPT_OP_GET,
+    /*
+     * Decrease the refcount of an already allocated syncpoint. Once the
+     * refcount drops to 0, the syncpoint will be returned to the free pool that
+     * nvhost manages, so PUT can also be used to balance out an ALLOC.
+     */
    NVKMS_SYNCPT_OP_PUT,
-    NVKMS_SYNCPT_OP_INCR_MAX,
-    NVKMS_SYNCPT_OP_CPU_INCR,
+    /*
+     * Extract the syncpt id and threshold from a sync_file file descriptor.
+     */
    NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH,
+    /*
+     * Create a dma-fence from a syncpt id and threshold value, and create a
+     * sync_file file descriptor for the newly created dma-fence.
+     */
    NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD,
+    /*
+     * Read the minimum value of the given syncpt.
+     */
    NVKMS_SYNCPT_OP_READ_MINVAL,
-    NVKMS_SYNCPT_OP_READ_MAXVAL,
-    NVKMS_SYNCPT_OP_SET_MIN_EQ_MAX,
-    NVKMS_SYNCPT_OP_SET_MAXVAL,
 };

 typedef struct {
@@ -60,24 +74,10 @@ typedef struct {
        NvU32 id;                       /*  out  */
    } alloc;

-    struct {
-        NvU32 id;                       /*  in   */
-    } get;
-
    struct {
        NvU32 id;                       /*  in   */
    } put;

-    struct {
-        NvU32 id;                       /*  in   */
-        NvU32 incr;                     /*  in   */
-        NvU32 value;                    /*  out  */
-    } incr_max;
-
-    struct {
-        NvU32 id;                       /*  in   */
-    } cpu_incr;
-
    struct {
        NvS32 fd;                       /*  in   */
        NvU32 id;                       /*  out  */
@@ -94,20 +94,6 @@ typedef struct {
        NvU32 id;                       /*  in   */
        NvU32 minval;                   /*  out  */
    } read_minval;
-
-    struct {
-        NvU32 id;                       /*  in   */
-        NvU32 maxval;                   /*  out  */
-    } read_maxval;
-
-    struct {
-        NvU32 id;                       /*  in   */
-    } set_min_eq_max;
-
-    struct {
-        NvU32 id;                       /*  in   */
-        NvU32 val;                      /*  in   */
-    } set_maxval;
 } NvKmsSyncPtOpParams;

 NvBool nvkms_output_rounding_fix(void);
--- a/kernel-open/nvidia-modeset/nvkms.h
+++ b/kernel-open/nvidia-modeset/nvkms.h
@@ -42,6 +42,20 @@ typedef void nvkms_procfs_proc_t(void *data,
                                 char *buffer, size_t size,
                                 nvkms_procfs_out_string_func_t *outString);

+/* max number of loops to prevent hanging the kernel if an edge case is hit */
+#define NVKMS_READ_FILE_MAX_LOOPS 1000
+/* max size for any file read by the config system */
+#define NVKMS_READ_FILE_MAX_SIZE  8192
+
+/*
+ * The read file callback should allocate a buffer pointed to by *buff, fill it
+ * with the contents of fname, and return the size of the buffer. The buffer is
+ * not guaranteed to be NUL-terminated. The caller is responsible for freeing
+ * the buffer with nvkms_free, not nvFree.
+ */
+typedef size_t nvkms_config_read_file_func_t(char *fname,
+                                             char ** const buff);
+
 typedef struct {
    const char *name;
    nvkms_procfs_proc_t *func;
@@ -74,6 +88,9 @@ void nvKmsResume(NvU32 gpuId);

 void nvKmsGetProcFiles(const nvkms_procfs_file_t **ppProcFiles);

+NvBool nvKmsReadConf(const char *buff, size_t size,
+                     nvkms_config_read_file_func_t readfile);
+
 void nvKmsKapiHandleEventQueueChange
 (
    struct NvKmsKapiDevice *device
--- a/kernel-open/nvidia-uvm/cla06fsubch.h
+++ b/kernel-open/nvidia-uvm/cla06fsubch.h
@@ -1,29 +1,33 @@
-/*******************************************************************************
-    Copyright (c) 2013 NVIDIA Corporation
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-#ifndef __cla06fsubch_h__
-#define __cla06fsubch_h__
+#ifndef _cla06fsubch_h_
+#define _cla06fsubch_h_

+#define NVA06F_SUBCHANNEL_2D                            3
+#define NVA06F_SUBCHANNEL_3D                            0
+#define NVA06F_SUBCHANNEL_COMPUTE                       1
 #define NVA06F_SUBCHANNEL_COPY_ENGINE                   4
+#define NVA06F_SUBCHANNEL_I2M                           2

-#endif // {__cla06fsubch_h__}
+#endif // _cla06fsubch_h_
--- a/kernel-open/nvidia-uvm/cla16f.h
+++ b/kernel-open/nvidia-uvm/cla16f.h
@@ -1,25 +1,25 @@
-/*******************************************************************************
-    Copyright (c) 2021-2022 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

 #ifndef _cla16f_h_
 #define _cla16f_h_
@@ -30,9 +30,48 @@ extern "C" {

 #include "nvtypes.h"

-#define KEPLER_CHANNEL_GPFIFO_B                                    (0x0000A16F)
+/* class KEPLER_CHANNEL_GPFIFO  */
+/*
+ * Documentation for KEPLER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
+ * chapter "User Control Registers". It is documented as device NV_UDMA.
+ * The GPFIFO format itself is also documented in dev_pbdma.ref,
+ * NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
+ * chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
+ *
+ */
+#define  KEPLER_CHANNEL_GPFIFO_B                           (0x0000A16F)

+/* pio method data structure */
+typedef volatile struct _cla16f_tag0 {
+ NvV32 Reserved00[0x7c0];
+} NvA16FTypedef, KEPLER_ChannelGPFifoB;
+#define NVA16F_TYPEDEF                               KEPLER_CHANNELChannelGPFifo
+/* dma flow control data structure */
+typedef volatile struct _cla16f_tag1 {
+ NvU32 Ignored00[0x010];        /*                                  0000-003f*/
+ NvU32 Put;                     /* put offset, read/write           0040-0043*/
+ NvU32 Get;                     /* get offset, read only            0044-0047*/
+ NvU32 Reference;               /* reference value, read only       0048-004b*/
+ NvU32 PutHi;                   /* high order put offset bits       004c-004f*/
+ NvU32 Ignored01[0x002];        /*                                  0050-0057*/
+ NvU32 TopLevelGet;             /* top level get offset, read only  0058-005b*/
+ NvU32 TopLevelGetHi;           /* high order top level get bits    005c-005f*/
+ NvU32 GetHi;                   /* high order get offset bits       0060-0063*/
+ NvU32 Ignored02[0x007];        /*                                  0064-007f*/
+ NvU32 Ignored03;               /* used to be engine yield          0080-0083*/
+ NvU32 Ignored04[0x001];        /*                                  0084-0087*/
+ NvU32 GPGet;                   /* GP FIFO get offset, read only    0088-008b*/
+ NvU32 GPPut;                   /* GP FIFO put offset               008c-008f*/
+ NvU32 Ignored05[0x5c];         /*                                  0090-01ff*/
+} NvA16FControl, KeplerBControlGPFifo;
+/* fields and values */
+#define NVA16F_NUMBER_OF_SUBCHANNELS                               (8)
 #define NVA16F_SET_OBJECT                                          (0x00000000)
+#define NVA16F_SET_OBJECT_NVCLASS                                         15:0
+#define NVA16F_SET_OBJECT_ENGINE                                         20:16
+#define NVA16F_SET_OBJECT_ENGINE_SW                                 0x0000001f
+#define NVA16F_ILLEGAL                                             (0x00000004)
+#define NVA16F_ILLEGAL_HANDLE                                             31:0
 #define NVA16F_NOP                                                 (0x00000008)
 #define NVA16F_NOP_HANDLE                                                 31:0
 #define NVA16F_SEMAPHOREA                                          (0x00000010)
@@ -100,6 +139,12 @@ extern "C" {
 #define NVA16F_SET_REFERENCE_COUNT                                        31:0
 #define NVA16F_WFI                                                 (0x00000078)
 #define NVA16F_WFI_HANDLE                                                 31:0
+#define NVA16F_CRC_CHECK                                           (0x0000007c)
+#define NVA16F_CRC_CHECK_VALUE                                            31:0
+#define NVA16F_YIELD                                               (0x00000080)
+#define NVA16F_YIELD_OP                                                    1:0
+#define NVA16F_YIELD_OP_NOP                                         0x00000000
+

 /* GPFIFO entry format */
 #define NVA16F_GP_ENTRY__SIZE                                   8
@@ -126,13 +171,28 @@ extern "C" {
 #define NVA16F_GP_ENTRY1_OPCODE_PB_CRC                 0x00000003

 /* dma method formats */
+#define NVA16F_DMA_METHOD_ADDRESS_OLD                              12:2
 #define NVA16F_DMA_METHOD_ADDRESS                                  11:0
+#define NVA16F_DMA_SUBDEVICE_MASK                                  15:4
 #define NVA16F_DMA_METHOD_SUBCHANNEL                               15:13
+#define NVA16F_DMA_TERT_OP                                         17:16
+#define NVA16F_DMA_TERT_OP_GRP0_INC_METHOD                         (0x00000000)
+#define NVA16F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK                   (0x00000001)
+#define NVA16F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK                 (0x00000002)
+#define NVA16F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK                   (0x00000003)
+#define NVA16F_DMA_TERT_OP_GRP2_NON_INC_METHOD                     (0x00000000)
+#define NVA16F_DMA_METHOD_COUNT_OLD                                28:18
 #define NVA16F_DMA_METHOD_COUNT                                    28:16
+#define NVA16F_DMA_IMMD_DATA                                       28:16
 #define NVA16F_DMA_SEC_OP                                          31:29
+#define NVA16F_DMA_SEC_OP_GRP0_USE_TERT                            (0x00000000)
 #define NVA16F_DMA_SEC_OP_INC_METHOD                               (0x00000001)
+#define NVA16F_DMA_SEC_OP_GRP2_USE_TERT                            (0x00000002)
 #define NVA16F_DMA_SEC_OP_NON_INC_METHOD                           (0x00000003)
-
+#define NVA16F_DMA_SEC_OP_IMMD_DATA_METHOD                         (0x00000004)
+#define NVA16F_DMA_SEC_OP_ONE_INC                                  (0x00000005)
+#define NVA16F_DMA_SEC_OP_RESERVED6                                (0x00000006)
+#define NVA16F_DMA_SEC_OP_END_PB_SEGMENT                           (0x00000007)
 /* dma incrementing method format */
 #define NVA16F_DMA_INCR_ADDRESS                                    11:0
 #define NVA16F_DMA_INCR_SUBCHANNEL                                 15:13
@@ -140,7 +200,6 @@ extern "C" {
 #define NVA16F_DMA_INCR_OPCODE                                     31:29
 #define NVA16F_DMA_INCR_OPCODE_VALUE                               (0x00000001)
 #define NVA16F_DMA_INCR_DATA                                       31:0
-
 /* dma non-incrementing method format */
 #define NVA16F_DMA_NONINCR_ADDRESS                                 11:0
 #define NVA16F_DMA_NONINCR_SUBCHANNEL                              15:13
@@ -148,13 +207,45 @@ extern "C" {
 #define NVA16F_DMA_NONINCR_OPCODE                                  31:29
 #define NVA16F_DMA_NONINCR_OPCODE_VALUE                            (0x00000003)
 #define NVA16F_DMA_NONINCR_DATA                                    31:0
-
+/* dma increment-once method format */
+#define NVA16F_DMA_ONEINCR_ADDRESS                                 11:0
+#define NVA16F_DMA_ONEINCR_SUBCHANNEL                              15:13
+#define NVA16F_DMA_ONEINCR_COUNT                                   28:16
+#define NVA16F_DMA_ONEINCR_OPCODE                                  31:29
+#define NVA16F_DMA_ONEINCR_OPCODE_VALUE                            (0x00000005)
+#define NVA16F_DMA_ONEINCR_DATA                                    31:0
+/* dma no-operation format */
+#define NVA16F_DMA_NOP                                             (0x00000000)
 /* dma immediate-data format */
 #define NVA16F_DMA_IMMD_ADDRESS                                    11:0
 #define NVA16F_DMA_IMMD_SUBCHANNEL                                 15:13
 #define NVA16F_DMA_IMMD_DATA                                       28:16
 #define NVA16F_DMA_IMMD_OPCODE                                     31:29
 #define NVA16F_DMA_IMMD_OPCODE_VALUE                               (0x00000004)
+/* dma set sub-device mask format */
+#define NVA16F_DMA_SET_SUBDEVICE_MASK_VALUE                        15:4
+#define NVA16F_DMA_SET_SUBDEVICE_MASK_OPCODE                       31:16
+#define NVA16F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000001)
+/* dma store sub-device mask format */
+#define NVA16F_DMA_STORE_SUBDEVICE_MASK_VALUE                      15:4
+#define NVA16F_DMA_STORE_SUBDEVICE_MASK_OPCODE                     31:16
+#define NVA16F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE               (0x00000002)
+/* dma use sub-device mask format */
+#define NVA16F_DMA_USE_SUBDEVICE_MASK_OPCODE                       31:16
+#define NVA16F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000003)
+/* dma end-segment format */
+#define NVA16F_DMA_ENDSEG_OPCODE                                   31:29
+#define NVA16F_DMA_ENDSEG_OPCODE_VALUE                             (0x00000007)
+/* dma legacy incrementing/non-incrementing formats */
+#define NVA16F_DMA_ADDRESS                                         12:2
+#define NVA16F_DMA_SUBCH                                           15:13
+#define NVA16F_DMA_OPCODE3                                         17:16
+#define NVA16F_DMA_OPCODE3_NONE                                    (0x00000000)
+#define NVA16F_DMA_COUNT                                           28:18
+#define NVA16F_DMA_OPCODE                                          31:29
+#define NVA16F_DMA_OPCODE_METHOD                                   (0x00000000)
+#define NVA16F_DMA_OPCODE_NONINC_METHOD                            (0x00000002)
+#define NVA16F_DMA_DATA                                            31:0

 #ifdef __cplusplus
 };     /* extern "C" */
--- a/kernel-open/nvidia-uvm/clb069.h
+++ b/kernel-open/nvidia-uvm/clb069.h
@@ -1,24 +1,26 @@
-/*******************************************************************************
-    Copyright (c) 2014 NVidia Corporation
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-*******************************************************************************/
 #ifndef _clb069_h_
 #define _clb069_h_

--- a/kernel-open/nvidia-uvm/clb06f.h
+++ b/kernel-open/nvidia-uvm/clb06f.h
@@ -1,28 +1,28 @@
-/*******************************************************************************
-    Copyright (c) 2014 NVIDIA Corporation
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-#ifndef _clB06f_h_
-#define _clB06f_h_
+#ifndef _clb06f_h_
+#define _clb06f_h_

 #ifdef __cplusplus
 extern "C" {
@@ -30,10 +30,46 @@ extern "C" {

 #include "nvtypes.h"

+/* class MAXWELL_CHANNEL_GPFIFO  */
+/*
+ * Documentation for MAXWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
+ * chapter "User Control Registers". It is documented as device NV_UDMA.
+ * The GPFIFO format itself is also documented in dev_pbdma.ref,
+ * NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
+ * chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
+ *
+ */
 #define  MAXWELL_CHANNEL_GPFIFO_A                           (0x0000B06F)

-/* class MAXWELL_CHANNEL_GPFIFO  */
+#define NVB06F_TYPEDEF                             MAXWELL_CHANNELChannelGPFifoA
+
+/* dma flow control data structure */
+typedef volatile struct _clb06f_tag0 {
+ NvU32 Ignored00[0x010];        /*                                  0000-003f*/
+ NvU32 Put;                     /* put offset, read/write           0040-0043*/
+ NvU32 Get;                     /* get offset, read only            0044-0047*/
+ NvU32 Reference;               /* reference value, read only       0048-004b*/
+ NvU32 PutHi;                   /* high order put offset bits       004c-004f*/
+ NvU32 Ignored01[0x002];        /*                                  0050-0057*/
+ NvU32 TopLevelGet;             /* top level get offset, read only  0058-005b*/
+ NvU32 TopLevelGetHi;           /* high order top level get bits    005c-005f*/
+ NvU32 GetHi;                   /* high order get offset bits       0060-0063*/
+ NvU32 Ignored02[0x007];        /*                                  0064-007f*/
+ NvU32 Ignored03;               /* used to be engine yield          0080-0083*/
+ NvU32 Ignored04[0x001];        /*                                  0084-0087*/
+ NvU32 GPGet;                   /* GP FIFO get offset, read only    0088-008b*/
+ NvU32 GPPut;                   /* GP FIFO put offset               008c-008f*/
+ NvU32 Ignored05[0x5c];         /*                                  0090-01ff*/
+} Nvb06FControl, MaxwellAControlGPFifo;
+
+/* fields and values */
+#define NVB06F_NUMBER_OF_SUBCHANNELS                               (8)
 #define NVB06F_SET_OBJECT                                          (0x00000000)
+#define NVB06F_SET_OBJECT_NVCLASS                                         15:0
+#define NVB06F_SET_OBJECT_ENGINE                                         20:16
+#define NVB06F_SET_OBJECT_ENGINE_SW                                 0x0000001f
+#define NVB06F_ILLEGAL                                             (0x00000004)
+#define NVB06F_ILLEGAL_HANDLE                                             31:0
 #define NVB06F_NOP                                                 (0x00000008)
 #define NVB06F_NOP_HANDLE                                                 31:0
 #define NVB06F_SEMAPHOREA                                          (0x00000010)
@@ -47,6 +83,8 @@ extern "C" {
 #define NVB06F_SEMAPHORED_OPERATION_ACQUIRE                         0x00000001
 #define NVB06F_SEMAPHORED_OPERATION_RELEASE                         0x00000002
 #define NVB06F_SEMAPHORED_OPERATION_ACQ_GEQ                         0x00000004
+#define NVB06F_SEMAPHORED_OPERATION_ACQ_AND                         0x00000008
+#define NVB06F_SEMAPHORED_OPERATION_REDUCTION                       0x00000010
 #define NVB06F_SEMAPHORED_ACQUIRE_SWITCH                                 12:12
 #define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED                   0x00000000
 #define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED                    0x00000001
@@ -56,8 +94,22 @@ extern "C" {
 #define NVB06F_SEMAPHORED_RELEASE_SIZE                                   24:24
 #define NVB06F_SEMAPHORED_RELEASE_SIZE_16BYTE                       0x00000000
 #define NVB06F_SEMAPHORED_RELEASE_SIZE_4BYTE                        0x00000001
-
+#define NVB06F_SEMAPHORED_REDUCTION                                      30:27
+#define NVB06F_SEMAPHORED_REDUCTION_MIN                             0x00000000
+#define NVB06F_SEMAPHORED_REDUCTION_MAX                             0x00000001
+#define NVB06F_SEMAPHORED_REDUCTION_XOR                             0x00000002
+#define NVB06F_SEMAPHORED_REDUCTION_AND                             0x00000003
+#define NVB06F_SEMAPHORED_REDUCTION_OR                              0x00000004
+#define NVB06F_SEMAPHORED_REDUCTION_ADD                             0x00000005
+#define NVB06F_SEMAPHORED_REDUCTION_INC                             0x00000006
+#define NVB06F_SEMAPHORED_REDUCTION_DEC                             0x00000007
+#define NVB06F_SEMAPHORED_FORMAT                                         31:31
+#define NVB06F_SEMAPHORED_FORMAT_SIGNED                             0x00000000
+#define NVB06F_SEMAPHORED_FORMAT_UNSIGNED                           0x00000001
 #define NVB06F_NON_STALL_INTERRUPT                                 (0x00000020)
+#define NVB06F_NON_STALL_INTERRUPT_HANDLE                                 31:0
+#define NVB06F_FB_FLUSH                                            (0x00000024)
+#define NVB06F_FB_FLUSH_HANDLE                                            31:0
 // NOTE - MEM_OP_A and MEM_OP_B have been removed for gm20x to make room for
 // possible future MEM_OP features.  MEM_OP_C/D have identical functionality
 // to the previous MEM_OP_A/B methods.
@@ -84,10 +136,27 @@ extern "C" {
 #define NVB06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS                 0x0000000f
 #define NVB06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY                    0x00000010
 #define NVB06F_MEM_OP_D_TLB_INVALIDATE_ADDR_HI                             7:0
+#define NVB06F_SET_REFERENCE                                       (0x00000050)
+#define NVB06F_SET_REFERENCE_COUNT                                        31:0
 #define NVB06F_WFI                                                 (0x00000078)
+#define NVB06F_WFI_SCOPE                                                   0:0
+#define NVB06F_WFI_SCOPE_CURRENT_SCG_TYPE                           0x00000000
+#define NVB06F_WFI_SCOPE_ALL                                        0x00000001
+#define NVB06F_CRC_CHECK                                           (0x0000007c)
+#define NVB06F_CRC_CHECK_VALUE                                            31:0
+#define NVB06F_YIELD                                               (0x00000080)
+#define NVB06F_YIELD_OP                                                    1:0
+#define NVB06F_YIELD_OP_NOP                                         0x00000000
+#define NVB06F_YIELD_OP_PBDMA_TIMESLICE                             0x00000001
+#define NVB06F_YIELD_OP_RUNLIST_TIMESLICE                           0x00000002
+#define NVB06F_YIELD_OP_TSG                                         0x00000003
+

 /* GPFIFO entry format */
 #define NVB06F_GP_ENTRY__SIZE                                   8
+#define NVB06F_GP_ENTRY0_FETCH                                0:0
+#define NVB06F_GP_ENTRY0_FETCH_UNCONDITIONAL           0x00000000
+#define NVB06F_GP_ENTRY0_FETCH_CONDITIONAL             0x00000001
 #define NVB06F_GP_ENTRY0_GET                                 31:2
 #define NVB06F_GP_ENTRY0_OPERAND                             31:0
 #define NVB06F_GP_ENTRY1_GET_HI                               7:0
@@ -98,11 +167,38 @@ extern "C" {
 #define NVB06F_GP_ENTRY1_LEVEL_MAIN                    0x00000000
 #define NVB06F_GP_ENTRY1_LEVEL_SUBROUTINE              0x00000001
 #define NVB06F_GP_ENTRY1_LENGTH                             30:10
+#define NVB06F_GP_ENTRY1_SYNC                               31:31
+#define NVB06F_GP_ENTRY1_SYNC_PROCEED                  0x00000000
+#define NVB06F_GP_ENTRY1_SYNC_WAIT                     0x00000001
+#define NVB06F_GP_ENTRY1_OPCODE                               7:0
+#define NVB06F_GP_ENTRY1_OPCODE_NOP                    0x00000000
+#define NVB06F_GP_ENTRY1_OPCODE_ILLEGAL                0x00000001
+#define NVB06F_GP_ENTRY1_OPCODE_GP_CRC                 0x00000002
+#define NVB06F_GP_ENTRY1_OPCODE_PB_CRC                 0x00000003

 /* dma method formats */
+#define NVB06F_DMA_METHOD_ADDRESS_OLD                              12:2
+#define NVB06F_DMA_METHOD_ADDRESS                                  11:0
+#define NVB06F_DMA_SUBDEVICE_MASK                                  15:4
+#define NVB06F_DMA_METHOD_SUBCHANNEL                               15:13
+#define NVB06F_DMA_TERT_OP                                         17:16
+#define NVB06F_DMA_TERT_OP_GRP0_INC_METHOD                         (0x00000000)
+#define NVB06F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK                   (0x00000001)
+#define NVB06F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK                 (0x00000002)
+#define NVB06F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK                   (0x00000003)
+#define NVB06F_DMA_TERT_OP_GRP2_NON_INC_METHOD                     (0x00000000)
+#define NVB06F_DMA_METHOD_COUNT_OLD                                28:18
+#define NVB06F_DMA_METHOD_COUNT                                    28:16
+#define NVB06F_DMA_IMMD_DATA                                       28:16
 #define NVB06F_DMA_SEC_OP                                          31:29
+#define NVB06F_DMA_SEC_OP_GRP0_USE_TERT                            (0x00000000)
 #define NVB06F_DMA_SEC_OP_INC_METHOD                               (0x00000001)
+#define NVB06F_DMA_SEC_OP_GRP2_USE_TERT                            (0x00000002)
 #define NVB06F_DMA_SEC_OP_NON_INC_METHOD                           (0x00000003)
+#define NVB06F_DMA_SEC_OP_IMMD_DATA_METHOD                         (0x00000004)
+#define NVB06F_DMA_SEC_OP_ONE_INC                                  (0x00000005)
+#define NVB06F_DMA_SEC_OP_RESERVED6                                (0x00000006)
+#define NVB06F_DMA_SEC_OP_END_PB_SEGMENT                           (0x00000007)
 /* dma incrementing method format */
 #define NVB06F_DMA_INCR_ADDRESS                                    11:0
 #define NVB06F_DMA_INCR_SUBCHANNEL                                 15:13
@@ -132,9 +228,33 @@ extern "C" {
 #define NVB06F_DMA_IMMD_DATA                                       28:16
 #define NVB06F_DMA_IMMD_OPCODE                                     31:29
 #define NVB06F_DMA_IMMD_OPCODE_VALUE                               (0x00000004)
+/* dma set sub-device mask format */
+#define NVB06F_DMA_SET_SUBDEVICE_MASK_VALUE                        15:4
+#define NVB06F_DMA_SET_SUBDEVICE_MASK_OPCODE                       31:16
+#define NVB06F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000001)
+/* dma store sub-device mask format */
+#define NVB06F_DMA_STORE_SUBDEVICE_MASK_VALUE                      15:4
+#define NVB06F_DMA_STORE_SUBDEVICE_MASK_OPCODE                     31:16
+#define NVB06F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE               (0x00000002)
+/* dma use sub-device mask format */
+#define NVB06F_DMA_USE_SUBDEVICE_MASK_OPCODE                       31:16
+#define NVB06F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000003)
+/* dma end-segment format */
+#define NVB06F_DMA_ENDSEG_OPCODE                                   31:29
+#define NVB06F_DMA_ENDSEG_OPCODE_VALUE                             (0x00000007)
+/* dma legacy incrementing/non-incrementing formats */
+#define NVB06F_DMA_ADDRESS                                         12:2
+#define NVB06F_DMA_SUBCH                                           15:13
+#define NVB06F_DMA_OPCODE3                                         17:16
+#define NVB06F_DMA_OPCODE3_NONE                                    (0x00000000)
+#define NVB06F_DMA_COUNT                                           28:18
+#define NVB06F_DMA_OPCODE                                          31:29
+#define NVB06F_DMA_OPCODE_METHOD                                   (0x00000000)
+#define NVB06F_DMA_OPCODE_NONINC_METHOD                            (0x00000002)
+#define NVB06F_DMA_DATA                                            31:0

 #ifdef __cplusplus
 };     /* extern "C" */
 #endif

-#endif /* _clB06F_h_ */
+#endif /* _clb06f_h_ */
--- a/kernel-open/nvidia-uvm/clb0b5.h
+++ b/kernel-open/nvidia-uvm/clb0b5.h
@@ -1,19 +1,19 @@
 /*******************************************************************************
-    Copyright (c) 2014 NVIDIA Corporation
+    Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:

-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -32,6 +32,10 @@ extern "C" {

 #define MAXWELL_DMA_COPY_A                                                            (0x0000B0B5)

+#define NVB0B5_NOP                                                              (0x00000100)
+#define NVB0B5_NOP_PARAMETER                                                    31:0
+#define NVB0B5_PM_TRIGGER                                                       (0x00000140)
+#define NVB0B5_PM_TRIGGER_V                                                     31:0
 #define NVB0B5_SET_SEMAPHORE_A                                                  (0x00000240)
 #define NVB0B5_SET_SEMAPHORE_A_UPPER                                            7:0
 #define NVB0B5_SET_SEMAPHORE_B                                                  (0x00000244)
@@ -183,9 +187,75 @@ extern "C" {
 #define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO                      (0x00000001)
 #define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE                    (0x00000002)
 #define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR                     (0x00000003)
-
+#define NVB0B5_SET_DST_BLOCK_SIZE                                               (0x0000070C)
+#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH                                         3:0
+#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH_QUARTER_GOB                             (0x0000000E)
+#define NVB0B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
+#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT                                        7:4
+#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
+#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
+#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
+#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
+#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
+#define NVB0B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
+#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH                                         11:8
+#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
+#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
+#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
+#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
+#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
+#define NVB0B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
+#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT                                    15:12
+#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_TESLA_4                 (0x00000000)
+#define NVB0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
+#define NVB0B5_SET_DST_WIDTH                                                    (0x00000710)
+#define NVB0B5_SET_DST_WIDTH_V                                                  31:0
+#define NVB0B5_SET_DST_HEIGHT                                                   (0x00000714)
+#define NVB0B5_SET_DST_HEIGHT_V                                                 31:0
+#define NVB0B5_SET_DST_DEPTH                                                    (0x00000718)
+#define NVB0B5_SET_DST_DEPTH_V                                                  31:0
+#define NVB0B5_SET_DST_LAYER                                                    (0x0000071C)
+#define NVB0B5_SET_DST_LAYER_V                                                  31:0
+#define NVB0B5_SET_DST_ORIGIN                                                   (0x00000720)
+#define NVB0B5_SET_DST_ORIGIN_X                                                 15:0
+#define NVB0B5_SET_DST_ORIGIN_Y                                                 31:16
+#define NVB0B5_SET_SRC_BLOCK_SIZE                                               (0x00000728)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH                                         3:0
+#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH_QUARTER_GOB                             (0x0000000E)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT                                        7:4
+#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH                                         11:8
+#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT                                    15:12
+#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_TESLA_4                 (0x00000000)
+#define NVB0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
+#define NVB0B5_SET_SRC_WIDTH                                                    (0x0000072C)
+#define NVB0B5_SET_SRC_WIDTH_V                                                  31:0
+#define NVB0B5_SET_SRC_HEIGHT                                                   (0x00000730)
+#define NVB0B5_SET_SRC_HEIGHT_V                                                 31:0
+#define NVB0B5_SET_SRC_DEPTH                                                    (0x00000734)
+#define NVB0B5_SET_SRC_DEPTH_V                                                  31:0
+#define NVB0B5_SET_SRC_LAYER                                                    (0x00000738)
+#define NVB0B5_SET_SRC_LAYER_V                                                  31:0
+#define NVB0B5_SET_SRC_ORIGIN                                                   (0x0000073C)
+#define NVB0B5_SET_SRC_ORIGIN_X                                                 15:0
+#define NVB0B5_SET_SRC_ORIGIN_Y                                                 31:16
+#define NVB0B5_PM_TRIGGER_END                                                   (0x00001114)
+#define NVB0B5_PM_TRIGGER_END_V                                                 31:0

 #ifdef __cplusplus
 };     /* extern "C" */
 #endif
 #endif // _clb0b5_h
+
--- a/kernel-open/nvidia-uvm/clc06f.h
+++ b/kernel-open/nvidia-uvm/clc06f.h
@@ -1,25 +1,25 @@
-/*******************************************************************************
-    Copyright (c) 2014 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */

 #ifndef _clc06f_h_
 #define _clc06f_h_
@@ -30,10 +30,47 @@ extern "C" {

 #include "nvtypes.h"

+/* class PASCAL_CHANNEL_GPFIFO  */
+/*
+ * Documentation for PASCAL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
+ * chapter "User Control Registers". It is documented as device NV_UDMA.
+ * The GPFIFO format itself is also documented in dev_pbdma.ref,
+ * NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
+ * chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
+ *
+ * Note there is no .mfs file for this class.
+ */
 #define  PASCAL_CHANNEL_GPFIFO_A                           (0x0000C06F)

-/* class PASCAL_CHANNEL_GPFIFO_A */
+#define NVC06F_TYPEDEF                             PASCAL_CHANNELChannelGPFifoA
+
+/* dma flow control data structure; trailing comments give hex byte offsets (assuming 4-byte NvU32) */
+typedef volatile struct Nvc06fControl_struct {
+ NvU32 Ignored00[0x010];        /*                                  0000-003f*/
+ NvU32 Put;                     /* put offset, read/write           0040-0043*/
+ NvU32 Get;                     /* get offset, read only            0044-0047*/
+ NvU32 Reference;               /* reference value, read only       0048-004b*/
+ NvU32 PutHi;                   /* high order put offset bits       004c-004f*/
+ NvU32 Ignored01[0x002];        /*                                  0050-0057*/
+ NvU32 TopLevelGet;             /* top level get offset, read only  0058-005b*/
+ NvU32 TopLevelGetHi;           /* high order top level get bits    005c-005f*/
+ NvU32 GetHi;                   /* high order get offset bits       0060-0063*/
+ NvU32 Ignored02[0x007];        /*                                  0064-007f*/
+ NvU32 Ignored03;               /* used to be engine yield          0080-0083*/
+ NvU32 Ignored04[0x001];        /*                                  0084-0087*/
+ NvU32 GPGet;                   /* GP FIFO get offset, read only    0088-008b*/
+ NvU32 GPPut;                   /* GP FIFO put offset               008c-008f*/
+ NvU32 Ignored05[0x5c];         /*                                  0090-01ff*/
+} Nvc06fControl, PascalAControlGPFifo;
+
+/* fields and values */
+#define NVC06F_NUMBER_OF_SUBCHANNELS                               (8)
 #define NVC06F_SET_OBJECT                                          (0x00000000)
+#define NVC06F_SET_OBJECT_NVCLASS                                         15:0
+#define NVC06F_SET_OBJECT_ENGINE                                         20:16
+#define NVC06F_SET_OBJECT_ENGINE_SW                                 0x0000001f
+#define NVC06F_ILLEGAL                                             (0x00000004)
+#define NVC06F_ILLEGAL_HANDLE                                             31:0
 #define NVC06F_NOP                                                 (0x00000008)
 #define NVC06F_NOP_HANDLE                                                 31:0
 #define NVC06F_SEMAPHOREA                                          (0x00000010)
@@ -47,54 +84,33 @@ extern "C" {
 #define NVC06F_SEMAPHORED_OPERATION_ACQUIRE                         0x00000001
 #define NVC06F_SEMAPHORED_OPERATION_RELEASE                         0x00000002
 #define NVC06F_SEMAPHORED_OPERATION_ACQ_GEQ                         0x00000004
+#define NVC06F_SEMAPHORED_OPERATION_ACQ_AND                         0x00000008
+#define NVC06F_SEMAPHORED_OPERATION_REDUCTION                       0x00000010
 #define NVC06F_SEMAPHORED_ACQUIRE_SWITCH                                 12:12
 #define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED                   0x00000000
 #define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED                    0x00000001
-
-
-/* GPFIFO entry format */
-#define NVC06F_GP_ENTRY__SIZE                                   8
-#define NVC06F_GP_ENTRY0_GET                                 31:2
-#define NVC06F_GP_ENTRY0_OPERAND                             31:0
-#define NVC06F_GP_ENTRY1_GET_HI                               7:0
-#define NVC06F_GP_ENTRY1_PRIV                                 8:8
-#define NVC06F_GP_ENTRY1_PRIV_USER                     0x00000000
-#define NVC06F_GP_ENTRY1_PRIV_KERNEL                   0x00000001
-#define NVC06F_GP_ENTRY1_LEVEL                                9:9
-#define NVC06F_GP_ENTRY1_LEVEL_MAIN                    0x00000000
-#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE              0x00000001
-#define NVC06F_GP_ENTRY1_LENGTH                             30:10
-
-/* dma incrementing method format */
-#define NVC06F_DMA_INCR_ADDRESS                                    11:0
-#define NVC06F_DMA_INCR_SUBCHANNEL                                 15:13
-#define NVC06F_DMA_INCR_COUNT                                      28:16
-#define NVC06F_DMA_INCR_OPCODE                                     31:29
-#define NVC06F_DMA_INCR_OPCODE_VALUE                               (0x00000001)
-#define NVC06F_DMA_INCR_DATA                                       31:0
-/* dma non-incrementing method format */
-#define NVC06F_DMA_NONINCR_ADDRESS                                 11:0
-#define NVC06F_DMA_NONINCR_SUBCHANNEL                              15:13
-#define NVC06F_DMA_NONINCR_COUNT                                   28:16
-#define NVC06F_DMA_NONINCR_OPCODE                                  31:29
-#define NVC06F_DMA_NONINCR_OPCODE_VALUE                            (0x00000003)
-#define NVC06F_DMA_NONINCR_DATA                                    31:0
-/* dma increment-once method format */
-#define NVC06F_DMA_ONEINCR_ADDRESS                                 11:0
-#define NVC06F_DMA_ONEINCR_SUBCHANNEL                              15:13
-#define NVC06F_DMA_ONEINCR_COUNT                                   28:16
-#define NVC06F_DMA_ONEINCR_OPCODE                                  31:29
-#define NVC06F_DMA_ONEINCR_OPCODE_VALUE                            (0x00000005)
-#define NVC06F_DMA_ONEINCR_DATA                                    31:0
-/* dma no-operation format */
-#define NVC06F_DMA_NOP                                             (0x00000000)
-/* dma immediate-data format */
-#define NVC06F_DMA_IMMD_ADDRESS                                    11:0
-#define NVC06F_DMA_IMMD_SUBCHANNEL                                 15:13
-#define NVC06F_DMA_IMMD_DATA                                       28:16
-#define NVC06F_DMA_IMMD_OPCODE                                     31:29
-#define NVC06F_DMA_IMMD_OPCODE_VALUE                               (0x00000004)
-
+#define NVC06F_SEMAPHORED_RELEASE_WFI                                    20:20
+#define NVC06F_SEMAPHORED_RELEASE_WFI_EN                            0x00000000
+#define NVC06F_SEMAPHORED_RELEASE_WFI_DIS                           0x00000001
+#define NVC06F_SEMAPHORED_RELEASE_SIZE                                   24:24
+#define NVC06F_SEMAPHORED_RELEASE_SIZE_16BYTE                       0x00000000
+#define NVC06F_SEMAPHORED_RELEASE_SIZE_4BYTE                        0x00000001
+#define NVC06F_SEMAPHORED_REDUCTION                                      30:27
+#define NVC06F_SEMAPHORED_REDUCTION_MIN                             0x00000000
+#define NVC06F_SEMAPHORED_REDUCTION_MAX                             0x00000001
+#define NVC06F_SEMAPHORED_REDUCTION_XOR                             0x00000002
+#define NVC06F_SEMAPHORED_REDUCTION_AND                             0x00000003
+#define NVC06F_SEMAPHORED_REDUCTION_OR                              0x00000004
+#define NVC06F_SEMAPHORED_REDUCTION_ADD                             0x00000005
+#define NVC06F_SEMAPHORED_REDUCTION_INC                             0x00000006
+#define NVC06F_SEMAPHORED_REDUCTION_DEC                             0x00000007
+#define NVC06F_SEMAPHORED_FORMAT                                         31:31
+#define NVC06F_SEMAPHORED_FORMAT_SIGNED                             0x00000000
+#define NVC06F_SEMAPHORED_FORMAT_UNSIGNED                           0x00000001
+#define NVC06F_NON_STALL_INTERRUPT                                 (0x00000020)
+#define NVC06F_NON_STALL_INTERRUPT_HANDLE                                 31:0
+#define NVC06F_FB_FLUSH                                            (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
+#define NVC06F_FB_FLUSH_HANDLE                                            31:0
 // NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
 // specifying the page address for a targeted TLB invalidate and the uTLB for
 // a targeted REPLAY_CANCEL for UVM.
@@ -153,19 +169,142 @@ extern "C" {
 #define NVC06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE             0x0000000d
 #define NVC06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE              0x0000000e
 // CLEAN_LINES is an alias for Tegra/GPU IP usage
+#define NVC06F_MEM_OP_D_OPERATION_L2_INVALIDATE_CLEAN_LINES         0x0000000e
+// The MEM_OP_B-named alias below is misleading (it was missed as part of the
+// update); it duplicates the MEM_OP_D alias above and is kept for compatibility.
 #define NVC06F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES         0x0000000e
 #define NVC06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS                 0x0000000f
 #define NVC06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY                    0x00000010
 #define NVC06F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS     0x00000015
 #define NVC06F_SET_REFERENCE                                       (0x00000050)
 #define NVC06F_SET_REFERENCE_COUNT                                        31:0
-
+// Syncpoint methods are only available on Tegra parts.  Attempting to use
+// them on discrete GPUs will result in Host raising NV_PPBDMA_INTR_0_METHOD.
+#define NVC06F_SYNCPOINTA                                          (0x00000070)
+#define NVC06F_SYNCPOINTA_PAYLOAD                                         31:0
+#define NVC06F_SYNCPOINTB                                          (0x00000074)
+#define NVC06F_SYNCPOINTB_OPERATION                                        0:0
+#define NVC06F_SYNCPOINTB_OPERATION_WAIT                            0x00000000
+#define NVC06F_SYNCPOINTB_OPERATION_INCR                            0x00000001
+#define NVC06F_SYNCPOINTB_WAIT_SWITCH                                      4:4
+#define NVC06F_SYNCPOINTB_WAIT_SWITCH_DIS                           0x00000000
+#define NVC06F_SYNCPOINTB_WAIT_SWITCH_EN                            0x00000001
+#define NVC06F_SYNCPOINTB_SYNCPT_INDEX                                    19:8
 #define NVC06F_WFI                                                 (0x00000078)
 #define NVC06F_WFI_SCOPE                                                   0:0
 #define NVC06F_WFI_SCOPE_CURRENT_SCG_TYPE                           0x00000000
 #define NVC06F_WFI_SCOPE_ALL                                        0x00000001
+#define NVC06F_CRC_CHECK                                           (0x0000007c)
+#define NVC06F_CRC_CHECK_VALUE                                            31:0
+#define NVC06F_YIELD                                               (0x00000080)
+#define NVC06F_YIELD_OP                                                    1:0
+#define NVC06F_YIELD_OP_NOP                                         0x00000000
+#define NVC06F_YIELD_OP_PBDMA_TIMESLICE                             0x00000001
+#define NVC06F_YIELD_OP_RUNLIST_TIMESLICE                           0x00000002
+#define NVC06F_YIELD_OP_TSG                                         0x00000003


+/* GPFIFO entry format */
+#define NVC06F_GP_ENTRY__SIZE                                   8
+#define NVC06F_GP_ENTRY0_FETCH                                0:0
+#define NVC06F_GP_ENTRY0_FETCH_UNCONDITIONAL           0x00000000
+#define NVC06F_GP_ENTRY0_FETCH_CONDITIONAL             0x00000001
+#define NVC06F_GP_ENTRY0_GET                                 31:2
+#define NVC06F_GP_ENTRY0_OPERAND                             31:0
+#define NVC06F_GP_ENTRY1_GET_HI                               7:0
+#define NVC06F_GP_ENTRY1_PRIV                                 8:8
+#define NVC06F_GP_ENTRY1_PRIV_USER                     0x00000000
+#define NVC06F_GP_ENTRY1_PRIV_KERNEL                   0x00000001
+#define NVC06F_GP_ENTRY1_LEVEL                                9:9
+#define NVC06F_GP_ENTRY1_LEVEL_MAIN                    0x00000000
+#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE              0x00000001
+#define NVC06F_GP_ENTRY1_LENGTH                             30:10
+#define NVC06F_GP_ENTRY1_SYNC                               31:31
+#define NVC06F_GP_ENTRY1_SYNC_PROCEED                  0x00000000
+#define NVC06F_GP_ENTRY1_SYNC_WAIT                     0x00000001
+#define NVC06F_GP_ENTRY1_OPCODE                               7:0
+#define NVC06F_GP_ENTRY1_OPCODE_NOP                    0x00000000
+#define NVC06F_GP_ENTRY1_OPCODE_ILLEGAL                0x00000001
+#define NVC06F_GP_ENTRY1_OPCODE_GP_CRC                 0x00000002
+#define NVC06F_GP_ENTRY1_OPCODE_PB_CRC                 0x00000003
+
+/* dma method formats */
+#define NVC06F_DMA_METHOD_ADDRESS_OLD                              12:2
+#define NVC06F_DMA_METHOD_ADDRESS                                  11:0
+#define NVC06F_DMA_SUBDEVICE_MASK                                  15:4
+#define NVC06F_DMA_METHOD_SUBCHANNEL                               15:13
+#define NVC06F_DMA_TERT_OP                                         17:16
+#define NVC06F_DMA_TERT_OP_GRP0_INC_METHOD                         (0x00000000)
+#define NVC06F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK                   (0x00000001)
+#define NVC06F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK                 (0x00000002)
+#define NVC06F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK                   (0x00000003)
+#define NVC06F_DMA_TERT_OP_GRP2_NON_INC_METHOD                     (0x00000000)
+#define NVC06F_DMA_METHOD_COUNT_OLD                                28:18
+#define NVC06F_DMA_METHOD_COUNT                                    28:16
+#define NVC06F_DMA_IMMD_DATA                                       28:16
+#define NVC06F_DMA_SEC_OP                                          31:29
+#define NVC06F_DMA_SEC_OP_GRP0_USE_TERT                            (0x00000000)
+#define NVC06F_DMA_SEC_OP_INC_METHOD                               (0x00000001)
+#define NVC06F_DMA_SEC_OP_GRP2_USE_TERT                            (0x00000002)
+#define NVC06F_DMA_SEC_OP_NON_INC_METHOD                           (0x00000003)
+#define NVC06F_DMA_SEC_OP_IMMD_DATA_METHOD                         (0x00000004)
+#define NVC06F_DMA_SEC_OP_ONE_INC                                  (0x00000005)
+#define NVC06F_DMA_SEC_OP_RESERVED6                                (0x00000006)
+#define NVC06F_DMA_SEC_OP_END_PB_SEGMENT                           (0x00000007)
+/* dma incrementing method format */
+#define NVC06F_DMA_INCR_ADDRESS                                    11:0
+#define NVC06F_DMA_INCR_SUBCHANNEL                                 15:13
+#define NVC06F_DMA_INCR_COUNT                                      28:16
+#define NVC06F_DMA_INCR_OPCODE                                     31:29
+#define NVC06F_DMA_INCR_OPCODE_VALUE                               (0x00000001)
+#define NVC06F_DMA_INCR_DATA                                       31:0
+/* dma non-incrementing method format */
+#define NVC06F_DMA_NONINCR_ADDRESS                                 11:0
+#define NVC06F_DMA_NONINCR_SUBCHANNEL                              15:13
+#define NVC06F_DMA_NONINCR_COUNT                                   28:16
+#define NVC06F_DMA_NONINCR_OPCODE                                  31:29
+#define NVC06F_DMA_NONINCR_OPCODE_VALUE                            (0x00000003)
+#define NVC06F_DMA_NONINCR_DATA                                    31:0
+/* dma increment-once method format */
+#define NVC06F_DMA_ONEINCR_ADDRESS                                 11:0
+#define NVC06F_DMA_ONEINCR_SUBCHANNEL                              15:13
+#define NVC06F_DMA_ONEINCR_COUNT                                   28:16
+#define NVC06F_DMA_ONEINCR_OPCODE                                  31:29
+#define NVC06F_DMA_ONEINCR_OPCODE_VALUE                            (0x00000005)
+#define NVC06F_DMA_ONEINCR_DATA                                    31:0
+/* dma no-operation format */
+#define NVC06F_DMA_NOP                                             (0x00000000)
+/* dma immediate-data format */
+#define NVC06F_DMA_IMMD_ADDRESS                                    11:0
+#define NVC06F_DMA_IMMD_SUBCHANNEL                                 15:13
+#define NVC06F_DMA_IMMD_DATA                                       28:16
+#define NVC06F_DMA_IMMD_OPCODE                                     31:29
+#define NVC06F_DMA_IMMD_OPCODE_VALUE                               (0x00000004)
+/* dma set sub-device mask format */
+#define NVC06F_DMA_SET_SUBDEVICE_MASK_VALUE                        15:4
+#define NVC06F_DMA_SET_SUBDEVICE_MASK_OPCODE                       31:16
+#define NVC06F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000001)
+/* dma store sub-device mask format */
+#define NVC06F_DMA_STORE_SUBDEVICE_MASK_VALUE                      15:4
+#define NVC06F_DMA_STORE_SUBDEVICE_MASK_OPCODE                     31:16
+#define NVC06F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE               (0x00000002)
+/* dma use sub-device mask format */
+#define NVC06F_DMA_USE_SUBDEVICE_MASK_OPCODE                       31:16
+#define NVC06F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE                 (0x00000003)
+/* dma end-segment format */
+#define NVC06F_DMA_ENDSEG_OPCODE                                   31:29
+#define NVC06F_DMA_ENDSEG_OPCODE_VALUE                             (0x00000007)
+/* dma legacy incrementing/non-incrementing formats */
+#define NVC06F_DMA_ADDRESS                                         12:2
+#define NVC06F_DMA_SUBCH                                           15:13
+#define NVC06F_DMA_OPCODE3                                         17:16
+#define NVC06F_DMA_OPCODE3_NONE                                    (0x00000000)
+#define NVC06F_DMA_COUNT                                           28:18
+#define NVC06F_DMA_OPCODE                                          31:29
+#define NVC06F_DMA_OPCODE_METHOD                                   (0x00000000)
+#define NVC06F_DMA_OPCODE_NONINC_METHOD                            (0x00000002)
+#define NVC06F_DMA_DATA                                            31:0
+
 #ifdef __cplusplus
 };     /* extern "C" */
 #endif
--- a/kernel-open/nvidia-uvm/clc0b5.h
+++ b/kernel-open/nvidia-uvm/clc0b5.h
@@ -1,19 +1,19 @@
 /*******************************************************************************
-    Copyright (c) 2014 NVIDIA Corporation
+    Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:

-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -32,6 +32,10 @@ extern "C" {

 #define PASCAL_DMA_COPY_A                                                            (0x0000C0B5)

+#define NVC0B5_NOP                                                              (0x00000100) /* one field defined here: PARAMETER */
+#define NVC0B5_NOP_PARAMETER                                                    31:0
+#define NVC0B5_PM_TRIGGER                                                       (0x00000140) /* one field defined here: V */
+#define NVC0B5_PM_TRIGGER_V                                                     31:0
 #define NVC0B5_SET_SEMAPHORE_A                                                  (0x00000240)
 #define NVC0B5_SET_SEMAPHORE_A_UPPER                                            16:0
 #define NVC0B5_SET_SEMAPHORE_B                                                  (0x00000244)
@@ -115,6 +119,10 @@ extern "C" {
 #define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2                                         20:20
 #define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING                         (0x00000000)
 #define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE                          (0x00000001)
+#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2                                         21:21 /* sits right after SRC_BYPASS_L2 (20:20) and lists the same two encodings */
+#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING                         (0x00000000)
+#define NVC0B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE                          (0x00000001)
+#define NVC0B5_LAUNCH_DMA_RESERVED                                              31:28 /* no encodings are listed for this field */
 #define NVC0B5_OFFSET_IN_UPPER                                                  (0x00000400)
 #define NVC0B5_OFFSET_IN_UPPER_UPPER                                            16:0
 #define NVC0B5_OFFSET_IN_LOWER                                                  (0x00000404)
@@ -183,6 +191,68 @@ extern "C" {
 #define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO                      (0x00000001)
 #define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE                    (0x00000002)
 #define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR                     (0x00000003)
+#define NVC0B5_SET_DST_BLOCK_SIZE                                               (0x0000070C) /* fields below: WIDTH 3:0, HEIGHT 7:4, DEPTH 11:8, GOB_HEIGHT 15:12 */
+#define NVC0B5_SET_DST_BLOCK_SIZE_WIDTH                                         3:0
+#define NVC0B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
+#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT                                        7:4 /* encodings 0x0-0x5 name 1, 2, 4, 8, 16, 32 GOBs */
+#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
+#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
+#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
+#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
+#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
+#define NVC0B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
+#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH                                         11:8 /* same 0x0-0x5 encoding names as HEIGHT */
+#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
+#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
+#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
+#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
+#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
+#define NVC0B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
+#define NVC0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT                                    15:12
+#define NVC0B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
+#define NVC0B5_SET_DST_WIDTH                                                    (0x00000710)
+#define NVC0B5_SET_DST_WIDTH_V                                                  31:0
+#define NVC0B5_SET_DST_HEIGHT                                                   (0x00000714)
+#define NVC0B5_SET_DST_HEIGHT_V                                                 31:0
+#define NVC0B5_SET_DST_DEPTH                                                    (0x00000718)
+#define NVC0B5_SET_DST_DEPTH_V                                                  31:0
+#define NVC0B5_SET_DST_LAYER                                                    (0x0000071C)
+#define NVC0B5_SET_DST_LAYER_V                                                  31:0
+#define NVC0B5_SET_DST_ORIGIN                                                   (0x00000720) /* two fields: X 15:0 and Y 31:16 */
+#define NVC0B5_SET_DST_ORIGIN_X                                                 15:0
+#define NVC0B5_SET_DST_ORIGIN_Y                                                 31:16
+#define NVC0B5_SET_SRC_BLOCK_SIZE                                               (0x00000728) /* same field layout as SET_DST_BLOCK_SIZE; 0x00000724 is not defined in this list */
+#define NVC0B5_SET_SRC_BLOCK_SIZE_WIDTH                                         3:0
+#define NVC0B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT                                        7:4 /* encodings 0x0-0x5 name 1, 2, 4, 8, 16, 32 GOBs */
+#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH                                         11:8 /* same 0x0-0x5 encoding names as HEIGHT */
+#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
+#define NVC0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT                                    15:12
+#define NVC0B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
+#define NVC0B5_SET_SRC_WIDTH                                                    (0x0000072C)
+#define NVC0B5_SET_SRC_WIDTH_V                                                  31:0
+#define NVC0B5_SET_SRC_HEIGHT                                                   (0x00000730)
+#define NVC0B5_SET_SRC_HEIGHT_V                                                 31:0
+#define NVC0B5_SET_SRC_DEPTH                                                    (0x00000734)
+#define NVC0B5_SET_SRC_DEPTH_V                                                  31:0
+#define NVC0B5_SET_SRC_LAYER                                                    (0x00000738)
+#define NVC0B5_SET_SRC_LAYER_V                                                  31:0
+#define NVC0B5_SET_SRC_ORIGIN                                                   (0x0000073C) /* two fields: X 15:0 and Y 31:16 */
+#define NVC0B5_SET_SRC_ORIGIN_X                                                 15:0
+#define NVC0B5_SET_SRC_ORIGIN_Y                                                 31:16
+#define NVC0B5_PM_TRIGGER_END                                                   (0x00001114) /* one field defined here: V */
+#define NVC0B5_PM_TRIGGER_END_V                                                 31:0

 #ifdef __cplusplus
 };     /* extern "C" */
--- a/kernel-open/nvidia-uvm/clc1b5.h
+++ b/kernel-open/nvidia-uvm/clc1b5.h
@@ -1,19 +1,19 @@
 /*******************************************************************************
-    Copyright (c) 2014 NVIDIA Corporation
+    Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:

-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -32,6 +32,10 @@ extern "C" {

 #define PASCAL_DMA_COPY_B                                                            (0x0000C1B5)

+#define NVC1B5_NOP                                                              (0x00000100) /* one field defined here: PARAMETER */
+#define NVC1B5_NOP_PARAMETER                                                    31:0
+#define NVC1B5_PM_TRIGGER                                                       (0x00000140) /* one field defined here: V */
+#define NVC1B5_PM_TRIGGER_V                                                     31:0
 #define NVC1B5_SET_SEMAPHORE_A                                                  (0x00000240)
 #define NVC1B5_SET_SEMAPHORE_A_UPPER                                            16:0
 #define NVC1B5_SET_SEMAPHORE_B                                                  (0x00000244)
@@ -115,6 +119,14 @@ extern "C" {
 #define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2                                         20:20
 #define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING                         (0x00000000)
 #define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE                          (0x00000001)
+#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2                                         21:21 /* sits right after SRC_BYPASS_L2 (20:20) and lists the same two encodings */
+#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING                         (0x00000000)
+#define NVC1B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE                          (0x00000001)
+#define NVC1B5_LAUNCH_DMA_VPRMODE                                               23:22 /* only encodings 0x0 and 0x1 are listed for this field */
+#define NVC1B5_LAUNCH_DMA_VPRMODE_VPR_NONE                                      (0x00000000)
+#define NVC1B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                                   (0x00000001)
+#define NVC1B5_LAUNCH_DMA_RESERVED_START_OF_COPY                                24:24
+#define NVC1B5_LAUNCH_DMA_RESERVED_ERR_CODE                                     31:28 /* 27:25 is not defined by these added lines */
 #define NVC1B5_OFFSET_IN_UPPER                                                  (0x00000400)
 #define NVC1B5_OFFSET_IN_UPPER_UPPER                                            16:0
 #define NVC1B5_OFFSET_IN_LOWER                                                  (0x00000404)
@@ -183,6 +195,76 @@ extern "C" {
 #define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO                      (0x00000001)
 #define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE                    (0x00000002)
 #define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR                     (0x00000003)
+#define NVC1B5_SET_DST_BLOCK_SIZE                                               (0x0000070C) /* fields below: WIDTH 3:0, HEIGHT 7:4, DEPTH 11:8, GOB_HEIGHT 15:12 */
+#define NVC1B5_SET_DST_BLOCK_SIZE_WIDTH                                         3:0
+#define NVC1B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
+#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT                                        7:4 /* encodings 0x0-0x5 name 1, 2, 4, 8, 16, 32 GOBs */
+#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
+#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
+#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
+#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
+#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
+#define NVC1B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
+#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH                                         11:8 /* same 0x0-0x5 encoding names as HEIGHT */
+#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
+#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
+#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
+#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
+#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
+#define NVC1B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
+#define NVC1B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT                                    15:12
+#define NVC1B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
+#define NVC1B5_SET_DST_WIDTH                                                    (0x00000710)
+#define NVC1B5_SET_DST_WIDTH_V                                                  31:0
+#define NVC1B5_SET_DST_HEIGHT                                                   (0x00000714)
+#define NVC1B5_SET_DST_HEIGHT_V                                                 31:0
+#define NVC1B5_SET_DST_DEPTH                                                    (0x00000718)
+#define NVC1B5_SET_DST_DEPTH_V                                                  31:0
+#define NVC1B5_SET_DST_LAYER                                                    (0x0000071C)
+#define NVC1B5_SET_DST_LAYER_V                                                  31:0
+#define NVC1B5_SET_DST_ORIGIN                                                   (0x00000720) /* two fields: X 15:0 and Y 31:16 */
+#define NVC1B5_SET_DST_ORIGIN_X                                                 15:0
+#define NVC1B5_SET_DST_ORIGIN_Y                                                 31:16
+#define NVC1B5_SET_SRC_BLOCK_SIZE                                               (0x00000728) /* same field layout as SET_DST_BLOCK_SIZE; 0x00000724 is not defined in this list */
+#define NVC1B5_SET_SRC_BLOCK_SIZE_WIDTH                                         3:0
+#define NVC1B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT                                        7:4 /* encodings 0x0-0x5 name 1, 2, 4, 8, 16, 32 GOBs */
+#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH                                         11:8 /* same 0x0-0x5 encoding names as HEIGHT */
+#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
+#define NVC1B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT                                    15:12
+#define NVC1B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
+#define NVC1B5_SET_SRC_WIDTH                                                    (0x0000072C)
+#define NVC1B5_SET_SRC_WIDTH_V                                                  31:0
+#define NVC1B5_SET_SRC_HEIGHT                                                   (0x00000730)
+#define NVC1B5_SET_SRC_HEIGHT_V                                                 31:0
+#define NVC1B5_SET_SRC_DEPTH                                                    (0x00000734)
+#define NVC1B5_SET_SRC_DEPTH_V                                                  31:0
+#define NVC1B5_SET_SRC_LAYER                                                    (0x00000738)
+#define NVC1B5_SET_SRC_LAYER_V                                                  31:0
+#define NVC1B5_SET_SRC_ORIGIN                                                   (0x0000073C) /* two fields: X 15:0 and Y 31:16 */
+#define NVC1B5_SET_SRC_ORIGIN_X                                                 15:0
+#define NVC1B5_SET_SRC_ORIGIN_Y                                                 31:16
+#define NVC1B5_SRC_ORIGIN_X                                                     (0x00000744) /* separate full-range (31:0) VALUE, alongside the X 15:0 / Y 31:16 fields of SET_SRC_ORIGIN; 0x00000740 is not defined in this list */
+#define NVC1B5_SRC_ORIGIN_X_VALUE                                               31:0
+#define NVC1B5_SRC_ORIGIN_Y                                                     (0x00000748)
+#define NVC1B5_SRC_ORIGIN_Y_VALUE                                               31:0
+#define NVC1B5_DST_ORIGIN_X                                                     (0x0000074C) /* destination counterparts of SRC_ORIGIN_X / SRC_ORIGIN_Y, same 31:0 VALUE range */
+#define NVC1B5_DST_ORIGIN_X_VALUE                                               31:0
+#define NVC1B5_DST_ORIGIN_Y                                                     (0x00000750)
+#define NVC1B5_DST_ORIGIN_Y_VALUE                                               31:0
+#define NVC1B5_PM_TRIGGER_END                                                   (0x00001114) /* one field defined here: V */
+#define NVC1B5_PM_TRIGGER_END_V                                                 31:0

 #ifdef __cplusplus
 };     /* extern "C" */
--- a/kernel-open/nvidia-uvm/clc365.h
+++ b/kernel-open/nvidia-uvm/clc365.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021 NVIDIA Corporation
+    Copyright (c) 2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
--- a/kernel-open/nvidia-uvm/clc369.h
+++ b/kernel-open/nvidia-uvm/clc369.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021 NVIDIA Corporation
+    Copyright (c) 2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
--- a/kernel-open/nvidia-uvm/clc3b5.h
+++ b/kernel-open/nvidia-uvm/clc3b5.h
@@ -1,19 +1,19 @@
 /*******************************************************************************
-    Copyright (c) 2016 NVIDIA Corporation
+    Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.

-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:

-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -32,6 +32,10 @@ extern "C" {

 #define VOLTA_DMA_COPY_A                                                            (0x0000C3B5)

+#define NVC3B5_NOP                                                              (0x00000100) /* one field defined here: PARAMETER */
+#define NVC3B5_NOP_PARAMETER                                                    31:0
+#define NVC3B5_PM_TRIGGER                                                       (0x00000140) /* one field defined here: V */
+#define NVC3B5_PM_TRIGGER_V                                                     31:0
 #define NVC3B5_SET_SEMAPHORE_A                                                  (0x00000240)
 #define NVC3B5_SET_SEMAPHORE_A_UPPER                                            16:0
 #define NVC3B5_SET_SEMAPHORE_B                                                  (0x00000244)
@@ -69,6 +73,9 @@ extern "C" {
 #define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE                                          2:2
 #define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE                                    (0x00000000)
 #define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE                                     (0x00000001)
+#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE                                            25:25 /* listed next to FLUSH_ENABLE (2:2) by name, although its range is 25:25 */
+#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_SYS                                        (0x00000000)
+#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_GL                                         (0x00000001)
 #define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE                                        4:3
 #define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE                                   (0x00000000)
 #define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE             (0x00000001)
@@ -123,8 +130,6 @@ extern "C" {
 #define NVC3B5_LAUNCH_DMA_VPRMODE                                               23:22
 #define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_NONE                                      (0x00000000)
 #define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                                   (0x00000001)
-#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS                                   (0x00000002)
-#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID                                   (0x00000003)
 #define NVC3B5_LAUNCH_DMA_RESERVED_START_OF_COPY                                24:24
 #define NVC3B5_LAUNCH_DMA_RESERVED_ERR_CODE                                     31:28
 #define NVC3B5_OFFSET_IN_UPPER                                                  (0x00000400)
@@ -195,6 +200,76 @@ extern "C" {
 #define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO                      (0x00000001)
 #define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE                    (0x00000002)
 #define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR                     (0x00000003)
+#define NVC3B5_SET_DST_BLOCK_SIZE                                               (0x0000070C) /* fields below: WIDTH 3:0, HEIGHT 7:4, DEPTH 11:8, GOB_HEIGHT 15:12 */
+#define NVC3B5_SET_DST_BLOCK_SIZE_WIDTH                                         3:0
+#define NVC3B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
+#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT                                        7:4 /* encodings 0x0-0x5 name 1, 2, 4, 8, 16, 32 GOBs */
+#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
+#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
+#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
+#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
+#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
+#define NVC3B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
+#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH                                         11:8 /* same 0x0-0x5 encoding names as HEIGHT */
+#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
+#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
+#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
+#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
+#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
+#define NVC3B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
+#define NVC3B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT                                    15:12
+#define NVC3B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
+#define NVC3B5_SET_DST_WIDTH                                                    (0x00000710)
+#define NVC3B5_SET_DST_WIDTH_V                                                  31:0
+#define NVC3B5_SET_DST_HEIGHT                                                   (0x00000714)
+#define NVC3B5_SET_DST_HEIGHT_V                                                 31:0
+#define NVC3B5_SET_DST_DEPTH                                                    (0x00000718)
+#define NVC3B5_SET_DST_DEPTH_V                                                  31:0
+#define NVC3B5_SET_DST_LAYER                                                    (0x0000071C)
+#define NVC3B5_SET_DST_LAYER_V                                                  31:0
+#define NVC3B5_SET_DST_ORIGIN                                                   (0x00000720) /* two fields: X 15:0 and Y 31:16 */
+#define NVC3B5_SET_DST_ORIGIN_X                                                 15:0
+#define NVC3B5_SET_DST_ORIGIN_Y                                                 31:16
+#define NVC3B5_SET_SRC_BLOCK_SIZE                                               (0x00000728) /* same field layout as SET_DST_BLOCK_SIZE; 0x00000724 is not defined in this list */
+#define NVC3B5_SET_SRC_BLOCK_SIZE_WIDTH                                         3:0
+#define NVC3B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB                                 (0x00000000)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT                                        7:4 /* encodings 0x0-0x5 name 1, 2, 4, 8, 16, 32 GOBs */
+#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB                                (0x00000000)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS                               (0x00000001)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS                              (0x00000002)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS                             (0x00000003)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS                           (0x00000004)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS                         (0x00000005)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH                                         11:8 /* same 0x0-0x5 encoding names as HEIGHT */
+#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB                                 (0x00000000)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS                                (0x00000001)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS                               (0x00000002)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS                              (0x00000003)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS                            (0x00000004)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS                          (0x00000005)
+#define NVC3B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT                                    15:12
+#define NVC3B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8                 (0x00000001)
+#define NVC3B5_SET_SRC_WIDTH                                                    (0x0000072C)
+#define NVC3B5_SET_SRC_WIDTH_V                                                  31:0
+#define NVC3B5_SET_SRC_HEIGHT                                                   (0x00000730)
+#define NVC3B5_SET_SRC_HEIGHT_V                                                 31:0
+#define NVC3B5_SET_SRC_DEPTH                                                    (0x00000734)
+#define NVC3B5_SET_SRC_DEPTH_V                                                  31:0
+#define NVC3B5_SET_SRC_LAYER                                                    (0x00000738)
+#define NVC3B5_SET_SRC_LAYER_V                                                  31:0
+#define NVC3B5_SET_SRC_ORIGIN                                                   (0x0000073C) /* two fields: X 15:0 and Y 31:16 */
+#define NVC3B5_SET_SRC_ORIGIN_X                                                 15:0
+#define NVC3B5_SET_SRC_ORIGIN_Y                                                 31:16
+#define NVC3B5_SRC_ORIGIN_X                                                     (0x00000744) /* separate full-range (31:0) VALUE, alongside the X 15:0 / Y 31:16 fields of SET_SRC_ORIGIN; 0x00000740 is not defined in this list */
+#define NVC3B5_SRC_ORIGIN_X_VALUE                                               31:0
+#define NVC3B5_SRC_ORIGIN_Y                                                     (0x00000748)
+#define NVC3B5_SRC_ORIGIN_Y_VALUE                                               31:0
+#define NVC3B5_DST_ORIGIN_X                                                     (0x0000074C) /* destination counterparts of SRC_ORIGIN_X / SRC_ORIGIN_Y, same 31:0 VALUE range */
+#define NVC3B5_DST_ORIGIN_X_VALUE                                               31:0
+#define NVC3B5_DST_ORIGIN_Y                                                     (0x00000750)
+#define NVC3B5_DST_ORIGIN_Y_VALUE                                               31:0
+#define NVC3B5_PM_TRIGGER_END                                                   (0x00001114) /* one field defined here: V */
+#define NVC3B5_PM_TRIGGER_END_V                                                 31:0

 #ifdef __cplusplus
 };     /* extern "C" */
--- a/kernel-open/nvidia-uvm/clcba2.h
+++ b/kernel-open/nvidia-uvm/clcba2.h
@@ -0,0 +1,97 @@
+/*******************************************************************************
+    Copyright (c) 2021-2022 NVIDIA Corporation
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to
+    deal in the Software without restriction, including without limitation the
+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+        The above copyright notice and this permission notice shall be
+        included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+#include "nvtypes.h"
+
+#ifndef _clcba2_h_
+#define _clcba2_h_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define HOPPER_SEC2_WORK_LAUNCH_A                                               (0x0000CBA2)
+
+#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI                                         (0x00000400)
+#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI_DATA                                    24:0
+#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO                                         (0x00000404)
+#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO_DATA                                    31:4
+#define NVCBA2_DECRYPT_COPY_DST_ADDR_HI                                         (0x00000408)
+#define NVCBA2_DECRYPT_COPY_DST_ADDR_HI_DATA                                    24:0
+#define NVCBA2_DECRYPT_COPY_DST_ADDR_LO                                         (0x0000040c)
+#define NVCBA2_DECRYPT_COPY_DST_ADDR_LO_DATA                                    31:4
+#define NVCBA2_DECRYPT_COPY_SIZE                                                (0x00000410)
+#define NVCBA2_DECRYPT_COPY_SIZE_DATA                                           31:2
+#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_HI                                    (0x00000414)
+#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_HI_DATA                               24:0
+#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_LO                                    (0x00000418)
+#define NVCBA2_DECRYPT_COPY_AUTH_TAG_ADDR_LO_DATA                               31:4
+#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI                                   (0x0000041C)
+#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI_DATA                              24:0
+#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO                                   (0x00000420)
+#define NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO_DATA                              31:4
+#define NVCBA2_SEMAPHORE_A                                                      (0x00000440)
+#define NVCBA2_SEMAPHORE_A_UPPER                                                24:0
+#define NVCBA2_SEMAPHORE_B                                                      (0x00000444)
+#define NVCBA2_SEMAPHORE_B_LOWER                                                31:2
+#define NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER                                      (0x00000448)
+#define NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER_DATA                                 31:0
+#define NVCBA2_SET_SEMAPHORE_PAYLOAD_UPPER                                      (0x0000044C)
+#define NVCBA2_SET_SEMAPHORE_PAYLOAD_UPPER_DATA                                 31:0
+#define NVCBA2_SEMAPHORE_D                                                      (0x00000450)
+#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR                                          0:0
+#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR_DISABLE                                  (0x00000000)
+#define NVCBA2_SEMAPHORE_D_NOTIFY_INTR_ENABLE                                   (0x00000001)
+#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE                                         1:1
+#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE_32_BIT                                  (0x00000000)
+#define NVCBA2_SEMAPHORE_D_PAYLOAD_SIZE_64_BIT                                  (0x00000001)
+#define NVCBA2_SEMAPHORE_D_TIMESTAMP                                            2:2
+#define NVCBA2_SEMAPHORE_D_TIMESTAMP_DISABLE                                    (0x00000000)
+#define NVCBA2_SEMAPHORE_D_TIMESTAMP_ENABLE                                     (0x00000001)
+#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE                                        3:3
+#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE_FALSE                                  (0x00000000)
+#define NVCBA2_SEMAPHORE_D_FLUSH_DISABLE_TRUE                                   (0x00000001)
+#define NVCBA2_EXECUTE                                                          (0x00000470)
+#define NVCBA2_EXECUTE_NOTIFY                                                   0:0
+#define NVCBA2_EXECUTE_NOTIFY_DISABLE                                           (0x00000000)
+#define NVCBA2_EXECUTE_NOTIFY_ENABLE                                            (0x00000001)
+#define NVCBA2_EXECUTE_NOTIFY_ON                                                1:1
+#define NVCBA2_EXECUTE_NOTIFY_ON_END                                            (0x00000000)
+#define NVCBA2_EXECUTE_NOTIFY_ON_BEGIN                                          (0x00000001)
+#define NVCBA2_EXECUTE_FLUSH_DISABLE                                            2:2
+#define NVCBA2_EXECUTE_FLUSH_DISABLE_FALSE                                      (0x00000000)
+#define NVCBA2_EXECUTE_FLUSH_DISABLE_TRUE                                       (0x00000001)
+#define NVCBA2_EXECUTE_NOTIFY_INTR                                              3:3
+#define NVCBA2_EXECUTE_NOTIFY_INTR_DISABLE                                      (0x00000000)
+#define NVCBA2_EXECUTE_NOTIFY_INTR_ENABLE                                       (0x00000001)
+#define NVCBA2_EXECUTE_PAYLOAD_SIZE                                             4:4
+#define NVCBA2_EXECUTE_PAYLOAD_SIZE_32_BIT                                      (0x00000000)
+#define NVCBA2_EXECUTE_PAYLOAD_SIZE_64_BIT                                      (0x00000001)
+#define NVCBA2_EXECUTE_TIMESTAMP                                                5:5
+#define NVCBA2_EXECUTE_TIMESTAMP_DISABLE                                        (0x00000000)
+#define NVCBA2_EXECUTE_TIMESTAMP_ENABLE                                         (0x00000001)
+
+#ifdef __cplusplus
+};     /* extern "C" */
+#endif
+#endif // _clcba2_h_
--- a/kernel-open/nvidia-uvm/ctrl2080mc.h
+++ b/kernel-open/nvidia-uvm/ctrl2080mc.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2013-2021 NVIDIA Corporation
+    Copyright (c) 2013-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
--- a/kernel-open/nvidia-uvm/nv-kthread-q.c
+++ b/kernel-open/nvidia-uvm/nv-kthread-q.c
@@ -301,7 +301,7 @@ static void _q_flush_function(void *args)
 static void _raw_q_flush(nv_kthread_q_t *q)
 {
    nv_kthread_q_item_t q_item;
-    DECLARE_COMPLETION(completion);
+    DECLARE_COMPLETION_ONSTACK(completion);

    nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);

--- a/kernel-open/nvidia-uvm/nvidia-uvm-sources.Kbuild
+++ b/kernel-open/nvidia-uvm/nvidia-uvm-sources.Kbuild
@@ -1,12 +1,11 @@
 NVIDIA_UVM_SOURCES ?=
 NVIDIA_UVM_SOURCES_CXX ?=

-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
@@ -58,6 +57,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_ce.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_host.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_mmu.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_fault_buffer.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_ce.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
@@ -72,6 +72,12 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
+NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
@@ -94,7 +100,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore_test.c
-NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm_sanity_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem_test.c
 NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_page_tree_test.c
--- a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
+++ b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
@@ -36,7 +36,7 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
 #

 ifeq ($(UVM_BUILD_TYPE),debug)
-  NVIDIA_UVM_CFLAGS += -DDEBUG $(call cc-option,-Og,-O0) -g
+  NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
 else
  ifeq ($(UVM_BUILD_TYPE),develop)
    # -DDEBUG is required, in order to allow pr_devel() print statements to
@@ -81,7 +81,12 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno

 NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
 NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
@@ -99,8 +104,12 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
 NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
 NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
 NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
-NV_CONFTEST_TYPE_COMPILE_TESTS += make_device_exclusive_range
+NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
 NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
+NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
+NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
+NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
+NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
+NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier

 NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
-NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
--- a/kernel-open/nvidia-uvm/uvm.c
+++ b/kernel-open/nvidia-uvm/uvm.c
@@ -28,6 +28,7 @@
 #include "uvm_lock.h"
 #include "uvm_test.h"
 #include "uvm_va_space.h"
+#include "uvm_va_space_mm.h"
 #include "uvm_va_range.h"
 #include "uvm_va_block.h"
 #include "uvm_tools.h"
@@ -35,26 +36,200 @@
 #include "uvm_linux_ioctl.h"
 #include "uvm_hmm.h"
 #include "uvm_mem.h"
+#include "uvm_kvmalloc.h"

 #define NVIDIA_UVM_DEVICE_NAME          "nvidia-uvm"

 static dev_t g_uvm_base_dev;
 static struct cdev g_uvm_cdev;
+static const struct file_operations uvm_fops;

-static int uvm_open(struct inode *inode, struct file *filp)
+// Returns true only when filp is non-NULL and its file_operations pointer is
+// this file's uvm_fops table; returns false otherwise (including for NULL).
+bool uvm_file_is_nvidia_uvm(struct file *filp)
 {
-    NV_STATUS status = uvm_global_get_status();
+    return (filp != NULL) && (filp->f_op == &uvm_fops);
+}

-    if (status == NV_OK) {
-        if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
-            return -EAGAIN;
+// Decodes the tagged value stored in filp->private_data.
+//
+// The bits selected by UVM_FD_TYPE_MASK are returned as the FD type. The
+// remaining bits are interpreted as a pointer which, when ptr_val is non-NULL,
+// is written to *ptr_val. The pointer is asserted to be NULL for
+// UVM_FD_UNINITIALIZED and UVM_FD_INITIALIZING, and non-NULL for
+// UVM_FD_VA_SPACE and UVM_FD_MM.
+//
+// filp must be a nvidia-uvm file (asserted).
+uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
+{
+    unsigned long uptr;
+    uvm_fd_type_t type;
+    void *ptr;

-        status = uvm_va_space_create(inode, filp);
+    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

-        uvm_up_read(&g_uvm_global.pm.lock);
+    // Acquire load of the tagged word; presumably pairs with the
+    // atomic_long_set_release() stores that publish private_data.
+    uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
+    type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
+    ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
+
+    // Compile-time check that every FD type value fits in the tag bits.
+    BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);
+
+    switch (type) {
+        case UVM_FD_UNINITIALIZED:
+        case UVM_FD_INITIALIZING:
+            UVM_ASSERT(!ptr);
+            break;
+
+        case UVM_FD_VA_SPACE:
+            UVM_ASSERT(ptr);
+            // The tag shares the low bits of the pointer, so the pointee's
+            // alignment must leave those bits clear.
+            BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
+            break;
+
+        case UVM_FD_MM:
+            UVM_ASSERT(ptr);
+            // Same alignment requirement for the struct file pointer.
+            BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
+            break;
+
+        default:
+            UVM_ASSERT(0);
    }

-    return -nv_status_to_errno(status);
+    // ptr_val is optional; callers interested only in the type may pass NULL.
+    if (ptr_val)
+        *ptr_val = ptr;
+
+    return type;
+}
+
+// Returns the pointer stored in filp's private_data when the FD's current type
+// equals the requested type, and NULL for any other type. filp must be a
+// nvidia-uvm file (asserted).
+void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
+{
+    void *payload;
+    uvm_fd_type_t actual_type;
+
+    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
+
+    actual_type = uvm_fd_type(filp, &payload);
+
+    return (actual_type == type) ? payload : NULL;
+}
+
+// UVM_MM_INITIALIZE ioctl handler: turns filp into a "UVM MM" FD tied to the
+// VA space owned by the UVM FD given in params->uvmFd.
+//
+// On success a reference is taken on the va_space_mm's mm with
+// mmget_not_zero(), the va_space_mm state becomes UVM_VA_SPACE_MM_STATE_ALIVE,
+// and the struct file reference obtained with fget() is kept in
+// filp->private_data tagged with UVM_FD_MM. On every failure path the fget()
+// reference is dropped and, if filp had been moved to UVM_FD_INITIALIZING by
+// this call, it is reset to UVM_FD_UNINITIALIZED.
+//
+// Returns:
+//     NV_OK                        filp is now a UVM MM FD.
+//     NV_ERR_INVALID_ARGUMENT      uvmFd is not a nvidia-uvm file, or it is not
+//                                  an initialized VA space FD.
+//     NV_WARN_NOTHING_TO_DO        The va_space_mm is not enabled for this VA
+//                                  space, so the MM FD is not needed.
+//     NV_ERR_IN_USE                filp is already initialized or is being
+//                                  initialized by another thread.
+//     NV_ERR_INVALID_STATE         The va_space_mm is already alive, or is in
+//                                  an unexpected state.
+//     NV_ERR_PAGE_TABLE_NOT_AVAIL  The va_space_mm was already released, or no
+//                                  reference could be taken on the mm.
+static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct file *filp)
+{
+    uvm_va_space_t *va_space;
+    uvm_va_space_mm_t *va_space_mm;
+    struct file *uvm_file;
+    uvm_fd_type_t old_fd_type;
+    struct mm_struct *mm;
+    NV_STATUS status;
+
+    // fget() may return NULL; uvm_file_is_nvidia_uvm() rejects NULL and the
+    // err label only calls fput() on a non-NULL file.
+    uvm_file = fget(params->uvmFd);
+    if (!uvm_file_is_nvidia_uvm(uvm_file)) {
+        status = NV_ERR_INVALID_ARGUMENT;
+        goto err;
+    }
+
+    if (uvm_fd_type(uvm_file, (void **)&va_space) != UVM_FD_VA_SPACE) {
+        status = NV_ERR_INVALID_ARGUMENT;
+        goto err;
+    }
+
+    // Tell userspace the MM FD is not required and it may be released
+    // with no loss of functionality.
+    if (!uvm_va_space_mm_enabled(va_space)) {
+        status = NV_WARN_NOTHING_TO_DO;
+        goto err;
+    }
+
+    // Claim filp. Only the thread that observes UVM_FD_UNINITIALIZED proceeds;
+    // a loser must not touch private_data, hence the plain err label below.
+    old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
+                                         UVM_FD_UNINITIALIZED,
+                                         UVM_FD_INITIALIZING);
+    old_fd_type &= UVM_FD_TYPE_MASK;
+    if (old_fd_type != UVM_FD_UNINITIALIZED) {
+        status = NV_ERR_IN_USE;
+        goto err;
+    }
+
+    va_space_mm = &va_space->va_space_mm;
+    uvm_spin_lock(&va_space_mm->lock);
+    switch (va_space_mm->state) {
+        // We only allow the va_space_mm to be initialised once. If
+        // userspace passed the UVM FD to another process it is up to
+        // userspace to ensure it also passes the UVM MM FD that
+        // initialised the va_space_mm or arranges some other way to keep
+        // a reference on the FD.
+        case UVM_VA_SPACE_MM_STATE_ALIVE:
+            status = NV_ERR_INVALID_STATE;
+            goto err_release_unlock;
+
+        // Once userspace has released the va_space_mm the GPU is
+        // effectively dead and no new work can be started. We don't
+        // support re-initializing once userspace has closed the FD.
+        case UVM_VA_SPACE_MM_STATE_RELEASED:
+            status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
+            goto err_release_unlock;
+
+        case UVM_VA_SPACE_MM_STATE_UNINITIALIZED:
+            mm = va_space_mm->mm;
+            if (!mm || !mmget_not_zero(mm)) {
+                status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
+                goto err_release_unlock;
+            }
+
+            va_space_mm->state = UVM_VA_SPACE_MM_STATE_ALIVE;
+            break;
+
+        // Keep the warnings at bay. If the assert is compiled out, fail the
+        // call instead of tagging filp as UVM_FD_MM without an mm reference.
+        default:
+            UVM_ASSERT(0);
+            status = NV_ERR_INVALID_STATE;
+            goto err_release_unlock;
+    }
+    uvm_spin_unlock(&va_space_mm->lock);
+
+    // Publish the final state. The fget() reference on uvm_file is
+    // intentionally kept: it now lives in private_data.
+    atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)uvm_file | UVM_FD_MM);
+
+    return NV_OK;
+
+err_release_unlock:
+    uvm_spin_unlock(&va_space_mm->lock);
+    // Undo the UVM_FD_INITIALIZING claim so a later attempt can be made.
+    atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
+
+err:
+    if (uvm_file)
+        fput(uvm_file);
+
+    return status;
+}
+
+// Called when opening /dev/nvidia-uvm. This code doesn't take any UVM locks, so
+// there's no need to acquire g_uvm_global.pm.lock, but if that changes the PM
+// lock will need to be taken.
+//
+// Gives the new struct file its own address_space and leaves private_data
+// NULL.
+//
+// Returns 0 on success, the negated errno for a failing
+// uvm_global_get_status(), or -ENOMEM if the address_space cannot be
+// allocated.
+static int uvm_open(struct inode *inode, struct file *filp)
+{
+    struct address_space *mapping;
+    NV_STATUS status = uvm_global_get_status();
+
+    if (status != NV_OK)
+        return -nv_status_to_errno(status);
+
+    mapping = uvm_kvmalloc(sizeof(*mapping));
+    if (!mapping)
+        return -ENOMEM;
+
+    // By default all struct files on the same inode share the same
+    // address_space structure (the inode's) across all processes. This means
+    // unmap_mapping_range would unmap virtual mappings across all processes on
+    // that inode.
+    //
+    // Since the UVM driver uses the mapping offset as the VA of the file's
+    // process, we need to isolate the mappings to each process.
+    address_space_init_once(mapping);
+    mapping->host = inode;
+
+    // Some paths in the kernel, for example force_page_cache_readahead which
+    // can be invoked from user-space via madvise MADV_WILLNEED and fadvise
+    // POSIX_FADV_WILLNEED, check the function pointers within
+    // file->f_mapping->a_ops for validity. However, those paths assume that a_ops
+    // itself is always valid. Handle that by using the inode's a_ops pointer,
+    // which is what f_mapping->a_ops would point to anyway if we weren't re-
+    // assigning f_mapping.
+    mapping->a_ops = inode->i_mapping->a_ops;
+
+#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
+    mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
+#endif
+
+    filp->private_data = NULL;
+    filp->f_mapping = mapping;
+
+    // This is an errno-style int return, so report success as 0 rather than
+    // with an NV_STATUS constant.
+    return 0;
 }

 static int uvm_open_entry(struct inode *inode, struct file *filp)
@@ -78,11 +253,44 @@ static void uvm_release_deferred(void *data)
    uvm_up_read(&g_uvm_global.pm.lock);
 }

+// Release path for a UVM MM FD. uvm_file is the UVM FD whose VA space this MM
+// FD refers to. filp is not used here.
+//
+// When the va_space_mm is enabled for that VA space: unregisters it, drops the
+// mm reference if it is still reported as enabled afterwards, clears the
+// stored mm pointer and drops a reference on uvm_file. Otherwise does nothing.
+static void uvm_mm_release(struct file *filp, struct file *uvm_file)
+{
+    uvm_va_space_t *owner_va_space = uvm_va_space_get(uvm_file);
+    uvm_va_space_mm_t *owner_va_space_mm = &owner_va_space->va_space_mm;
+
+    // Sampled up front, before the unregister call below runs.
+    struct mm_struct *retained_mm = owner_va_space_mm->mm;
+
+    if (!uvm_va_space_mm_enabled(owner_va_space))
+        return;
+
+    uvm_va_space_mm_unregister(owner_va_space);
+
+    if (uvm_va_space_mm_enabled(owner_va_space))
+        uvm_mmput(retained_mm);
+
+    owner_va_space_mm->mm = NULL;
+    fput(uvm_file);
+}
+
 static int uvm_release(struct inode *inode, struct file *filp)
 {
-    uvm_va_space_t *va_space = uvm_va_space_get(filp);
+    void *ptr;
+    uvm_va_space_t *va_space;
+    uvm_fd_type_t fd_type;
    int ret;

+    fd_type = uvm_fd_type(filp, &ptr);
+    UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
+    if (fd_type == UVM_FD_UNINITIALIZED) {
+        uvm_kvfree(filp->f_mapping);
+        return 0;
+    }
+    else if (fd_type == UVM_FD_MM) {
+        uvm_kvfree(filp->f_mapping);
+        uvm_mm_release(filp, (struct file *)ptr);
+        return 0;
+    }
+
+    UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
+    va_space = (uvm_va_space_t *)ptr;
    filp->private_data = NULL;
    filp->f_mapping = NULL;

@@ -100,7 +308,7 @@ static int uvm_release(struct inode *inode, struct file *filp)
        // been destroyed, and va_space->mapping won't be used again. Still,
        // the va_space survives the inode if its destruction is deferred, in
        // which case the references are rendered stale.
-        address_space_init_once(&va_space->mapping);
+        address_space_init_once(va_space->mapping);

        nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
        ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
@@ -363,14 +571,12 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
 static void uvm_vm_close_managed(struct vm_area_struct *vma)
 {
    uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
-    uvm_gpu_t *gpu;
+    uvm_processor_id_t gpu_id;
    bool make_zombie = false;

    if (current->mm != NULL)
        uvm_record_lock_mmap_lock_write(current->mm);

-    UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
-
    // current->mm will be NULL on process teardown, in which case we have
    // special handling.
    if (current->mm == NULL) {
@@ -400,13 +606,11 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)

    uvm_destroy_vma_managed(vma, make_zombie);

-    // Notify GPU address spaces that the fault buffer needs to be flushed to avoid finding stale entries
-    // that can be attributed to new VA ranges reallocated at the same address
-    for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
-        uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
-        UVM_ASSERT(gpu_va_space);
-
-        gpu_va_space->needs_fault_buffer_flush = true;
+    // Notify GPU address spaces that the fault buffer needs to be flushed to
+    // avoid finding stale entries that can be attributed to new VA ranges
+    // reallocated at the same address.
+    for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
+        uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
    }
    uvm_va_space_up_write(va_space);

@@ -556,7 +760,7 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =

 static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-    uvm_va_space_t *va_space = uvm_va_space_get(filp);
+    uvm_va_space_t *va_space;
    uvm_va_range_t *va_range;
    NV_STATUS status = uvm_global_get_status();
    int ret = 0;
@@ -565,8 +769,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
    if (status != NV_OK)
        return -nv_status_to_errno(status);

-    status = uvm_va_space_initialized(va_space);
-    if (status != NV_OK)
+    va_space = uvm_fd_va_space(filp);
+    if (!va_space)
        return -EBADFD;

    // When the VA space is associated with an mm, all vmas under the VA space
@@ -618,7 +822,11 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
    // Using VM_DONTCOPY would be nice, but madvise(MADV_DOFORK) can reset that
    // so we have to handle vm_open on fork anyway. We could disable MADV_DOFORK
    // with VM_IO, but that causes other mapping issues.
-    nv_vm_flags_set(vma, VM_MIXEDMAP | VM_DONTEXPAND);
+    // Make the default behavior be VM_DONTCOPY to avoid the performance impact
+    // of removing CPU mappings in the parent on fork()+exec(). Users can call
+    // madvise(MADV_DOFORK) if the child process requires access to the
+    // allocation.
+    nv_vm_flags_set(vma, VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTCOPY);

    vma->vm_ops = &uvm_vm_ops_managed;

@@ -678,6 +886,13 @@ out:
    return ret;
 }

+// Reports whether vma->vm_ops points at one of this file's three
+// vm_operations_struct tables: disabled, managed or semaphore pool.
+bool uvm_vma_is_managed(struct vm_area_struct *vma)
+{
+    const struct vm_operations_struct *ops = vma->vm_ops;
+
+    if (ops == &uvm_vm_ops_disabled)
+        return true;
+
+    if (ops == &uvm_vm_ops_managed)
+        return true;
+
+    return ops == &uvm_vm_ops_semaphore_pool;
+}
+
 static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)
 {
   UVM_ENTRY_RET(uvm_mmap(filp, vma));
@@ -685,7 +900,56 @@ static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)

 static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *filp)
 {
-    return uvm_va_space_initialize(uvm_va_space_get(filp), params->flags);
+    atomic_long_t *fd_state = (atomic_long_t *)&filp->private_data;
+    uvm_va_space_t *va_space;
+    uvm_fd_type_t prev_type;
+    NV_STATUS status;
+
+    // private_data is expected to be UVM_FD_UNINITIALIZED here, but several
+    // threads may issue this ioctl at once. A single atomic compare-and-swap
+    // picks the one thread that sees UVM_FD_UNINITIALIZED; that thread alone
+    // allocates and sets up the va_space, so nothing is created twice or
+    // leaked.
+    //
+    // Every other caller observes one of:
+    //   - UVM_FD_VA_SPACE:     success only when the initialization flags
+    //                          match the ones already in use.
+    //   - UVM_FD_MM:           invalid, the FD is a UVM MM FD.
+    //   - UVM_FD_INITIALIZING: another thread is mid-setup, so
+    //                          NV_ERR_BUSY_RETRY is returned.
+    //
+    // When va_space creation fails, the error is returned and the FD goes
+    // back to UVM_FD_UNINITIALIZED so initialization can be retried. That is
+    // safe because other threads could only have seen UVM_FD_INITIALIZING,
+    // never UVM_FD_VA_SPACE.
+    prev_type = nv_atomic_long_cmpxchg(fd_state, UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
+    prev_type &= UVM_FD_TYPE_MASK;
+
+    switch (prev_type) {
+        case UVM_FD_UNINITIALIZED:
+            status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
+            if (status != NV_OK) {
+                atomic_long_set_release(fd_state, UVM_FD_UNINITIALIZED);
+                return status;
+            }
+
+            atomic_long_set_release(fd_state, (long)va_space | UVM_FD_VA_SPACE);
+            break;
+
+        case UVM_FD_VA_SPACE:
+            va_space = uvm_va_space_get(filp);
+            status = (params->flags == va_space->initialization_flags) ? NV_OK : NV_ERR_INVALID_ARGUMENT;
+            break;
+
+        case UVM_FD_MM:
+            status = NV_ERR_INVALID_ARGUMENT;
+            break;
+
+        default:
+            UVM_ASSERT(prev_type == UVM_FD_INITIALIZING);
+            status = NV_ERR_BUSY_RETRY;
+            break;
+    }
+
+    return status;
 }

 static NV_STATUS uvm_api_pageable_mem_access(UVM_PAGEABLE_MEM_ACCESS_PARAMS *params, struct file *filp)
@@ -703,6 +967,7 @@ static long uvm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
            return 0;

        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_INITIALIZE,                  uvm_api_initialize);
+        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_MM_INITIALIZE,               uvm_api_mm_initialize);

        UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_PAGEABLE_MEM_ACCESS,            uvm_api_pageable_mem_access);
        UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_PAGEABLE_MEM_ACCESS_ON_GPU,     uvm_api_pageable_mem_access_on_gpu);
@@ -782,11 +1047,6 @@ static const struct file_operations uvm_fops =
    .owner           = THIS_MODULE,
 };

-bool uvm_file_is_nvidia_uvm(struct file *filp)
-{
-    return (filp != NULL) && (filp->f_op == &uvm_fops);
-}
-
 NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
 {
    long ret;
--- a/kernel-open/nvidia-uvm/uvm.h
+++ b/kernel-open/nvidia-uvm/uvm.h
@@ -54,7 +54,7 @@
 #ifndef _UVM_H_
 #define _UVM_H_

-#define UVM_API_LATEST_REVISION 7
+#define UVM_API_LATEST_REVISION 8

 #if !defined(UVM_API_REVISION)
 #error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
@@ -211,12 +211,12 @@ NV_STATUS UvmDeinitialize(void);
 // UvmReopen
 //
 // Reinitializes the UVM driver after checking for minimal user-mode state.
-// Before calling this function, all GPUs must be unregistered with 
+// Before calling this function, all GPUs must be unregistered with
 // UvmUnregisterGpu() and all allocated VA ranges must be freed with UvmFree().
 // Note that it is not required to release VA ranges that were reserved with
 // UvmReserveVa().
 //
-// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and 
+// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
 // replaces it with a new open file with the same name.
 //
 // Arguments:
@@ -410,6 +410,12 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
 // location will have their range group association changed to
 // UVM_RANGE_GROUP_ID_NONE.
 //
+// If the Confidential Computing feature is enabled in the system, any VA
+// ranges allocated using UvmAllocSemaphorePool and owned by this GPU will be
+// unmapped from all GPUs and the CPU. UvmFree must still be called on those
+// ranges to reclaim the VA. See UvmAllocSemaphorePool to determine which GPU
+// is considered the owner.
+//
 // Arguments:
 //     gpuUuid: (INPUT)
 //         UUID of the GPU to unregister.
@@ -1094,10 +1100,12 @@ NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
 // Creates a new mapping in the virtual address space of the process, populates
 // it at the specified preferred location, maps it on the provided list of
 // processors if feasible and associates the range with the given range group.
+// If the preferredLocationUuid is the UUID of the CPU, preferred location is
+// set to all CPU nodes allowed by the global and thread memory policies.
 //
 // This API is equivalent to the following code sequence:
 //     UvmMemMap(base, length);
-//     UvmSetPreferredLocation(base, length, preferredLocationUuid);
+//     UvmSetPreferredLocation(base, length, preferredLocationUuid, -1);
 //     for (i = 0; i < accessedByCount; i++) {
 //         UvmSetAccessedBy(base, length, &accessedByUuids[i]);
 //     }
@@ -1262,6 +1270,12 @@ NV_STATUS UvmCleanUpZombieResources(void);
 //
 // The VA range can be unmapped and freed via a call to UvmFree.
 //
+// If the Confidential Computing feature is enabled in the system, at least one
+// GPU must be provided in the perGpuAttribs array. The first GPU in the array
+// is considered the owning GPU. If the owning GPU is unregistered via
+// UvmUnregisterGpu, this allocation will no longer be usable.
+// See UvmUnregisterGpu.
+//
 // Arguments:
 //     base: (INPUT)
 //         Base address of the virtual address range.
@@ -1298,6 +1312,8 @@ NV_STATUS UvmCleanUpZombieResources(void);
 //     NV_ERR_INVALID_ARGUMENT:
 //         perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
 //         or caching is requested on more than one GPU.
+//         The Confidential Computing feature is enabled and the perGpuAttribs
+//         list is empty.
 //
 //     NV_ERR_NOT_SUPPORTED:
 //         The current process is not the one which called UvmInitialize, and
@@ -1444,7 +1460,7 @@ NV_STATUS UvmMigrate(void                  *base,
 NV_STATUS UvmMigrate(void                  *base,
                     NvLength               length,
                     const NvProcessorUuid *destinationUuid,
-                     NvU32                  preferredCpuMemoryNode);
+                     NvS32                  preferredCpuMemoryNode);
 #endif

 //------------------------------------------------------------------------------
@@ -1537,7 +1553,7 @@ NV_STATUS UvmMigrateAsync(void                  *base,
 NV_STATUS UvmMigrateAsync(void                  *base,
                          NvLength               length,
                          const NvProcessorUuid *destinationUuid,
-                          NvU32                  preferredCpuMemoryNode,
+                          NvS32                  preferredCpuMemoryNode,
                          void                  *semaphoreAddress,
                          NvU32                  semaphorePayload);
 #endif
@@ -1746,17 +1762,20 @@ NV_STATUS UvmCreateExternalRange(void     *base,
 // GPUs. The external allocation can be unmapped from a specific GPU using
 // UvmUnmapExternal or from all GPUs using UvmFree.
 //
-// The virtual address range specified by (base, length) must be aligned to the
-// allocation's physical page size and must fall within a VA range previously
-// created with UvmCreateExternalRange. A GPU VA space must have been registered
-// for each GPU in the list. The offset in the physical allocation at which the
-// allocation must be mapped should also be aligned to the allocation's physical
-// page size. The (base, length) range must lie within the largest possible
-// virtual address supported by the specified GPUs.
+// The virtual address range specified by (base, length) must fall within a VA
+// range previously created with UvmCreateExternalRange. A GPU VA space must
+// have been registered for each GPU in the list. The (base, length) range must
+// lie within the largest possible virtual address supported by the specified
+// GPUs.
+//
+// The page size used for the mapping is the largest supported page size less
+// than or equal to the alignments of base, length, offset, and the allocation
+// page size.
 //
 // If the range specified by (base, length) falls within any existing mappings,
 // the behavior is the same as if UvmUnmapExternal with the range specified by
-// (base, length) had been called first.
+// (base, length) had been called first, provided that base and length are
+// aligned to the page size used for the existing one.
 //
 // If the allocation resides in GPU memory, that GPU must have been registered
 // via UvmRegisterGpu. If the allocation resides in GPU memory and a mapping is
@@ -1838,8 +1857,9 @@ NV_STATUS UvmCreateExternalRange(void     *base,
 //         - The requested address range does not fall entirely within an
 //           existing external VA range created with a single call to
 //           UvmCreateExternalRange.
-//         - At least one of base and length is not aligned to the allocation's
-//           physical page size.
+//         - The mapping page size allowed by the alignments of base, length,
+//           and offset is smaller than the minimum supported page size on the
+//           GPU.
 //         - base or base + length fall within an existing mapping but are not
 //           aligned to that mapping's page size.
 //
@@ -1848,8 +1868,7 @@ NV_STATUS UvmCreateExternalRange(void     *base,
 //         address supported by one or more of the specified GPUs.
 //
 //     NV_ERR_INVALID_OFFSET:
-//         offset is not aligned to the allocation's physical page size or
-//         offset+length exceeds the allocation size.
+//         - offset+length exceeds the allocation size.
 //
 //     NV_ERR_INVALID_DEVICE:
 //         One of the following occurred:
@@ -2214,11 +2233,10 @@ NV_STATUS UvmDisableReadDuplication(void     *base,
 // supported by the specified processor.
 //
 // The virtual address range specified by (base, length) must have been
-// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
-// system-allocated pageable memory. If the input range is pageable memory and
-// at least one GPU in the system supports transparent access to pageable
-// memory, the behavior described below does not take effect and the preferred
-// location of the pages in the given range does not change.
+// allocated via a call to either UvmAlloc or UvmMemMap (managed memory), or be
+// supported system-allocated pageable memory. If the input range corresponds to
+// a file backed shared mapping and at least one GPU in the system supports
+// transparent access to pageable memory, the behavior below is not guaranteed.
 //
 // If any pages in the VA range are associated with a range group that was made
 // non-migratable via UvmPreventMigrationRangeGroups, then those pages are
@@ -2237,17 +2255,17 @@ NV_STATUS UvmDisableReadDuplication(void     *base,
 // not cause a migration if a mapping for that page from that processor can be
 // established without migrating the page.
 //
-// When a page migrates away from its preferred location, the mapping on the
-// preferred location's processor is cleared so that the next access from that
-// processor will cause a fault and migrate the page back to its preferred
-// location. In other words, a page is mapped on the preferred location's
-// processor only if the page is in its preferred location. Thus, when the
-// preferred location changes, mappings to pages in the given range are removed
-// from the new preferred location if the pages are resident in a different
-// processor. Note that if the preferred location's processor is a GPU, then a
-// mapping from that GPU to a page in the VA range is only created if a GPU VA
-// space has been registered for that GPU and the page is in its preferred
-// location.
+// When a page that was allocated via either UvmAlloc or UvmMemMap migrates away
+// from its preferred location, the mapping on the preferred location's
+// processor is cleared so that the next access from that processor will cause a
+// fault and migrate the page back to its preferred location. In other words, a
+// page is mapped on the preferred location's processor only if the page is in
+// its preferred location. Thus, when the preferred location changes, mappings
+// to pages in the given range are removed from the new preferred location if
+// the pages are resident in a different processor. Note that if the preferred
+// location's processor is a GPU, then a mapping from that GPU to a page in the
+// VA range is only created if a GPU VA space has been registered for that GPU
+// and the page is in its preferred location.
 //
 // If read duplication has been enabled for any pages in this VA range and
 // UvmPreventMigrationRangeGroups has not been called on the range group that
@@ -2260,7 +2278,7 @@ NV_STATUS UvmDisableReadDuplication(void     *base,
 //
 // If the preferred location processor is present in the accessed-by list of any
 // of the pages in this VA range, then the migration and mapping policies
-// associated with associated with the accessed-by list.
+// associated with this API override those associated with the accessed-by list.
 //
 // The state set by this API can be cleared either by calling
 // UvmUnsetPreferredLocation for the same VA range or by calling
@@ -2281,35 +2299,66 @@ NV_STATUS UvmDisableReadDuplication(void     *base,
 //     preferredLocationUuid: (INPUT)
 //         UUID of the preferred location.
 //
+//     preferredCpuNumaNode: (INPUT)
+//         Preferred CPU NUMA memory node used if preferredLocationUuid is the
+//         UUID of the CPU. -1 is a special value which indicates all CPU nodes
+//         allowed by the global and thread memory policies. This argument is
+//         ignored if preferredLocationUuid refers to a GPU or the given virtual
+//         address range corresponds to managed memory. If NUMA is not enabled,
+//         only 0 or -1 is allowed.
+//
 // Errors:
 //     NV_ERR_INVALID_ADDRESS:
-//         base and length are not properly aligned, or the range does not
-//         represent a valid UVM allocation, or the range is pageable memory and
-//         the system does not support accessing pageable memory, or the range
-//         does not represent a supported Operating System allocation.
+//         One of the following occurred:
+//         - base and length are not properly aligned.
+//         - The range does not represent a valid UVM allocation.
+//         - The range is pageable memory and the system does not support
+//           accessing pageable memory.
+//         - The range does not represent a supported Operating System
+//           allocation.
 //
 //     NV_ERR_OUT_OF_RANGE:
 //         The VA range exceeds the largest virtual address supported by the
 //         specified processor.
 //
 //     NV_ERR_INVALID_DEVICE:
-//         preferredLocationUuid is neither the UUID of the CPU nor the UUID of
-//         a GPU that was registered by this process. Or at least one page in
-//         VA range belongs to a non-migratable range group and the specified
-//         UUID represents a fault-capable GPU. Or preferredLocationUuid is the
-//         UUID of a non-fault-capable GPU and at least one page in the VA range
-//         belongs to a non-migratable range group and another non-fault-capable
-//         GPU is in the accessed-by list of the same page but P2P support
-//         between both GPUs has not been enabled.
+//         One of the following occurred:
+//         - preferredLocationUuid is neither the UUID of the CPU nor the UUID
+//           of a GPU that was registered by this process.
+//         - At least one page in VA range belongs to a non-migratable range
+//           group and the specified UUID represents a fault-capable GPU.
+//         - preferredLocationUuid is the UUID of a non-fault-capable GPU and at
+//           least one page in the VA range belongs to a non-migratable range
+//           group and another non-fault-capable GPU is in the accessed-by list
+//           of the same page but P2P support between both GPUs has not been
+//           enabled.
+//
+//     NV_ERR_INVALID_ARGUMENT:
+//         One of the following occurred:
+//         - preferredLocationUuid is the UUID of a CPU and preferredCpuNumaNode
+//           refers to a registered GPU.
+//         - preferredCpuNumaNode is invalid and preferredLocationUuid is the
+//           UUID of the CPU.
+//
+//     NV_ERR_NOT_SUPPORTED:
+//         The UVM file descriptor is associated with another process and the
+//         input virtual range corresponds to system-allocated pageable memory.
 //
 //     NV_ERR_GENERIC:
 //         Unexpected error. We try hard to avoid returning this error code,
 //         because it is not very informative.
 //
 //------------------------------------------------------------------------------
+#if UVM_API_REV_IS_AT_MOST(7)
 NV_STATUS UvmSetPreferredLocation(void                  *base,
                                  NvLength               length,
                                  const NvProcessorUuid *preferredLocationUuid);
+#else
+NV_STATUS UvmSetPreferredLocation(void                  *base,
+                                  NvLength               length,
+                                  const NvProcessorUuid *preferredLocationUuid,
+                                  NvS32                  preferredCpuNumaNode);
+#endif

 //------------------------------------------------------------------------------
 // UvmUnsetPreferredLocation
@@ -2323,10 +2372,9 @@ NV_STATUS UvmSetPreferredLocation(void                  *base,
 //
 // The virtual address range specified by (base, length) must have been
 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported
-// system-allocated pageable memory. If the input range is pageable memory and
-// at least one GPU in the system supports transparent access to pageable
-// memory, the behavior described below does not take effect and the preferred
-// location of the pages in the given range does not change.
+// system-allocated pageable memory. If the input range corresponds to a file
+// backed shared mapping and at least one GPU in the system supports transparent
+// access to pageable memory, the behavior below is not guaranteed.
 //
 // If the VA range is associated with a non-migratable range group, then that
 // association is cleared. i.e. the pages in this VA range have their range
@@ -2345,10 +2393,18 @@ NV_STATUS UvmSetPreferredLocation(void                  *base,
 //
 // Errors:
 //     NV_ERR_INVALID_ADDRESS:
-//         base and length are not properly aligned or the range does not
-//         represent a valid UVM allocation, or the range is pageable memory and
-//         the system does not support accessing pageable memory, or the range
-//         does not represent a supported Operating System allocation.
+//         One of the following occurred:
+//         - base and length are not properly aligned or the range does not
+//           represent a valid UVM allocation.
+//         - The range is pageable memory and the system does not support
+//           accessing pageable memory.
+//         - The range does not represent a supported Operating System
+//           allocation.
+//         - The range contains both managed and pageable memory allocations.
+//
+//     NV_ERR_NOT_SUPPORTED:
+//         The UVM file descriptor is associated with another process and the
+//         input virtual range corresponds to system-allocated pageable memory.
 //
 //     NV_ERR_GENERIC:
 //         Unexpected error. We try hard to avoid returning this error code,
@@ -2629,13 +2685,34 @@ NV_STATUS UvmDisableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
 //     NV_ERR_INVALID_STATE:
 //         UVM was not initialized before calling this function.
 //
-//     NV_ERR_GENERIC:
-//         Unexpected error. We try hard to avoid returning this error code,
-//         because it is not very informative.
-//
 //------------------------------------------------------------------------------
 NV_STATUS UvmGetFileDescriptor(UvmFileDescriptor *returnedFd);

+//------------------------------------------------------------------------------
+// UvmGetMmFileDescriptor
+//
+// Returns the UVM file descriptor currently being used to keep the
+// memory management context valid. The data type of the returned file
+// descriptor is platform specific.
+//
+// If UvmInitialize has not yet been called, an error is returned.
+//
+// Arguments:
+//     returnedFd: (OUTPUT)
+//         A platform specific file descriptor.
+//
+// Error codes:
+//     NV_ERR_INVALID_ARGUMENT:
+//         returnedFd is NULL.
+//
+//     NV_ERR_INVALID_STATE:
+//         UVM was not initialized before calling this function.
+//
+//     NV_ERR_NOT_SUPPORTED:
+//         This file descriptor is not required on this platform.
+//------------------------------------------------------------------------------
+NV_STATUS UvmGetMmFileDescriptor(UvmFileDescriptor *returnedFd);
+
 //------------------------------------------------------------------------------
 // UvmIs8Supported
 //
--- a/kernel-open/nvidia-uvm/uvm_ada.c
+++ b/kernel-open/nvidia-uvm/uvm_ada.c
@@ -49,11 +49,13 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    // A single top level PDE on Ada covers 128 TB and that's the minimum size
    // that can be used.
    parent_gpu->rm_va_base = 0;
-    parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
+    parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;

-    parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
+    parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

+    parent_gpu->ce_phys_vidmem_write_supported = true;
+
    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

    // Not all units on Ada support 49-bit addressing, including those which
--- a/kernel-open/nvidia-uvm/uvm_ampere.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere.c
@@ -47,14 +47,16 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    // A single top level PDE on Ampere covers 128 TB and that's the minimum
    // size that can be used.
    parent_gpu->rm_va_base = 0;
-    parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
+    parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;

-    parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
+    parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

    // See uvm_mmu.h for mapping placement
-    parent_gpu->flat_vidmem_va_base = 136ull * 1024 * 1024 * 1024 * 1024;
-    parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024;
+    parent_gpu->flat_vidmem_va_base = 136 * UVM_SIZE_1TB;
+    parent_gpu->flat_sysmem_va_base = 256 * UVM_SIZE_1TB;
+
+    parent_gpu->ce_phys_vidmem_write_supported = true;

    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

--- a/kernel-open/nvidia-uvm/uvm_ampere_ce.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere_ce.c
@@ -27,7 +27,7 @@
 #include "clc7b5.h"
 #include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT

-bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
+bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
 {
    if (!uvm_channel_is_proxy(push->channel))
        return true;
@@ -112,7 +112,7 @@ NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
    return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
 }

-bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
+bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
 {
    NvU64 push_begin_gpu_va;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
@@ -121,6 +121,8 @@ bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t
        return true;

    if (uvm_channel_is_proxy(push->channel)) {
+        uvm_pushbuffer_t *pushbuffer;
+
        if (dst.is_virtual) {
            UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
            return false;
@@ -142,7 +144,8 @@ bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t
            return false;
        }

-        push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
+        pushbuffer = uvm_channel_get_pushbuffer(push->channel);
+        push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);

        if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
            UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
@@ -177,13 +180,19 @@ bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t
 // irrespective of the virtualization mode.
 void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
 {
+    uvm_pushbuffer_t *pushbuffer;
+
    if (!uvm_channel_is_proxy(push->channel))
        return;

-    src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
+    pushbuffer = uvm_channel_get_pushbuffer(push->channel);
+    src->address -= uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
 }

-bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
+bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
+                                            uvm_gpu_address_t dst,
+                                            size_t num_elements,
+                                            size_t element_size)
 {
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

--- a/kernel-open/nvidia-uvm/uvm_ampere_host.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere_host.c
@@ -29,7 +29,7 @@
 #include "clc56f.h"
 #include "clc076.h"

-bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
+bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
 {
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

@@ -82,7 +82,7 @@ bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address,
   return true;
 }

-bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
+bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
 {
    if (!uvm_channel_is_proxy(push->channel))
        return true;
--- a/kernel-open/nvidia-uvm/uvm_api.h
+++ b/kernel-open/nvidia-uvm/uvm_api.h
@@ -25,6 +25,7 @@
 #define __UVM_API_H__

 #include "uvm_types.h"
+#include "uvm_common.h"
 #include "uvm_ioctl.h"
 #include "uvm_linux.h"
 #include "uvm_lock.h"
@@ -51,8 +52,10 @@
                                                                                    \
        params.rmStatus = uvm_global_get_status();                                  \
        if (params.rmStatus == NV_OK) {                                             \
-            if (do_init_check)                                                      \
-                params.rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
+            if (do_init_check) {                                                    \
+                if (!uvm_fd_va_space(filp))                                         \
+                    params.rmStatus = NV_ERR_ILLEGAL_ACTION;                        \
+            }                                                                       \
            if (likely(params.rmStatus == NV_OK))                                   \
                params.rmStatus = function_name(&params, filp);                     \
        }                                                                           \
@@ -88,8 +91,10 @@
                                                                                        \
        params->rmStatus = uvm_global_get_status();                                     \
        if (params->rmStatus == NV_OK) {                                                \
-            if (do_init_check)                                                          \
-                params->rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp));    \
+            if (do_init_check) {                                                        \
+                if (!uvm_fd_va_space(filp))                                             \
+                    params->rmStatus = NV_ERR_ILLEGAL_ACTION;                           \
+            }                                                                           \
            if (likely(params->rmStatus == NV_OK))                                      \
                params->rmStatus = function_name(params, filp);                         \
        }                                                                               \
@@ -196,21 +201,20 @@ static bool uvm_api_range_invalid_64k(NvU64 base, NvU64 length)
    return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_64K);
 }

-// Returns true if the interval [start, start + length -1] is entirely covered
-// by vmas.
-//
-// LOCKING: mm->mmap_lock must be held in at least read mode.
-bool uvm_is_valid_vma_range(struct mm_struct *mm, NvU64 start, NvU64 length);
+typedef enum
+{
+    UVM_API_RANGE_TYPE_MANAGED,
+    UVM_API_RANGE_TYPE_HMM,
+    UVM_API_RANGE_TYPE_ATS,
+    UVM_API_RANGE_TYPE_INVALID
+} uvm_api_range_type_t;

-// Check that the interval [base, base + length) is fully covered by UVM
-// managed ranges (NV_OK is returned), or (if ATS is enabled and mm != NULL)
-// fully covered by valid vmas (NV_WARN_NOTHING_TO_DO is returned), or (if HMM
-// is enabled and mm != NULL) fully covered by valid vmas (NV_OK is returned).
-// Any other input results in a return status of NV_ERR_INVALID_ADDRESS.
+// If the interval [base, base + length) is fully covered by VMAs which all have
+// the same uvm_api_range_type_t, that range type is returned.
 //
 // LOCKING: va_space->lock must be held in at least read mode. If mm != NULL,
 //          mm->mmap_lock must also be held in at least read mode.
-NV_STATUS uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
+uvm_api_range_type_t uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);

 NV_STATUS uvm_api_pageable_mem_access_on_gpu(UVM_PAGEABLE_MEM_ACCESS_ON_GPU_PARAMS *params, struct file *filp);
 NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *filp);
--- a/kernel-open/nvidia-uvm/uvm_ats.c
+++ b/kernel-open/nvidia-uvm/uvm_ats.c
@@ -44,6 +44,8 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)

 void uvm_ats_init_va_space(uvm_va_space_t *va_space)
 {
+    uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);
+
    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_init_va_space(va_space);
 }
@@ -57,6 +59,10 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)

        return uvm_ats_ibm_add_gpu(parent_gpu);
    }
+    else if (UVM_ATS_SVA_SUPPORTED()) {
+        if (g_uvm_global.ats.enabled)
+            return uvm_ats_sva_add_gpu(parent_gpu);
+    }

    return NV_OK;
 }
@@ -71,6 +77,10 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)

        uvm_ats_ibm_remove_gpu(parent_gpu);
    }
+    else if (UVM_ATS_SVA_SUPPORTED()) {
+        if (g_uvm_global.ats.enabled)
+            uvm_ats_sva_remove_gpu(parent_gpu);
+    }
 }

 NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
@@ -87,6 +97,8 @@ NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)

    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_bind_gpu(gpu_va_space);
+    else if (UVM_ATS_SVA_SUPPORTED())
+        status = uvm_ats_sva_bind_gpu(gpu_va_space);

    return status;
 }
@@ -100,6 +112,8 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unbind_gpu(gpu_va_space);
+    else if (UVM_ATS_SVA_SUPPORTED())
+        uvm_ats_sva_unbind_gpu(gpu_va_space);
 }

 NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
@@ -126,6 +140,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)

    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
+    else if (UVM_ATS_SVA_SUPPORTED())
+        status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);

    if (status == NV_OK)
        uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
@@ -148,6 +164,8 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
+    else if (UVM_ATS_SVA_SUPPORTED())
+        uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);

    uvm_va_space_down_write(va_space);
    uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
--- a/kernel-open/nvidia-uvm/uvm_ats.h
+++ b/kernel-open/nvidia-uvm/uvm_ats.h
@@ -28,8 +28,21 @@
 #include "uvm_forward_decl.h"
 #include "uvm_ats_ibm.h"
 #include "nv_uvm_types.h"
+#include "uvm_lock.h"

-    #define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED())
+    #include "uvm_ats_sva.h"
+
+    #define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
+
+// ATS prefetcher uses hmm_range_fault() to query residency information.
+// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
+// of memory regions while hmm_range_fault() is being called, MMU interval
+// notifiers are needed.
+    #if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
+        #define UVM_ATS_PREFETCH_SUPPORTED() 1
+    #else
+        #define UVM_ATS_PREFETCH_SUPPORTED() 0
+    #endif

 typedef struct
 {
@@ -37,10 +50,15 @@ typedef struct
    // indexed by gpu->id. This mask is protected by the VA space lock.
    uvm_processor_mask_t registered_gpu_va_spaces;

+    // Protects racing invalidates in the VA space while hmm_range_fault() is
+    // being called in ats_compute_residency_mask().
+    uvm_rw_semaphore_t lock;
+
    union
    {
        uvm_ibm_va_space_t ibm;

+        uvm_sva_va_space_t sva;
    };
 } uvm_ats_va_space_t;

@@ -58,6 +76,7 @@ typedef struct
    {
        uvm_ibm_gpu_va_space_t ibm;

+        uvm_sva_gpu_va_space_t sva;
    };
 } uvm_ats_gpu_va_space_t;

@@ -90,6 +109,8 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu);
 // LOCKING: mmap_lock must be lockable.
 //          VA space lock must be lockable.
 //          gpu_va_space->gpu must be retained.
+//          mm must be retained with uvm_va_space_mm_retain() iff
+//          UVM_ATS_SVA_SUPPORTED() is 1
 NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);

 // Decrements the refcount on the {gpu, mm} pair. Removes the binding from the
--- a/kernel-open/nvidia-uvm/uvm_ats_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_ats_faults.c
@@ -20,74 +20,33 @@
    DEALINGS IN THE SOFTWARE.
 *******************************************************************************/

+#include "uvm_api.h"
 #include "uvm_tools.h"
 #include "uvm_va_range.h"
+#include "uvm_ats.h"
 #include "uvm_ats_faults.h"
 #include "uvm_migrate_pageable.h"
+#include <linux/nodemask.h>
+#include <linux/mempolicy.h>
+#include <linux/mmu_notifier.h>

-// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
-// these experimental parameters. These are intended to help guide that policy.
-static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
-module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
-MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
-                 "Max order of pages (2^N) to prefetch on replayable ATS faults");
+#if UVM_ATS_PREFETCH_SUPPORTED()
+#include <linux/hmm.h>
+#endif

-static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
-module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
-MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
-                 "Max order of pages (2^N) to prefetch on non-replayable ATS faults");
-
-// Expand the fault region to the naturally-aligned region with order given by
-// the module parameters, clamped to the vma containing fault_addr (if any).
-// Note that this means the region contains fault_addr but may not begin at
-// fault_addr.
-static void expand_fault_region(struct mm_struct *mm,
-                                NvU64 fault_addr,
-                                uvm_fault_client_type_t client_type,
-                                unsigned long *start,
-                                unsigned long *size)
-{
-    struct vm_area_struct *vma;
-    unsigned int order;
-    unsigned long outer, aligned_start, aligned_size;
-
-    *start = fault_addr;
-    *size = PAGE_SIZE;
-
-    if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
-        order = uvm_exp_perf_prefetch_ats_order_non_replayable;
-    else
-        order = uvm_exp_perf_prefetch_ats_order_replayable;
-
-    if (order == 0)
-        return;
-
-    vma = find_vma_intersection(mm, fault_addr, fault_addr + 1);
-    if (!vma)
-        return;
-
-    UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);
-
-    aligned_size = (1UL << order) * PAGE_SIZE;
-
-    aligned_start = fault_addr & ~(aligned_size - 1);
-
-    *start = max(vma->vm_start, aligned_start);
-    outer = min(vma->vm_end, aligned_start + aligned_size);
-    *size = outer - *start;
-}
-
-static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
-                                       NvU64 fault_addr,
-                                       uvm_fault_access_type_t access_type,
-                                       uvm_fault_client_type_t client_type)
+static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
+                                    struct vm_area_struct *vma,
+                                    NvU64 start,
+                                    size_t length,
+                                    uvm_fault_access_type_t access_type,
+                                    uvm_ats_fault_context_t *ats_context)
 {
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    struct mm_struct *mm = va_space->va_space_mm.mm;
    bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
    NV_STATUS status;
-    NvU64 start;
-    NvU64 length;
+    NvU64 user_space_start;
+    NvU64 user_space_length;

    // Request uvm_migrate_pageable() to touch the corresponding page after
    // population.
@@ -96,17 +55,18 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
    // 2) guest physical -> host physical
    //
    // The overall ATS translation will fault if either of those translations is
-    // invalid. The get_user_pages() call above handles translation #1, but not
-    // #2. We don't know if we're running as a guest, but in case we are we can
-    // force that translation to be valid by touching the guest physical address
-    // from the CPU. If the translation is not valid then the access will cause
-    // a hypervisor fault. Note that dma_map_page() can't establish mappings
-    // used by GPU ATS SVA translations. GPU accesses to host physical addresses
-    // obtained as a result of the address translation request uses the CPU
-    // address space instead of the IOMMU address space since the translated
-    // host physical address isn't necessarily an IOMMU address. The only way to
-    // establish guest physical to host physical mapping in the CPU address
-    // space is to touch the page from the CPU.
+    // invalid. The pin_user_pages() call within the uvm_migrate_pageable() call
+    // below handles translation #1, but not #2. We don't know if we're running
+    // as a guest, but in case we are we can force that translation to be valid
+    // by touching the guest physical address from the CPU. If the translation
+    // is not valid then the access will cause a hypervisor fault. Note that
+    // dma_map_page() can't establish mappings used by GPU ATS SVA translations.
+    // GPU accesses to host physical addresses obtained as a result of the
+    // address translation request use the CPU address space instead of the
+    // IOMMU address space since the translated host physical address isn't
+    // necessarily an IOMMU address. The only way to establish guest physical to
+    // host physical mapping in the CPU address space is to touch the page from
+    // the CPU.
    //
    // We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
    // VM_WRITE, meaning that the mappings are all granted write access on any
@@ -117,23 +77,22 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,

    uvm_migrate_args_t uvm_migrate_args =
    {
-        .va_space               = va_space,
-        .mm                     = mm,
-        .dst_id                 = gpu_va_space->gpu->parent->id,
-        .dst_node_id            = -1,
-        .populate_permissions   = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
-        .touch                  = true,
-        .skip_mapped            = true,
-        .user_space_start       = &start,
-        .user_space_length      = &length,
+        .va_space                       = va_space,
+        .mm                             = mm,
+        .dst_id                         = ats_context->residency_id,
+        .dst_node_id                    = ats_context->residency_node,
+        .start                          = start,
+        .length                         = length,
+        .populate_permissions           = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
+        .touch                          = true,
+        .skip_mapped                    = true,
+        .populate_on_cpu_alloc_failures = true,
+        .user_space_start               = &user_space_start,
+        .user_space_length              = &user_space_length,
    };

    UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));

-    expand_fault_region(mm, fault_addr, client_type, &uvm_migrate_args.start, &uvm_migrate_args.length);
-
-    // TODO: Bug 2103669: Service more than a single fault at a time
-    //
    // We are trying to use migrate_vma API in the kernel (if it exists) to
    // populate and map the faulting region on the GPU. We want to do this only
    // on the first touch. That is, pages which are not already mapped. So, we
@@ -148,114 +107,448 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
    return status;
 }

-NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
-                                      uvm_fault_buffer_entry_t *current_entry,
-                                      uvm_ats_fault_invalidate_t *ats_invalidate)
+static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
+                                   NvU64 addr,
+                                   size_t size,
+                                   uvm_fault_client_type_t client_type)
 {
-    NvU64 gmmu_region_base;
-    bool in_gmmu_region;
-    NV_STATUS status = NV_OK;
-    uvm_fault_access_type_t service_access_type;
+    uvm_ats_fault_invalidate_t *ats_invalidate;

+    if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
+        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
+    else
+        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
+
+    if (!ats_invalidate->write_faults_in_batch) {
+        uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
+        ats_invalidate->write_faults_in_batch = true;
+    }
+
+    uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
+}
+
+static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
+                                       struct vm_area_struct *vma,
+                                       uvm_ats_fault_context_t *ats_context)
+{
+    uvm_gpu_t *gpu = gpu_va_space->gpu;
+    int residency = uvm_gpu_numa_node(gpu);
+
+#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
+    struct mempolicy *vma_policy = vma_policy(vma);
+    unsigned short mode;
+
+    ats_context->prefetch_state.has_preferred_location = false;
+
+    // It's safe to read vma_policy since the mmap_lock is held in at least read
+    // mode in this path.
+    uvm_assert_mmap_lock_locked(vma->vm_mm);
+
+    if (!vma_policy)
+        goto done;
+
+    mode = vma_policy->mode;
+
+    if ((mode == MPOL_BIND) || (mode == MPOL_PREFERRED_MANY) || (mode == MPOL_PREFERRED)) {
+        int home_node = NUMA_NO_NODE;
+
+#if defined(NV_MEMPOLICY_HAS_HOME_NODE)
+        if ((mode != MPOL_PREFERRED) && (vma_policy->home_node != NUMA_NO_NODE))
+            home_node = vma_policy->home_node;
+#endif
+
+        // Prefer home_node if set. Otherwise, prefer the faulting GPU if it's
+        // in the list of preferred nodes, else prefer the closest_cpu_numa_node
+        // to the GPU if closest_cpu_numa_node is in the list of preferred
+        // nodes. Fall back to the first node in the policy if all else fails.
+        if (home_node != NUMA_NO_NODE) {
+            residency = home_node;
+        }
+        else if (!node_isset(residency, vma_policy->nodes)) {
+            int closest_cpu_numa_node = gpu->parent->closest_cpu_numa_node;
+
+            if ((closest_cpu_numa_node != NUMA_NO_NODE) && node_isset(closest_cpu_numa_node, vma_policy->nodes))
+                residency = gpu->parent->closest_cpu_numa_node;
+            else
+                residency = first_node(vma_policy->nodes);
+        }
+
+        if (!nodes_empty(vma_policy->nodes))
+            ats_context->prefetch_state.has_preferred_location = true;
+    }
+
+    // Update gpu if residency is not the faulting gpu.
+    if (residency != uvm_gpu_numa_node(gpu))
+        gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
+
+done:
+#else
+    ats_context->prefetch_state.has_preferred_location = false;
+#endif
+
+    ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
+    ats_context->residency_node = residency;
+}
+
+static void get_range_in_vma(struct vm_area_struct *vma, NvU64 base, NvU64 *start, NvU64 *end)
+{
+    *start = max(vma->vm_start, (unsigned long) base);
+    *end = min(vma->vm_end, (unsigned long) (base + UVM_VA_BLOCK_SIZE));
+}
+
+static uvm_page_index_t uvm_ats_cpu_page_index(NvU64 base, NvU64 addr)
+{
+    UVM_ASSERT(addr >= base);
+    UVM_ASSERT(addr <= (base + UVM_VA_BLOCK_SIZE));
+
+    return (addr - base) / PAGE_SIZE;
+}
+
+// start and end must be aligned to PAGE_SIZE and must fall within
+// [base, base + UVM_VA_BLOCK_SIZE]
+static uvm_va_block_region_t uvm_ats_region_from_start_end(NvU64 start, NvU64 end)
+{
+    // base can be greater than, less than or equal to the start of a VMA.
+    NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(start);
+
+    UVM_ASSERT(start < end);
+    UVM_ASSERT(PAGE_ALIGNED(start));
+    UVM_ASSERT(PAGE_ALIGNED(end));
+    UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
+
+    return uvm_va_block_region(uvm_ats_cpu_page_index(base, start), uvm_ats_cpu_page_index(base, end));
+}
+
+static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma, NvU64 base)
+{
+    NvU64 start;
+    NvU64 end;
+
+    get_range_in_vma(vma, base, &start, &end);
+
+    return uvm_ats_region_from_start_end(start, end);
+}
+
+#if UVM_ATS_PREFETCH_SUPPORTED()
+
+static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
+{
+    uvm_ats_fault_context_t *ats_context = container_of(mni, uvm_ats_fault_context_t, prefetch_state.notifier);
+    uvm_va_space_t *va_space = ats_context->prefetch_state.va_space;
+
+    // The following write lock protects against concurrent invalidates while
+    // hmm_range_fault() is being called in ats_compute_residency_mask().
+    uvm_down_write(&va_space->ats.lock);
+
+    mmu_interval_set_seq(mni, cur_seq);
+
+    uvm_up_write(&va_space->ats.lock);
+
+    return true;
+}
+
+static bool uvm_ats_invalidate_notifier_entry(struct mmu_interval_notifier *mni,
+                                              const struct mmu_notifier_range *range,
+                                              unsigned long cur_seq)
+{
+    UVM_ENTRY_RET(uvm_ats_invalidate_notifier(mni, cur_seq));
+}
+
+static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
+{
+    .invalidate = uvm_ats_invalidate_notifier_entry,
+};
+
+#endif
+
+static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
+                                            struct vm_area_struct *vma,
+                                            NvU64 base,
+                                            uvm_ats_fault_context_t *ats_context)
+{
+    NV_STATUS status = NV_OK;
+
+#if UVM_ATS_PREFETCH_SUPPORTED()
+    int ret;
+    NvU64 start;
+    NvU64 end;
+    uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
+    struct hmm_range range;
+    uvm_page_index_t page_index;
+    uvm_va_block_region_t vma_region;
+    uvm_va_space_t *va_space = gpu_va_space->va_space;
+    struct mm_struct *mm = va_space->va_space_mm.mm;
+
+    uvm_assert_rwsem_locked_read(&va_space->lock);
+
+    ats_context->prefetch_state.first_touch = true;
+
+    uvm_page_mask_zero(residency_mask);
+
+    get_range_in_vma(vma, base, &start, &end);
+
+    vma_region = uvm_ats_region_from_start_end(start, end);
+
+    range.notifier = &ats_context->prefetch_state.notifier;
+    range.start = start;
+    range.end = end;
+    range.hmm_pfns = ats_context->prefetch_state.pfns;
+    range.default_flags = 0;
+    range.pfn_flags_mask = 0;
+    range.dev_private_owner = NULL;
+
+    ats_context->prefetch_state.va_space = va_space;
+
+    // mmu_interval_notifier_insert() will try to acquire mmap_lock for write
+    // and will deadlock since mmap_lock is already held for read in this path.
+    // This is prevented by calling __mmu_notifier_register() during va_space
+    // creation. See the comment in uvm_mmu_notifier_register() for more
+    // details.
+    ret = mmu_interval_notifier_insert(range.notifier, mm, start, end, &uvm_ats_notifier_ops);
+    if (ret)
+        return errno_to_nv_status(ret);
+
+    while (true) {
+        range.notifier_seq = mmu_interval_read_begin(range.notifier);
+        ret = hmm_range_fault(&range);
+        if (ret == -EBUSY)
+            continue;
+        if (ret) {
+            status = errno_to_nv_status(ret);
+            UVM_ASSERT(status != NV_OK);
+            break;
+        }
+
+        uvm_down_read(&va_space->ats.lock);
+
+        // Pages may have been freed or re-allocated after hmm_range_fault() is
+        // called. So the PTE might point to a different page or nothing. In the
+        // memory hot-unplug case it is not safe to call page_to_nid() on the
+        // page as the struct page itself may have been freed. To protect
+        // against these cases, uvm_ats_invalidate_notifier() blocks on the
+        // va_space ATS write lock for concurrent invalidates since the va_space
+        // ATS lock is held for read in this path.
+        if (!mmu_interval_read_retry(range.notifier, range.notifier_seq))
+            break;
+
+        uvm_up_read(&va_space->ats.lock);
+    }
+
+    if (status == NV_OK) {
+        for_each_va_block_page_in_region(page_index, vma_region) {
+            unsigned long pfn = ats_context->prefetch_state.pfns[page_index - vma_region.first];
+
+            if (pfn & HMM_PFN_VALID) {
+                struct page *page = hmm_pfn_to_page(pfn);
+
+                if (page_to_nid(page) == ats_context->residency_node)
+                    uvm_page_mask_set(residency_mask, page_index);
+
+                ats_context->prefetch_state.first_touch = false;
+            }
+        }
+
+        uvm_up_read(&va_space->ats.lock);
+    }
+
+    mmu_interval_notifier_remove(range.notifier);
+
+#endif
+
+    return status;
+}
+
+static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
+                                    struct vm_area_struct *vma,
+                                    uvm_ats_fault_context_t *ats_context,
+                                    uvm_va_block_region_t max_prefetch_region,
+                                    uvm_page_mask_t *faulted_mask)
+{
+    uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
+    uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
+    uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
+    uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
+    uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
+
+    if (uvm_page_mask_empty(faulted_mask))
+        return;
+
+    uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
+                                  faulted_mask,
+                                  uvm_va_block_region_from_mask(NULL, faulted_mask),
+                                  max_prefetch_region,
+                                  residency_mask,
+                                  bitmap_tree,
+                                  prefetch_mask);
+
+    uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
+
+    if (vma->vm_flags & VM_WRITE)
+        uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
+}
+
+static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
+                                    struct vm_area_struct *vma,
+                                    NvU64 base,
+                                    uvm_ats_fault_context_t *ats_context)
+{
+    NV_STATUS status = NV_OK;
+    uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
+    uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
+    uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
+    uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
+    uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
+
+    if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
+        return status;
+
+    if (uvm_page_mask_empty(faulted_mask))
+        return status;
+
+    status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
+    if (status != NV_OK)
+        return status;
+
+    // Prefetch the entire region if none of the pages are resident on any node
+    // and if preferred_location is the faulting GPU.
+    if (ats_context->prefetch_state.has_preferred_location &&
+        ats_context->prefetch_state.first_touch &&
+        uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
+
+        uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
+        uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
+
+        if (vma->vm_flags & VM_WRITE)
+            uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
+
+        return status;
+    }
+
+    ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
+
+    return status;
+}
+
+NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
+                                 struct vm_area_struct *vma,
+                                 NvU64 base,
+                                 uvm_ats_fault_context_t *ats_context)
+{
+    NV_STATUS status = NV_OK;
+    uvm_va_block_region_t subregion;
+    uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
+    uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
+    uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
+    uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
+    uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
+    uvm_fault_client_type_t client_type = ats_context->client_type;
+
+    UVM_ASSERT(vma);
+    UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
    UVM_ASSERT(g_uvm_global.ats.enabled);
+    UVM_ASSERT(gpu_va_space);
    UVM_ASSERT(gpu_va_space->ats.enabled);
    UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);

-    UVM_ASSERT(current_entry->fault_access_type ==
-               uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
+    uvm_page_mask_zero(faults_serviced_mask);
+    uvm_page_mask_zero(reads_serviced_mask);

-    service_access_type = current_entry->fault_access_type;
+    if (!(vma->vm_flags & VM_READ))
+        return status;

-    // ATS lookups are disabled on all addresses within the same
-    // UVM_GMMU_ATS_GRANULARITY as existing GMMU mappings (see documentation in
-    // uvm_mmu.h). User mode is supposed to reserve VAs as appropriate to
-    // prevent any system memory allocations from falling within the NO_ATS
-    // range of other GMMU mappings, so this shouldn't happen during normal
-    // operation. However, since this scenario may lead to infinite fault loops,
-    // we handle it by canceling the fault.
-    //
-    // TODO: Bug 2103669: Remove redundant VA range lookups
-    gmmu_region_base = UVM_ALIGN_DOWN(current_entry->fault_address, UVM_GMMU_ATS_GRANULARITY);
-    in_gmmu_region = !uvm_va_space_range_empty(current_entry->va_space,
-                                               gmmu_region_base,
-                                               gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1);
-    if (in_gmmu_region) {
-        status = NV_ERR_INVALID_ADDRESS;
-    }
-    else {
-        // TODO: Bug 2103669: Service more than a single fault at a time
-        status = uvm_ats_service_fault(gpu_va_space,
-                                       current_entry->fault_address,
-                                       service_access_type,
-                                       current_entry->fault_source.client_type);
+    if (!(vma->vm_flags & VM_WRITE)) {
+        // If VMA doesn't have write permissions, all write faults are fatal.
+        // Try servicing such faults for read iff they are also present in
+        // read_fault_mask. This is because for replayable faults, if there are
+        // pending read accesses on the same page, we have to service them
+        // before we can cancel the write/atomic faults. So we try with read
+        // fault access type even though these write faults are fatal.
+        if (ats_context->client_type == UVM_FAULT_CLIENT_TYPE_GPC)
+            uvm_page_mask_and(write_fault_mask, write_fault_mask, read_fault_mask);
+        else
+            uvm_page_mask_zero(write_fault_mask);
    }

-    // Do not flag prefetch faults as fatal unless something fatal happened
-    if (status == NV_ERR_INVALID_ADDRESS) {
-        if (current_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH) {
-            current_entry->is_fatal = true;
-            current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
+    ats_batch_select_residency(gpu_va_space, vma, ats_context);

-            // Compute cancel mode for replayable faults
-            if (current_entry->is_replayable) {
-                if (service_access_type == UVM_FAULT_ACCESS_TYPE_READ || in_gmmu_region)
-                    current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
-                else
-                    current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC;
+    ats_fault_prefetch(gpu_va_space, vma, base, ats_context);

-                // If there are pending read accesses on the same page, we have to
-                // service them before we can cancel the write/atomic faults. So we
-                // retry with read fault access type.
-                if (!in_gmmu_region &&
-                    current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ &&
-                    uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
-                    status = uvm_ats_service_fault(gpu_va_space,
-                                                   current_entry->fault_address,
-                                                   UVM_FAULT_ACCESS_TYPE_READ,
-                                                   current_entry->fault_source.client_type);
+    for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
+        NvU64 start = base + (subregion.first * PAGE_SIZE);
+        size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
+        uvm_fault_access_type_t access_type = (vma->vm_flags & VM_WRITE) ?
+                                                                          UVM_FAULT_ACCESS_TYPE_WRITE :
+                                                                          UVM_FAULT_ACCESS_TYPE_READ;

-                    // If read accesses are also invalid, cancel the fault. If a
-                    // different error code is returned, exit
-                    if (status == NV_ERR_INVALID_ADDRESS)
-                        current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
-                    else if (status != NV_OK)
-                        return status;
-                }
-            }
+        UVM_ASSERT(start >= vma->vm_start);
+        UVM_ASSERT((start + length) <= vma->vm_end);
+
+        status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
+        if (status != NV_OK)
+            return status;
+
+        if (vma->vm_flags & VM_WRITE) {
+            uvm_page_mask_region_fill(faults_serviced_mask, subregion);
+
+            // The Linux kernel never invalidates TLB entries on mapping
+            // permission upgrade. This is a problem if the GPU has cached
+            // entries with the old permission. The GPU will re-fetch the entry
+            // if the PTE is invalid and page size is not 4K (this is the case
+            // on P9). However, if a page gets upgraded from R/O to R/W and GPU
+            // has the PTEs cached with R/O permissions we will enter an
+            // infinite loop because we just forward the fault to the Linux
+            // kernel and it will see that the permissions in the page table are
+            // correct. Therefore, we flush TLB entries on ATS write faults.
+            flush_tlb_write_faults(gpu_va_space, start, length, client_type);
        }
        else {
-            current_entry->is_invalid_prefetch = true;
+            uvm_page_mask_region_fill(reads_serviced_mask, subregion);
        }
-
-        // Do not fail overall fault servicing due to logical errors
-        status = NV_OK;
    }

-    // The Linux kernel never invalidates TLB entries on mapping permission
-    // upgrade. This is a problem if the GPU has cached entries with the old
-    // permission. The GPU will re-fetch the entry if the PTE is invalid and
-    // page size is not 4K (this is the case on P9). However, if a page gets
-    // upgraded from R/O to R/W and GPU has the PTEs cached with R/O
-    // permissions we will enter an infinite loop because we just forward the
-    // fault to the Linux kernel and it will see that the permissions in the
-    // page table are correct. Therefore, we flush TLB entries on ATS write
-    // faults.
-    if (!current_entry->is_fatal && current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ) {
-        if (!ats_invalidate->write_faults_in_batch) {
-            uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
-            ats_invalidate->write_faults_in_batch = true;
-        }
+    // Remove write faults from read_fault_mask
+    uvm_page_mask_andnot(read_fault_mask, read_fault_mask, write_fault_mask);

-        uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch,
-                                 current_entry->fault_address,
-                                 PAGE_SIZE,
-                                 PAGE_SIZE,
-                                 UVM_MEMBAR_NONE);
+    for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
+        NvU64 start = base + (subregion.first * PAGE_SIZE);
+        size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
+        uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;
+
+        UVM_ASSERT(start >= vma->vm_start);
+        UVM_ASSERT((start + length) <= vma->vm_end);
+
+        status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
+        if (status != NV_OK)
+            return status;
+
+        uvm_page_mask_region_fill(faults_serviced_mask, subregion);
    }

    return status;
 }

+bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next)
+{
+    uvm_va_range_t *prev;
+    NvU64 gmmu_region_base = UVM_ALIGN_DOWN(address, UVM_GMMU_ATS_GRANULARITY);
+
+    UVM_ASSERT(va_space);
+
+    if (next) {
+        if (next->node.start <= gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1)
+            return true;
+
+        prev = uvm_va_range_container(uvm_range_tree_prev(&va_space->va_range_tree, &next->node));
+    }
+    else {
+        // No VA range exists after address, so check the last VA range in the
+        // tree.
+        prev = uvm_va_range_container(uvm_range_tree_last(&va_space->va_range_tree));
+    }
+
+    return prev && (prev->node.end >= gmmu_region_base);
+}
+
 NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
                                  uvm_ats_fault_invalidate_t *ats_invalidate,
                                  uvm_tracker_t *out_tracker)
@@ -287,3 +580,4 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,

    return status;
 }
+
--- a/kernel-open/nvidia-uvm/uvm_ats_faults.h
+++ b/kernel-open/nvidia-uvm/uvm_ats_faults.h
@@ -25,10 +25,31 @@
 #include "uvm_lock.h"
 #include "uvm_global.h"
 #include "uvm_va_space.h"
+#include "uvm_gpu.h"

-NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
-                                      uvm_fault_buffer_entry_t *current_entry,
-                                      uvm_ats_fault_invalidate_t *ats_invalidate);
+// Service ATS faults in the range [base, base + UVM_VA_BLOCK_SIZE) with service
+// type for individual pages in the range requested by page masks set in
+// ats_context->read_fault_mask/write_fault_mask. base must be aligned to
+// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that faulting
+// addresses fall completely within the VMA. The caller is also responsible for
+// ensuring that the faulting addresses don't overlap a GMMU region. (See
+// uvm_ats_check_in_gmmu_region). The caller is also responsible for handling
+// any errors returned by this function (fault cancellations etc.).
+//
+// Returns the fault service status in ats_context->faults_serviced_mask. In
+// addition, ats_context->reads_serviced_mask returns whether read servicing
+// worked on write faults iff the read service was also requested in the
+// corresponding bit in read_fault_mask. These returned masks are only valid if
+// the return status is NV_OK. A status other than NV_OK indicates a
+// system-global fault servicing failure.
+NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
+                                 struct vm_area_struct *vma,
+                                 NvU64 base,
+                                 uvm_ats_fault_context_t *ats_context);
+
+// Return whether there are any VA ranges (and thus GMMU mappings) within the
+// UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
+bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);

 // This function performs pending TLB invalidations for ATS and clears the
 // ats_invalidate->write_faults_in_batch flag
--- a/kernel-open/nvidia-uvm/uvm_ats_sva.c
+++ b/kernel-open/nvidia-uvm/uvm_ats_sva.c
@@ -0,0 +1,156 @@
+/*******************************************************************************
+    Copyright (c) 2018-2023 NVIDIA Corporation
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to
+    deal in the Software without restriction, including without limitation the
+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+        The above copyright notice and this permission notice shall be
+        included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+#include "uvm_ats_sva.h"
+
+#if UVM_ATS_SVA_SUPPORTED()
+
+#include "uvm_gpu.h"
+#include "uvm_va_space.h"
+#include "uvm_va_space_mm.h"
+
+#include <linux/iommu.h>
+#include <linux/mm_types.h>
+
+// linux/sched/mm.h is needed for mmget_not_zero and mmput to get the mm
+// reference required for the iommu_sva_bind_device() call. This header is not
+// present in all the supported versions. Instead of adding a conftest just for
+// this header file, use UVM_ATS_SVA_SUPPORTED().
+#include <linux/sched/mm.h>
+
+// iommu_sva_bind_device() dropped its drvdata parameter with commit
+// 942fd5435dccb273f90176b046ae6bbba60cfbd8 (10/31/2022).
+#if defined(NV_IOMMU_SVA_BIND_DEVICE_HAS_DRVDATA_ARG)
+#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm, NULL)
+#else
+#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
+#endif
+
+NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
+{
+    int ret;
+
+    ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
+
+    return errno_to_nv_status(ret);
+}
+
+void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
+{
+    iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
+}
+
+NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
+{
+    NV_STATUS status = NV_OK;
+    struct iommu_sva *iommu_handle;
+    struct pci_dev *pci_dev = gpu_va_space->gpu->parent->pci_dev;
+    uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
+    struct mm_struct *mm = gpu_va_space->va_space->va_space_mm.mm;
+
+    UVM_ASSERT(gpu_va_space->ats.enabled);
+    UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
+    UVM_ASSERT(mm);
+
+    // The mmput() below may drop the last mm_users reference and trigger the
+    // kernel's mm teardown through exit_mmap(). uvm_va_space_mm_shutdown() and
+    // uvm_vm_close_managed() run in that path and grab the va_space lock, so
+    // this would deadlock if the va_space lock were already held here.
+    uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);
+
+    // iommu_sva_bind_device() requires a reference on mm_users. UVM retains
+    // the mm through its own refcount (retained_count in va_space_mm) rather
+    // than through mm_users, so the mm is still valid here but may already be
+    // inactive, that is, mm_users may have dropped to zero. See the block
+    // comment in uvm_va_space_mm.c for more details. Return an error in that
+    // case instead of binding to an inactive mm.
+    if (!mmget_not_zero(mm))
+        return NV_ERR_PAGE_TABLE_NOT_AVAIL;
+
+    // Multiple calls for the same {pci_dev, mm} pair are refcounted by the ARM
+    // SMMU layer.
+    iommu_handle = UVM_IOMMU_SVA_BIND_DEVICE(&pci_dev->dev, mm);
+    if (IS_ERR(iommu_handle)) {
+        status = errno_to_nv_status(PTR_ERR(iommu_handle));
+        goto out;
+    }
+
+    // If this is not the first bind of the gpu in the mm, then the previously
+    // stored iommu_handle in the gpu_va_space must match the handle returned by
+    // iommu_sva_bind_device().
+    if (sva_gpu_va_space->iommu_handle) {
+        UVM_ASSERT(sva_gpu_va_space->iommu_handle == iommu_handle);
+        nv_kref_get(&sva_gpu_va_space->kref);
+    }
+    else {
+        sva_gpu_va_space->iommu_handle = iommu_handle;
+        nv_kref_init(&sva_gpu_va_space->kref);
+    }
+
+out:
+    mmput(mm);
+    return status;
+}
+
+static void uvm_sva_reset_iommu_handle(nv_kref_t *nv_kref)
+{
+    uvm_sva_gpu_va_space_t *sva_gpu_va_space = container_of(nv_kref, uvm_sva_gpu_va_space_t, kref);
+    sva_gpu_va_space->iommu_handle = NULL;
+}
+
+void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
+{
+    uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
+
+    // ARM SMMU layer decrements the refcount for the {pci_dev, mm} pair.
+    // The actual unbind happens only when the refcount reaches zero.
+    if (sva_gpu_va_space->iommu_handle) {
+        iommu_sva_unbind_device(sva_gpu_va_space->iommu_handle);
+        nv_kref_put(&sva_gpu_va_space->kref, uvm_sva_reset_iommu_handle);
+    }
+}
+
+NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
+{
+    NvU32 pasid;
+    NV_STATUS status = NV_OK;
+    uvm_sva_gpu_va_space_t *sva_gpu_va_space = &gpu_va_space->ats.sva;
+
+    // A successful iommu_sva_bind_device() should have preceded this call.
+    UVM_ASSERT(sva_gpu_va_space->iommu_handle);
+
+    pasid = iommu_sva_get_pasid(sva_gpu_va_space->iommu_handle);
+    if (pasid == IOMMU_PASID_INVALID)
+        status = errno_to_nv_status(ENODEV);
+    else
+        gpu_va_space->ats.pasid = pasid;
+
+    return status;
+}
+
+void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
+{
+    gpu_va_space->ats.pasid = IOMMU_PASID_INVALID; // Same sentinel register checks
+}
+
+#endif // UVM_ATS_SVA_SUPPORTED()
--- a/kernel-open/nvidia-uvm/uvm_ats_sva.h
+++ b/kernel-open/nvidia-uvm/uvm_ats_sva.h
@@ -0,0 +1,112 @@
+/*******************************************************************************
+    Copyright (c) 2018-2023 NVIDIA Corporation
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to
+    deal in the Software without restriction, including without limitation the
+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+        The above copyright notice and this permission notice shall be
+        included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+#ifndef __UVM_ATS_SVA_H__
+#define __UVM_ATS_SVA_H__
+
+#include "uvm_gpu.h"
+#include "uvm_forward_decl.h"
+
+#include <linux/iommu.h>
+
+// For ATS support on aarch64, arm_smmu_sva_bind() is needed for
+// iommu_sva_bind_device() calls. Unfortunately, arm_smmu_sva_bind() is not
+// conftest-able. We instead look for the presence of ioasid_get() or
+// mm_pasid_set(). ioasid_get() was added in the same patch series as
+// arm_smmu_sva_bind() and removed in v5.18. mm_pasid_set() was added in the
+// same patch as the removal of ioasid_get(). We assume the presence of
+// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_set(v5.18+) is
+// present.
+//
+// arm_smmu_sva_bind() was added with commit
+// 32784a9562fb0518b12e9797ee2aec52214adf6f and ioasid_get() was added with
+// commit cb4789b0d19ff231ce9f73376a023341300aed96 (11/23/2020). Commit
+// 701fac40384f07197b106136012804c3cae0b3de (02/15/2022) removed ioasid_get()
+// and added mm_pasid_set().
+    #if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_SET_PRESENT))
+        #define UVM_ATS_SVA_SUPPORTED() 1
+    #else
+        #define UVM_ATS_SVA_SUPPORTED() 0
+    #endif
+
+typedef struct
+{
+    int placeholder;
+} uvm_sva_va_space_t;
+
+typedef struct
+{
+    // Reference count for the iommu_handle
+    nv_kref_t kref;
+    struct iommu_sva *iommu_handle;
+} uvm_sva_gpu_va_space_t;
+
+#if UVM_ATS_SVA_SUPPORTED()
+    NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu);
+    void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu);
+
+    // LOCKING: mmap_lock must be lockable
+    //          VA space lock must not be held.
+    NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
+
+    // LOCKING: VA space lock must not be held.
+    void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
+
+    // LOCKING: None
+    NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
+
+    // LOCKING: None
+    void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
+#else
+    static NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
+    {
+        return NV_OK;
+    }
+
+    static void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
+    {
+
+    }
+
+    static NV_STATUS uvm_ats_sva_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
+    {
+        return NV_OK;
+    }
+
+    static void uvm_ats_sva_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
+    {
+
+    }
+
+    static NV_STATUS uvm_ats_sva_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
+    {
+        return NV_OK;
+    }
+
+    static void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
+    {
+
+    }
+#endif // UVM_ATS_SVA_SUPPORTED
+
+#endif // __UVM_ATS_SVA_H__
--- a/kernel-open/nvidia-uvm/uvm_ce_test.c
+++ b/kernel-open/nvidia-uvm/uvm_ce_test.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -24,12 +24,14 @@
 #include "uvm_channel.h"
 #include "uvm_global.h"
 #include "uvm_hal.h"
+#include "uvm_kvmalloc.h"
 #include "uvm_push.h"
 #include "uvm_test.h"
 #include "uvm_tracker.h"
 #include "uvm_va_space.h"
 #include "uvm_rm_mem.h"
 #include "uvm_mem.h"
+#include "uvm_gpu.h"

 #define CE_TEST_MEM_SIZE (2 * 1024 * 1024)
 #define CE_TEST_MEM_END_SIZE 32
@@ -52,6 +54,11 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
    uvm_push_t push;
    bool is_proxy;

+    // TODO: Bug 3839176: the test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
    TEST_CHECK_GOTO(status == NV_OK, done);
    host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
@@ -66,7 +73,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
    TEST_CHECK_GOTO(status == NV_OK, done);

    is_proxy = uvm_channel_is_proxy(push.channel);
-    host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy);
+    host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy).address;

    // All of the following CE transfers are done from a single (L)CE and
    // disabling pipelining is enough to order them when needed. Only push_end
@@ -74,7 +81,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)

    // Initialize to a bad value
    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
-        mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
+        mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;

        uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
        uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
@@ -83,7 +90,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)

    // Set the first buffer to 1
    uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
-    mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy);
+    mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy).address;
    gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1, CE_TEST_MEM_SIZE);

    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
@@ -91,9 +98,9 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
        if (dst == CE_TEST_MEM_COUNT)
            dst_va = host_mem_gpu_va;
        else
-            dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy);
+            dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy).address;

-        src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
+        src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;

        // The first memcpy needs to be non-pipelined as otherwise the previous
        // memset/memcpy to the source may not be done yet.
@@ -167,6 +174,11 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
    uvm_push_t push;
    NvU32 value;

+    // TODO: Bug 3839176: the test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
    TEST_CHECK_GOTO(status == NV_OK, done);
    host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
@@ -175,11 +187,11 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Membar test");
    TEST_CHECK_GOTO(status == NV_OK, done);

-    host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel));
+    host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel)).address;

    for (i = 0; i < REDUCTIONS; ++i) {
        uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
-        gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS);
+        gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS + 1);
    }

    // Without a sys membar the channel tracking semaphore can and does complete
@@ -333,6 +345,16 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
        return NV_ERR_INVALID_STATE;
    }

+    // If physical accesses aren't supported, silently convert to virtual to
+    // test the flat mapping.
+    TEST_CHECK_RET(gpu_verif_addr.is_virtual);
+
+    if (!src.is_virtual)
+        src = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(src.aperture, src.address));
+
+    if (!dst.is_virtual)
+        dst = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(dst.aperture, dst.address));
+
    // Memset src with the appropriate element size, then memcpy to dst and from
    // dst to the verif location (physical sysmem).

@@ -374,7 +396,7 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
 static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
 {
    NV_STATUS status = NV_OK;
-    bool is_proxy_va_space;
+    bool is_proxy_va_space = false;
    uvm_gpu_address_t gpu_verif_addr;
    void *cpu_verif_addr;
    uvm_mem_t *verif_mem = NULL;
@@ -382,17 +404,17 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
    uvm_mem_t *gpu_uvm_mem = NULL;
    uvm_rm_mem_t *sys_rm_mem = NULL;
    uvm_rm_mem_t *gpu_rm_mem = NULL;
-    uvm_gpu_address_t gpu_addresses[4];
-    NvU64 gpu_va;
-    size_t size;
+    uvm_gpu_address_t gpu_addresses[4] = {0};
+    size_t size = gpu->big_page.internal_size;
    static const size_t element_sizes[] = {1, 4, 8};
    const size_t iterations = 4;
    size_t i, j, k, s;
    uvm_mem_alloc_params_t mem_params = {0};

-    size = gpu->big_page.internal_size;
-
-    TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
+    if (uvm_conf_computing_mode_enabled(gpu))
+        TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, &verif_mem), done);
+    else
+        TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, gpu), done);

    gpu_verif_addr = uvm_mem_gpu_address_virtual_kernel(verif_mem, gpu);
@@ -410,6 +432,34 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
        }
    }

+    // Virtual address (in UVM's internal address space) backed by sysmem
+    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
+    gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
+
+    if (uvm_conf_computing_mode_enabled(gpu)) {
+        for (i = 0; i < iterations; ++i) {
+            for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
+                TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
+                                                                gpu_addresses[0],
+                                                                gpu_addresses[0],
+                                                                size,
+                                                                element_sizes[s],
+                                                                gpu_verif_addr,
+                                                                cpu_verif_addr,
+                                                                i),
+                                    done);
+
+            }
+        }
+
+        // Because gpu_verif_addr is in sysmem, when the Confidential
+        // Computing feature is enabled, only the previous cases are valid.
+        // TODO: Bug 3839176: the test is partially waived on Confidential
+        // Computing because it assumes that GPU can access system memory
+        // without using encryption.
+        goto done;
+    }
+
    // Using a page size equal to the allocation size ensures that the UVM
    // memories about to be allocated are physically contiguous. And since the
    // size is a valid GPU page size, the memories can be virtually mapped on
@@ -421,23 +471,17 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
    // Physical address in sysmem
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
-    gpu_addresses[0] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
+    gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);

    // Physical address in vidmem
    mem_params.backing_gpu = gpu;
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
-    gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
+    gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);

    // Virtual address (in UVM's internal address space) backed by vidmem
    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
-    is_proxy_va_space = false;
-    gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
-    gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va);
+    gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);

-    // Virtual address (in UVM's internal address space) backed by sysmem
-    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
-    gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
-    gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va);

    for (i = 0; i < iterations; ++i) {
        for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
@@ -513,6 +557,11 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
    // Semaphore reduction needs 1 word (4 bytes).
    const size_t size = sizeof(NvU32);

+    // TODO: Bug 3839176: the test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    status = test_semaphore_alloc_sem(gpu, size, &mem);
    TEST_CHECK_RET(status == NV_OK);

@@ -528,7 +577,7 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)

    for (i = 0; i < REDUCTIONS; i++) {
        uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
-        gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, REDUCTIONS);
+        gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, i+1);
    }

    status = uvm_push_end_and_wait(&push);
@@ -560,6 +609,11 @@ static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
    // Semaphore release needs 1 word (4 bytes).
    const size_t size = sizeof(NvU32);

+    // TODO: Bug 3839176: the test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    status = test_semaphore_alloc_sem(gpu, size, &mem);
    TEST_CHECK_RET(status == NV_OK);

@@ -609,6 +663,11 @@ static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
    // The semaphore is 4 words long (16 bytes).
    const size_t size = 16;

+    // TODO: Bug 3839176: the test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    status = test_semaphore_alloc_sem(gpu, size, &mem);
    TEST_CHECK_RET(status == NV_OK);

@@ -645,6 +704,517 @@ done:
    return status;
 }

+static bool mem_match(uvm_mem_t *mem1, uvm_mem_t *mem2, size_t size)
+{
+    void *mem1_addr;
+    void *mem2_addr;
+
+    UVM_ASSERT(uvm_mem_is_sysmem(mem1));
+    UVM_ASSERT(uvm_mem_is_sysmem(mem2));
+    UVM_ASSERT(mem1->size >= size);
+    UVM_ASSERT(mem2->size >= size);
+
+    mem1_addr = uvm_mem_get_cpu_addr_kernel(mem1);
+    mem2_addr = uvm_mem_get_cpu_addr_kernel(mem2);
+
+    return !memcmp(mem1_addr, mem2_addr, size);
+}
+
+static NV_STATUS zero_vidmem(uvm_mem_t *mem)
+{
+    uvm_push_t push;
+    uvm_gpu_address_t gpu_address;
+    uvm_gpu_t *gpu = mem->backing_gpu;
+
+    UVM_ASSERT(uvm_mem_is_vidmem(mem));
+
+    TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));
+
+    gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
+    gpu->parent->ce_hal->memset_1(&push, gpu_address, 0, mem->size);
+
+    TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
+
+    return NV_OK;
+}
+
+static void write_range_cpu(uvm_mem_t *mem, NvU64 base_val)
+{
+    NvU64 *mem_cpu_va;
+    unsigned i;
+
+    UVM_ASSERT(uvm_mem_is_sysmem(mem));
+    UVM_ASSERT(IS_ALIGNED(mem->size, sizeof(*mem_cpu_va)));
+
+    mem_cpu_va = (NvU64 *) uvm_mem_get_cpu_addr_kernel(mem);
+
+    for (i = 0; i < (mem->size / sizeof(*mem_cpu_va)); i++)
+        mem_cpu_va[i] = base_val++;
+}
+
+// Allocate zeroed, GPU-mapped vidmem. If map/zero fails, *mem is freed and NULL.
+static NV_STATUS alloc_vidmem_protected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
+{
+    NV_STATUS status;
+
+    UVM_ASSERT(mem);
+    *mem = NULL;
+
+    TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
+    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
+    TEST_NV_CHECK_GOTO(zero_vidmem(*mem), err);
+    return NV_OK;
+
+err:
+    uvm_mem_free(*mem);
+    *mem = NULL; // Never hand a freed pointer back through the out-parameter
+    return status;
+}
+
+// Allocate zeroed DMA sysmem mapped on CPU and GPU. If a map fails, *mem is NULL.
+static NV_STATUS alloc_sysmem_unprotected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
+{
+    NV_STATUS status;
+
+    UVM_ASSERT(mem);
+    *mem = NULL;
+
+    TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
+    TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(*mem), err);
+    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
+
+    memset(uvm_mem_get_cpu_addr_kernel(*mem), 0, (*mem)->size);
+    return NV_OK;
+
+err:
+    uvm_mem_free(*mem);
+    *mem = NULL; // Never hand a freed pointer back through the out-parameter
+    return status;
+}
+
+static void cpu_encrypt(uvm_channel_t *channel,
+                        uvm_mem_t *dst_mem,
+                        uvm_mem_t *src_mem,
+                        uvm_mem_t *auth_tag_mem,
+                        size_t size,
+                        NvU32 copy_size)
+{
+    size_t offset = 0;
+    char *src_plain = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
+    char *dst_cipher = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
+    char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
+
+    while (offset < size) {
+        uvm_conf_computing_cpu_encrypt(channel, dst_cipher, src_plain, NULL, copy_size, auth_tag_buffer);
+
+        offset += copy_size;
+        dst_cipher += copy_size;
+        src_plain += copy_size;
+        auth_tag_buffer += UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
+    }
+}
+
+static void cpu_acquire_encryption_ivs(uvm_channel_t *channel,
+                                       size_t size,
+                                       NvU32 copy_size,
+                                       UvmCslIv *ivs)
+{
+    size_t offset = 0;
+    int i = 0;
+
+    for (; offset < size; offset += copy_size)
+        uvm_conf_computing_acquire_encryption_iv(channel, &ivs[i++]);
+}
+
+static void cpu_encrypt_rev(uvm_channel_t *channel,
+                            uvm_mem_t *dst_mem,
+                            uvm_mem_t *src_mem,
+                            uvm_mem_t *auth_tag_mem,
+                            size_t size,
+                            NvU32 copy_size,
+                            UvmCslIv *encrypt_iv)
+{
+    char *src_plain = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
+    char *dst_cipher = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
+    char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
+    int i;
+
+    // CPU encrypt order is the opposite of the GPU decrypt order
+    for (i = (size / copy_size) - 1; i >= 0; i--) {
+        uvm_conf_computing_cpu_encrypt(channel,
+                                       dst_cipher + i * copy_size,
+                                       src_plain + i * copy_size,
+                                       encrypt_iv + i,
+                                       copy_size,
+                                       auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
+    }
+}
+
+static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
+                                      uvm_mem_t *dst_mem,
+                                      uvm_mem_t *src_mem,
+                                      const UvmCslIv *decrypt_iv,
+                                      uvm_mem_t *auth_tag_mem,
+                                      size_t size,
+                                      NvU32 copy_size)
+{
+    size_t i;
+    char *dst_plain = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
+    char *src_cipher = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
+    char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
+
+    for (i = 0; i < size / copy_size; i++) {
+        TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
+                                                         dst_plain + i * copy_size,
+                                                         src_cipher + i * copy_size,
+                                                         decrypt_iv + i,
+                                                         copy_size,
+                                                         auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
+    }
+
+    return NV_OK;
+}
+static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
+                                          uvm_mem_t *dst_mem,
+                                          uvm_mem_t *src_mem,
+                                          const UvmCslIv *decrypt_iv,
+                                          uvm_mem_t *auth_tag_mem,
+                                          size_t size,
+                                          NvU32 copy_size)
+{
+    int i;
+    char *dst_plain = (char *) uvm_mem_get_cpu_addr_kernel(dst_mem);
+    char *src_cipher = (char *) uvm_mem_get_cpu_addr_kernel(src_mem);
+    char *auth_tag_buffer = (char *) uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
+
+    UVM_ASSERT((size / copy_size) <= INT_MAX);
+
+    // CPU decrypt order is the opposite of the GPU encrypt order
+    for (i = (size / copy_size) - 1; i >= 0; i--) {
+        TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
+                                                         dst_plain + i * copy_size,
+                                                         src_cipher + i * copy_size,
+                                                         decrypt_iv + i,
+                                                         copy_size,
+                                                         auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
+    }
+
+    return NV_OK;
+}
+
+// GPU address to use as source or destination in CE decrypt/encrypt operations.
+// If the uvm_mem backing storage is contiguous in the [offset, offset + size)
+// interval, the physical address gets priority over the virtual counterpart.
+static uvm_gpu_address_t gpu_address(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU32 size)
+{
+    uvm_gpu_address_t gpu_virtual_address;
+
+    if (uvm_mem_is_physically_contiguous(mem, offset, size))
+        return uvm_mem_gpu_address_physical(mem, gpu, offset, size);
+
+    gpu_virtual_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
+    gpu_virtual_address.address += offset;
+
+    return gpu_virtual_address;
+}
+
+// Automatically get the correct address for the authentication tag. The
+// addressing mode of the tag should match that of the reference address
+// (destination pointer for GPU encrypt, source pointer for GPU decrypt)
+static uvm_gpu_address_t auth_tag_gpu_address(uvm_mem_t *auth_tag_mem,
+                                              uvm_gpu_t *gpu,
+                                              size_t offset,
+                                              uvm_gpu_address_t reference)
+{
+    uvm_gpu_address_t auth_tag_gpu_address;
+
+    if (!reference.is_virtual)
+        return uvm_mem_gpu_address_physical(auth_tag_mem, gpu, offset, UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
+
+    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(auth_tag_mem, gpu);
+    auth_tag_gpu_address.address += offset;
+
+    return auth_tag_gpu_address;
+}
+
+// Push one CE encryption of src_mem into dst_mem per copy_size chunk of size.
+// Iteration i uses offset i * copy_size in both buffers and the authentication
+// tag at offset i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE in auth_tag_mem. Only
+// size / copy_size whole chunks are processed.
+//
+// Before each encryption, the i-th entry of decrypt_iv is handed to
+// uvm_conf_computing_log_gpu_encryption() together with the push channel, so
+// decrypt_iv must hold at least size / copy_size entries.
+//
+// Note: no membar is issued in any of the GPU transfers (encryptions)
+static void gpu_encrypt(uvm_push_t *push,
+                        uvm_mem_t *dst_mem,
+                        uvm_mem_t *src_mem,
+                        uvm_mem_t *auth_tag_mem,
+                        UvmCslIv *decrypt_iv,
+                        size_t size,
+                        NvU32 copy_size)
+{
+    size_t i;
+    size_t num_iterations = size / copy_size;
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+
+    for (i = 0; i < num_iterations; i++) {
+        uvm_gpu_address_t dst_cipher = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
+        uvm_gpu_address_t src_plain = gpu_address(src_mem, gpu, i * copy_size, copy_size);
+
+        // The tag addressing mode follows the encryption destination
+        uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
+                                                          gpu,
+                                                          i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
+                                                          dst_cipher);
+
+        uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
+
+        // Every transfer but the first is flagged as pipelined
+        if (i > 0)
+            uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
+
+        uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
+
+        gpu->parent->ce_hal->encrypt(push, dst_cipher, src_plain, copy_size, auth_tag);
+
+        // Move on to the IV slot of the next chunk
+        decrypt_iv++;
+    }
+}
+
+// Push one CE decryption of src_mem into dst_mem per copy_size chunk of size.
+// Iteration i uses offset i * copy_size in both buffers and the authentication
+// tag at offset i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE in auth_tag_mem. Only
+// size / copy_size whole chunks are processed.
+//
+// Note: no membar is issued in any of the GPU transfers (decryptions)
+static void gpu_decrypt(uvm_push_t *push,
+                        uvm_mem_t *dst_mem,
+                        uvm_mem_t *src_mem,
+                        uvm_mem_t *auth_tag_mem,
+                        size_t size,
+                        NvU32 copy_size)
+{
+    size_t i;
+    size_t num_iterations = size / copy_size;
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+
+    for (i = 0; i < num_iterations; i++) {
+        uvm_gpu_address_t dst_plain = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
+        uvm_gpu_address_t src_cipher = gpu_address(src_mem, gpu, i * copy_size, copy_size);
+
+        // The tag addressing mode follows the decryption source
+        uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
+                                                          gpu,
+                                                          i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
+                                                          src_cipher);
+
+        // Every transfer but the first is flagged as pipelined
+        if (i > 0)
+            uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
+
+        uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
+
+        gpu->parent->ce_hal->decrypt(push, dst_plain, src_cipher, copy_size, auth_tag);
+    }
+}
+
+// Round-trip a plaintext buffer through CPU and GPU encryption/decryption and
+// check that the plaintext that comes out matches the one that went in:
+//   1. src_plain     -> src_cipher     CPU encryption
+//   2. src_cipher    -> dst_plain_gpu  GPU decryption, on decrypt_channel_type
+//   3. dst_plain_gpu -> dst_cipher     GPU encryption, on encrypt_channel_type
+//   4. dst_cipher    -> dst_plain      CPU decryption
+//
+// size is the number of bytes transferred, split in chunks of copy_size bytes.
+// copy_size must not exceed size, and IS_ALIGNED(size, copy_size) must hold.
+//
+// encrypt_in_order selects whether the CPU encryption of step 1 is done with
+// cpu_encrypt() before the GPU decryptions are pushed, or with
+// cpu_encrypt_rev() afterwards, using IVs acquired up front.
+// decrypt_in_order selects between cpu_decrypt_in_order() and
+// cpu_decrypt_out_of_order() for step 4.
+//
+// Returns NV_OK on success, or the status of the first failing check.
+static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
+                                           uvm_channel_type_t decrypt_channel_type,
+                                           uvm_channel_type_t encrypt_channel_type,
+                                           size_t size,
+                                           NvU32 copy_size,
+                                           bool decrypt_in_order,
+                                           bool encrypt_in_order)
+{
+    uvm_push_t push;
+    NvU64 init_value;
+    NV_STATUS status = NV_OK;
+    uvm_mem_t *src_plain = NULL;
+    uvm_mem_t *src_cipher = NULL;
+    uvm_mem_t *dst_cipher = NULL;
+    uvm_mem_t *dst_plain_gpu = NULL;
+    uvm_mem_t *dst_plain = NULL;
+    uvm_mem_t *auth_tag_mem = NULL;
+
+    // One authentication tag per copy_size chunk. The same tag buffer is used
+    // by both transfer directions.
+    size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
+    UvmCslIv *decrypt_iv = NULL;
+    UvmCslIv *encrypt_iv = NULL;
+    uvm_tracker_t tracker;
+    size_t src_plain_size;
+
+    // Nothing has been allocated yet, so a plain return is fine here
+    TEST_CHECK_RET(copy_size <= size);
+    TEST_CHECK_RET(IS_ALIGNED(size, copy_size));
+
+    uvm_tracker_init(&tracker);
+
+    // One IV per chunk, for each direction
+    decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
+    if (!decrypt_iv) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    encrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
+    if (!encrypt_iv) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_cipher, size), out);
+    TEST_NV_CHECK_GOTO(alloc_vidmem_protected(gpu, &dst_plain_gpu, size), out);
+    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_cipher, size), out);
+    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_plain, size), out);
+    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &auth_tag_mem, auth_tag_buffer_size), out);
+
+    // The plaintext CPU buffer size should fit the initialization value
+    src_plain_size = UVM_ALIGN_UP(size, sizeof(init_value));
+    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_plain, src_plain_size), out);
+
+    // Initialize the plaintext CPU buffer using a value that uniquely
+    // identifies the given inputs
+    // NOTE(review): size and copy_size are OR-ed together rather than packed
+    // into separate bit ranges, and encrypt_in_order is not part of the value,
+    // so different inputs can yield the same pattern - confirm this is fine.
+    TEST_CHECK_GOTO((((NvU64) size) < (1ULL << 63)), out);
+    init_value = ((NvU64) decrypt_in_order << 63) | ((NvU64) size) | ((NvU64) copy_size);
+    write_range_cpu(src_plain, init_value);
+
+    TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager,
+                                      decrypt_channel_type,
+                                      &push,
+                                      "CPU > GPU decrypt"),
+                       out);
+
+    // CPU (decrypted) > CPU (encrypted), using CPU, if in-order
+    // acquire IVs if not in-order
+    if (encrypt_in_order)
+        cpu_encrypt(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size);
+    else
+        cpu_acquire_encryption_ivs(push.channel, size, copy_size, encrypt_iv);
+
+    // CPU (encrypted) > GPU (decrypted), using GPU
+    gpu_decrypt(&push, dst_plain_gpu, src_cipher, auth_tag_mem, size, copy_size);
+
+    // Use acquired IVs to encrypt in reverse order
+    if (!encrypt_in_order)
+        cpu_encrypt_rev(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size, encrypt_iv);
+
+    // The decrypt push is not waited on here: it is added to the tracker,
+    // which the encrypt push below acquires.
+    uvm_push_end(&push);
+    TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), out);
+
+    // GPU (decrypted) > CPU (encrypted), using GPU
+    TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
+                                              encrypt_channel_type,
+                                              &tracker,
+                                              &push,
+                                              "GPU > CPU encrypt"),
+                       out);
+
+    gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);
+
+    TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
+
+    // Neither ciphertext buffer is expected to match the plaintext
+    TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
+
+    TEST_CHECK_GOTO(!mem_match(dst_cipher, src_plain, size), out);
+
+    // CPU (encrypted) > CPU (decrypted), using CPU
+    // push.channel is the channel that was used by the GPU encryption push
+    if (decrypt_in_order) {
+        TEST_NV_CHECK_GOTO(cpu_decrypt_in_order(push.channel,
+                                                dst_plain,
+                                                dst_cipher,
+                                                decrypt_iv,
+                                                auth_tag_mem,
+                                                size,
+                                                copy_size),
+                           out);
+    }
+    else {
+        TEST_NV_CHECK_GOTO(cpu_decrypt_out_of_order(push.channel,
+                                                    dst_plain,
+                                                    dst_cipher,
+                                                    decrypt_iv,
+                                                    auth_tag_mem,
+                                                    size,
+                                                    copy_size),
+                           out);
+    }
+
+    // The round trip must give back the original plaintext
+    TEST_CHECK_GOTO(mem_match(src_plain, dst_plain, size), out);
+
+out:
+    // Pointers that were never allocated are still NULL at this point
+    uvm_mem_free(auth_tag_mem);
+    uvm_mem_free(dst_plain);
+    uvm_mem_free(dst_plain_gpu);
+    uvm_mem_free(dst_cipher);
+    uvm_mem_free(src_cipher);
+    uvm_mem_free(src_plain);
+    uvm_tracker_deinit(&tracker);
+    uvm_kvfree(decrypt_iv);
+    uvm_kvfree(encrypt_iv);
+
+    return status;
+}
+
+// Run test_cpu_to_gpu_roundtrip() over a set of transfer sizes, copy sizes and
+// CPU encryption/decryption orders. decrypt_channel_type and
+// encrypt_channel_type are the channel types used for the GPU decryption and
+// GPU encryption pushes, respectively.
+//
+// Returns NV_OK without running anything when Confidential Computing is not
+// enabled on the GPU; otherwise returns the status of the first failing
+// round trip, or NV_OK if all of them pass.
+static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu,
+                                            uvm_channel_type_t decrypt_channel_type,
+                                            uvm_channel_type_t encrypt_channel_type)
+{
+    // Orders used by the small-size loop; overwritten per iteration of the
+    // page-size loop further down.
+    bool cpu_decrypt_in_order = true;
+    bool cpu_encrypt_in_order = true;
+    size_t size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_4K * 2, UVM_PAGE_SIZE_2M};
+    size_t copy_size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_2M};
+    unsigned i;
+
+    // All four combinations of CPU encryption/decryption ordering
+    struct {
+        bool encrypt_in_order;
+        bool decrypt_in_order;
+    } orders[] = {{true, true}, {true, false}, {false, true}, {false, false}};
+
+    // {size, copy_size} pairs for the small transfers
+    struct {
+        size_t size;
+        NvU32 copy_size;
+    } small_sizes[] = {{1, 1}, {3, 1}, {8, 1}, {2, 2}, {8, 4}, {UVM_PAGE_SIZE_4K - 8, 8}, {UVM_PAGE_SIZE_4K + 8, 8}};
+
+    // Only Confidential Computing uses CE encryption/decryption
+    if (!uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
+    // Use a size, and copy size, that are not a multiple of common page sizes.
+    for (i = 0; i < ARRAY_SIZE(small_sizes); ++i) {
+        // Skip tests that need large pushbuffer on WLC. Secure work launch
+        // needs to do at least one decrypt operation so tests that only need
+        // one operation work ok. Tests using more operations might overflow
+        // UVM_MAX_WLC_PUSH_SIZE.
+        if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (small_sizes[i].size / small_sizes[i].copy_size > 1))
+            continue;
+
+        TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
+                                                    decrypt_channel_type,
+                                                    encrypt_channel_type,
+                                                    small_sizes[i].size,
+                                                    small_sizes[i].copy_size,
+                                                    cpu_decrypt_in_order,
+                                                    cpu_encrypt_in_order));
+    }
+
+    // Use sizes, and copy sizes, that are a multiple of common page sizes.
+    // This is the most typical usage of encrypt/decrypt in the UVM driver.
+    for (i = 0; i < ARRAY_SIZE(orders); ++i) {
+        unsigned j;
+
+        cpu_encrypt_in_order = orders[i].encrypt_in_order;
+        cpu_decrypt_in_order = orders[i].decrypt_in_order;
+
+        for (j = 0; j < ARRAY_SIZE(size); ++j) {
+            unsigned k;
+
+            for (k = 0; k < ARRAY_SIZE(copy_size); ++k) {
+                // A chunk cannot be larger than the whole transfer
+                if (copy_size[k] > size[j])
+                    continue;
+
+                // Skip tests that need large pushbuffer on WLC. Secure work
+                // launch needs to do at least one decrypt operation so tests
+                // that only need one operation work ok. Tests using more
+                // operations might overflow UVM_MAX_WLC_PUSH_SIZE.
+                if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (size[j] / copy_size[k] > 1))
+                    continue;
+
+                // There is no difference between in-order and out-of-order
+                // decryption when encrypting once.
+                if ((copy_size[k] == size[j]) && !cpu_decrypt_in_order)
+                    continue;
+
+                TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
+                                                            decrypt_channel_type,
+                                                            encrypt_channel_type,
+                                                            size[j],
+                                                            copy_size[k],
+                                                            cpu_decrypt_in_order,
+                                                            cpu_encrypt_in_order));
+            }
+        }
+    }
+
+    return NV_OK;
+}
+
 static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
 {
    uvm_gpu_t *gpu;
@@ -655,9 +1225,13 @@ static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
        TEST_NV_CHECK_RET(test_memcpy_and_memset(gpu));
        TEST_NV_CHECK_RET(test_semaphore_reduction_inc(gpu));
        TEST_NV_CHECK_RET(test_semaphore_release(gpu));
+
        if (!skipTimestampTest)
            TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
-    }
+
+        TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_CPU_TO_GPU, UVM_CHANNEL_TYPE_GPU_TO_CPU));
+        TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_WLC, UVM_CHANNEL_TYPE_WLC));
+   }

    return NV_OK;
 }
--- a/kernel-open/nvidia-uvm/uvm_channel.c
+++ b/kernel-open/nvidia-uvm/uvm_channel.c
--- a/kernel-open/nvidia-uvm/uvm_channel.h
+++ b/kernel-open/nvidia-uvm/uvm_channel.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -50,6 +50,9 @@
 #define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
 #define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)

+// Maximum number of channels per pool.
+#define UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL UVM_PUSH_MAX_CONCURRENT_PUSHES
+
 // Semaphore payloads cannot advance too much between calls to
 // uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps
 // are bound by gpfifo sizing as we have to update the completed value to
@@ -61,6 +64,14 @@
 // uvm_channel.h includes uvm_gpu_semaphore.h.
 #define UVM_GPU_SEMAPHORE_MAX_JUMP (2 * UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX)

+// Assert that the lock protecting the given pool is held. The pool lock is
+// either a mutex or a spinlock; uvm_channel_pool_uses_mutex() decides which
+// of the two is checked.
+#define uvm_channel_pool_assert_locked(pool) (          \
+{                                                       \
+    if (uvm_channel_pool_uses_mutex(pool))              \
+        uvm_assert_mutex_locked(&(pool)->mutex);        \
+    else                                                \
+        uvm_assert_spinlock_locked(&(pool)->spinlock);  \
+})
+
 // Channel types
 typedef enum
 {
@@ -83,7 +94,27 @@ typedef enum

    // ^^^^^^
    // Channel types backed by a CE.
-    UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT,
+    // ----------------------------------
+    // Channel types not backed by a CE.
+    // vvvvvv
+
+    // SEC2 channels
+    UVM_CHANNEL_TYPE_SEC2 = UVM_CHANNEL_TYPE_CE_COUNT,
+
+    // ----------------------------------
+    // Channel type with fixed schedules
+
+    // Work Launch Channel (WLC) is a specialized channel for launching work on
+    // other channels when the Confidential Computing feature is enabled. It is
+    // paired with LCIC (below)
+    UVM_CHANNEL_TYPE_WLC,
+
+    // Launch Confirmation Indicator Channel (LCIC) is a specialized channel
+    // with fixed schedule. It gets triggered by executing WLC work, and makes
+    // sure that WLC get/put pointers are up-to-date.
+    UVM_CHANNEL_TYPE_LCIC,
+
+    UVM_CHANNEL_TYPE_COUNT,
 } uvm_channel_type_t;

 typedef enum
@@ -101,7 +132,15 @@ typedef enum
    // There is a single proxy pool and channel per GPU.
    UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1),

-    UVM_CHANNEL_POOL_TYPE_COUNT = 2,
+    // A pool of SEC2 channels owned by UVM. These channels are backed by a SEC2
+    // engine.
+    UVM_CHANNEL_POOL_TYPE_SEC2 = (1 << 2),
+
+    UVM_CHANNEL_POOL_TYPE_WLC = (1 << 3),
+
+    UVM_CHANNEL_POOL_TYPE_LCIC = (1 << 4),
+
+    UVM_CHANNEL_POOL_TYPE_COUNT = 5,

    // A mask used to select pools of any type.
    UVM_CHANNEL_POOL_TYPE_MASK  = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1)
@@ -125,16 +164,24 @@ struct uvm_gpfifo_entry_struct
    // this entry.
    NvU64 tracking_semaphore_value;

+    union {
+        struct {
+            // Offset of the pushbuffer in the pushbuffer allocation used by
+            // this entry.
+            NvU32 pushbuffer_offset;
+
+            // Size of the pushbuffer used for this entry.
+            NvU32 pushbuffer_size;
+        };
+
+        // Value of control entry
+        // Exact value of GPFIFO entry copied directly to GPFIFO[PUT] location.
+        NvU64 control_value;
+    };
+
    // The following fields are only valid when type is
    // UVM_GPFIFO_ENTRY_TYPE_NORMAL.

-    // Offset of the pushbuffer in the pushbuffer allocation used by
-    // this entry.
-    NvU32 pushbuffer_offset;
-
-    // Size of the pushbuffer used for this entry.
-    NvU32 pushbuffer_size;
-
    // List node used by the pushbuffer tracking
    struct list_head pending_list_node;

@@ -149,6 +196,19 @@ typedef struct
    // Owning channel manager
    uvm_channel_manager_t *manager;

+    // On Volta+ GPUs, all channels in a pool are members of the same TSG, i.e.,
+    // num_tsgs is 1. Pre-Volta GPUs also have a single TSG object, but since HW
+    // does not support TSG for CE engines, a HW TSG is not created, but a TSG
+    // object is required to allocate channels.
+    // When Confidential Computing mode is enabled, the WLC and LCIC channel
+    // types require one TSG for each WLC/LCIC pair of channels. In this case,
+    // we do not use a TSG per channel pool, but instead a TSG per WLC/LCIC
+    // channel pair, num_tsgs equals to the number of channel pairs.
+    uvmGpuTsgHandle *tsg_handles;
+
+    // Number of TSG handles owned by this pool.
+    NvU32 num_tsgs;
+
    // Channels in this pool
    uvm_channel_t *channels;

@@ -162,12 +222,27 @@ typedef struct
    // Pool type: Refer to the uvm_channel_pool_type_t enum.
    uvm_channel_pool_type_t pool_type;

-    // Lock protecting the state of channels in the pool
+    // Lock protecting the state of channels in the pool.
+    //
+    // There are two pool lock types available: spinlock and mutex. The mutex
+    // variant is required when the thread holding the pool lock must sleep
+    // (ex: acquire another mutex) deeper in the call stack, either in UVM or
+    // RM.
    union {
        uvm_spinlock_t spinlock;
        uvm_mutex_t mutex;
    };

+    // Secure operations require that uvm_push_begin order matches
+    // uvm_push_end order, because the engine's state is used in its internal
+    // operation and each push may modify this state. push_locks is protected by
+    // the channel pool lock.
+    DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
+
+    // Counting semaphore for available and unlocked channels, it must be
+    // acquired before submitting work to a channel when the Confidential
+    // Computing feature is enabled.
+    uvm_semaphore_t push_sem;
 } uvm_channel_pool_t;

 struct uvm_channel_struct
@@ -218,6 +293,74 @@ struct uvm_channel_struct
    // uvm_channel_end_push().
    uvm_gpu_tracking_semaphore_t tracking_sem;

+    struct
+    {
+        // Secure operations require that uvm_push_begin order matches
+        // uvm_push_end order, because the engine's state is used in
+        // its internal operation and each push may modify this state.
+        uvm_mutex_t push_lock;
+
+        // When the Confidential Computing feature is enabled, every channel has
+        // cryptographic state in HW, which is mirrored here for CPU-side
+        // operations.
+        UvmCslContext ctx;
+        bool is_ctx_initialized;
+
+        // CPU-side CSL crypto operations which operate on the same CSL state
+        // are not thread-safe, so they must be wrapped in locks at the UVM
+        // level. Encryption, decryption and logging operations must be
+        // protected with the ctx_lock.
+        uvm_mutex_t ctx_lock;
+    } csl;
+
+    struct
+    {
+        // The value of GPU side PUT index.
+        // Indirect work submission introduces delay between updating the CPU
+        // put when ending a push, and updating the GPU visible value via
+        // indirect work launch. It is used to order multiple pending indirect
+        // work launches to match the order of push end-s that triggered them.
+        volatile NvU32 gpu_put;
+
+        // Static pushbuffer for channels with static schedule (WLC/LCIC)
+        uvm_rm_mem_t *static_pb_protected_vidmem;
+
+        // Static pushbuffer staging buffer for WLC
+        uvm_rm_mem_t *static_pb_unprotected_sysmem;
+        void *static_pb_unprotected_sysmem_cpu;
+        void *static_pb_unprotected_sysmem_auth_tag_cpu;
+
+        // The above static locations are required by the WLC (and LCIC)
+        // schedule. Protected sysmem location completes WLC's independence
+        // from the pushbuffer allocator.
+        void *static_pb_protected_sysmem;
+
+        // Static tracking semaphore notifier values
+        // Because of LCIC's fixed schedule, the secure semaphore release
+        // mechanism uses two additional static locations for incrementing the
+        // notifier values. See:
+        // . channel_semaphore_secure_release()
+        // . setup_lcic_schedule()
+        // . internal_channel_submit_work_wlc()
+        uvm_rm_mem_t *static_notifier_unprotected_sysmem;
+        NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
+        NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
+        uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
+        uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;
+
+        // Explicit location for push launch tag used by WLC.
+        // Encryption auth tags have to be located in unprotected sysmem.
+        void *launch_auth_tag_cpu;
+        NvU64 launch_auth_tag_gpu_va;
+
+        // Used to decrypt the push back to protected sysmem.
+        // This happens when profilers register callbacks for migration data.
+        uvm_push_crypto_bundle_t *push_crypto_bundles;
+
+        // Accompanying authentication tags for the crypto bundles
+        uvm_rm_mem_t *push_crypto_bundle_auth_tags;
+    } conf_computing;
+
    // RM channel information
    union
    {
@@ -275,7 +418,7 @@ struct uvm_channel_manager_struct
    unsigned num_channel_pools;

    // Mask containing the indexes of the usable Copy Engines. Each usable CE
-    // has a pool associated with it, see channel_manager_ce_pool
+    // has at least one pool associated with it.
    DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);

    struct
@@ -313,13 +456,50 @@ struct uvm_channel_manager_struct
 // Create a channel manager for the GPU
 NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);

-void uvm_channel_pool_lock(uvm_channel_pool_t *pool);
-void uvm_channel_pool_unlock(uvm_channel_pool_t *pool);
-void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool);
+// A pool type is valid when it is a power of two that is lower than
+// UVM_CHANNEL_POOL_TYPE_MASK.
+static bool uvm_pool_type_is_valid(uvm_channel_pool_type_t pool_type)
+{
+    if (!is_power_of_2(pool_type))
+        return false;
+
+    return pool_type < UVM_CHANNEL_POOL_TYPE_MASK;
+}
+
+// Returns true if the pool type is UVM_CHANNEL_POOL_TYPE_SEC2
+static bool uvm_channel_pool_is_sec2(uvm_channel_pool_t *pool)
+{
+    const uvm_channel_pool_type_t type = pool->pool_type;
+
+    UVM_ASSERT(uvm_pool_type_is_valid(type));
+
+    return type == UVM_CHANNEL_POOL_TYPE_SEC2;
+}
+
+// Returns true if the pool type is UVM_CHANNEL_POOL_TYPE_WLC
+static bool uvm_channel_pool_is_wlc(uvm_channel_pool_t *pool)
+{
+    const uvm_channel_pool_type_t type = pool->pool_type;
+
+    UVM_ASSERT(uvm_pool_type_is_valid(type));
+
+    return type == UVM_CHANNEL_POOL_TYPE_WLC;
+}
+
+// Returns true if the pool type is UVM_CHANNEL_POOL_TYPE_LCIC
+static bool uvm_channel_pool_is_lcic(uvm_channel_pool_t *pool)
+{
+    const uvm_channel_pool_type_t type = pool->pool_type;
+
+    UVM_ASSERT(uvm_pool_type_is_valid(type));
+
+    return type == UVM_CHANNEL_POOL_TYPE_LCIC;
+}
+
+// Returns true if the channel belongs to a SEC2 pool
+static bool uvm_channel_is_sec2(uvm_channel_t *channel)
+{
+    return uvm_channel_pool_is_sec2(channel->pool);
+}
+
+// Returns true if the channel belongs to a WLC pool
+static bool uvm_channel_is_wlc(uvm_channel_t *channel)
+{
+    return uvm_channel_pool_is_wlc(channel->pool);
+}
+
+// Returns true if the channel belongs to a LCIC pool
+static bool uvm_channel_is_lcic(uvm_channel_t *channel)
+{
+    return uvm_channel_pool_is_lcic(channel->pool);
+}

 static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
 {
-    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
+    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));

    return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
 }
@@ -329,12 +509,18 @@ static bool uvm_channel_is_proxy(uvm_channel_t *channel)
    return uvm_channel_pool_is_proxy(channel->pool);
 }

+// Returns true for every pool that is not a SEC2 pool: all the other pool
+// types are reported as CE pools by this helper.
+static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
+{
+    return !uvm_channel_pool_is_sec2(pool);
+}
+
 static bool uvm_channel_is_ce(uvm_channel_t *channel)
 {
-    UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
-    return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel);
+    return uvm_channel_pool_is_ce(channel->pool);
 }

+bool uvm_channel_pool_uses_mutex(uvm_channel_pool_t *pool);
+
 // Proxy channels are used to push page tree related methods, so their channel
 // type is UVM_CHANNEL_TYPE_MEMOPS.
 static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
@@ -389,6 +575,13 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
 // beginning.
 NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);

+// Report whether the WLC/LCIC mechanism has been set up, that is, whether the
+// manager has a default pool for both the WLC and the LCIC channel types.
+// Should only return false during initialization.
+static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
+{
+    if (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] == NULL)
+        return false;
+
+    return manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL;
+}
 // Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
 // associated with access_channel.
 //
@@ -449,6 +642,10 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
 const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
 const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);

+// Returns the number of available GPFIFO entries. The function internally
+// acquires the channel pool lock.
+NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel);
+
 void uvm_channel_print_pending_pushes(uvm_channel_t *channel);

 static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
@@ -456,6 +653,11 @@ static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
    return channel->pool->manager->gpu;
 }

+// Returns the pushbuffer of the channel manager that owns the channel's pool
+static uvm_pushbuffer_t *uvm_channel_get_pushbuffer(uvm_channel_t *channel)
+{
+    return channel->pool->manager->pushbuffer;
+}
+
 // Index of a channel within the owning pool
 static unsigned uvm_channel_index_in_pool(const uvm_channel_t *channel)
 {
--- a/kernel-open/nvidia-uvm/uvm_channel_test.c
+++ b/kernel-open/nvidia-uvm/uvm_channel_test.c
@@ -60,6 +60,11 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
    gpu = uvm_va_space_find_first_gpu(va_space);
    TEST_CHECK_RET(gpu != NULL);

+    // TODO: Bug 3839176: the test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
    TEST_CHECK_GOTO(status == NV_OK, done);

@@ -69,7 +74,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Initial memset");
    TEST_CHECK_GOTO(status == NV_OK, done);

-    gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
+    gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;

    // Semaphore release as part of uvm_push_end() will do the membar
    uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
@@ -104,7 +109,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
                                                value + 1);
                TEST_CHECK_GOTO(status == NV_OK, done);

-                gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
+                gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel)).address;
                gpu_va_src = gpu_va_base + (value % values_count) * sizeof(NvU32);
                gpu_va_dst = gpu_va_base + ((value + 1) % values_count) * sizeof(NvU32);

@@ -167,11 +172,12 @@ static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
        completed_value = uvm_channel_update_completed_value(channel);
        uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1);

-        TEST_CHECK_RET(uvm_global_get_status() == NV_OK);
+        TEST_NV_CHECK_RET(uvm_global_get_status());
        uvm_channel_update_progress_all(channel);
        TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);

        uvm_channel_manager_destroy(gpu->channel_manager);
+
        // Destruction will hit the error again, so clear one more time.
        uvm_global_reset_fatal_error();

@@ -199,6 +205,9 @@ static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

+            // Skip LCIC channels as those can't accept any pushes
+            if (uvm_channel_pool_is_lcic(pool))
+                continue;
        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            for (i = 0; i < 512; ++i) {
@@ -340,8 +349,8 @@ static void snapshot_counter(uvm_push_t *push,
        return;

    is_proxy_channel = uvm_channel_is_proxy(push->channel);
-    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
-    snapshot_gpu_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, is_proxy_channel) + index * 2 * sizeof(NvU32);
+    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel).address;
+    snapshot_gpu_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, is_proxy_channel).address + index * 2 * sizeof(NvU32);

    // Copy the last and first counter to a snapshot for later verification.

@@ -366,7 +375,7 @@ static void set_counter(uvm_push_t *push, uvm_rm_mem_t *counter_mem, NvU32 value
    bool is_proxy_channel;

    is_proxy_channel = uvm_channel_is_proxy(push->channel);
-    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
+    counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel).address;

    gpu->parent->ce_hal->memset_v_4(push, counter_gpu_va, value, count * sizeof(NvU32));
 }
@@ -426,7 +435,7 @@ static void test_memset_rm_mem(uvm_push_t *push, uvm_rm_mem_t *rm_mem, NvU32 val
    UVM_ASSERT(rm_mem->size % 4 == 0);

    gpu = uvm_push_get_gpu(push);
-    gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel));
+    gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel)).address;

    gpu->parent->ce_hal->memset_v_4(push, gpu_va, value, rm_mem->size);
 }
@@ -671,6 +680,72 @@ done:
    return status;
 }

+// The following test is inspired by uvm_push_test.c:test_concurrent_pushes.
+// This test verifies that concurrent pushes using the same channel pool
+// select different channels, when the Confidential Computing feature is
+// enabled.
+//
+// For every GPU in the VA space and every channel type whose default pool has
+// at least two channels (LCIC pools excluded), the test begins up to
+// UVM_PUSH_MAX_CONCURRENT_PUSHES pushes without ending them, checks that each
+// push landed on a different channel than the previous one, and then ends and
+// waits for all of them.
+//
+// Lock tracking is disabled while the pushes are outstanding. Every exit taken
+// after that point goes through the "out" label so that tracking is always
+// re-enabled and the pushes array is always released.
+//
+// Returns NV_OK on success (or when Confidential Computing is disabled), or
+// the status of the first failing check.
+NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
+{
+    NV_STATUS status = NV_OK;
+    uvm_channel_pool_t *pool;
+    uvm_push_t *pushes = NULL;
+    uvm_gpu_t *gpu;
+    NvU32 i;
+    NvU32 num_pushes;
+
+    gpu = uvm_va_space_find_first_gpu(va_space);
+
+    if (!uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
+    uvm_thread_context_lock_disable_tracking();
+
+    for_each_va_space_gpu(gpu, va_space) {
+        uvm_channel_type_t channel_type;
+
+        for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
+            pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
+
+            // Must not return directly: lock tracking is disabled
+            TEST_CHECK_GOTO(pool != NULL, out);
+
+            // Skip LCIC channels as those can't accept any pushes
+            if (uvm_channel_pool_is_lcic(pool))
+                continue;
+
+            // At least two channels are needed to compare channel selection
+            if (pool->num_channels < 2)
+                continue;
+
+            num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);
+
+            pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
+            TEST_CHECK_GOTO(pushes != NULL, out);
+
+            for (i = 0; i < num_pushes; i++) {
+                uvm_push_t *push = &pushes[i];
+                status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
+                TEST_NV_CHECK_GOTO(status, out);
+                if (i > 0)
+                    TEST_CHECK_GOTO(pushes[i-1].channel != push->channel, out);
+            }
+            for (i = 0; i < num_pushes; i++) {
+                uvm_push_t *push = &pushes[i];
+                status = uvm_push_end_and_wait(push);
+                TEST_NV_CHECK_GOTO(status, out);
+            }
+
+            // Reset the pointer so that a failure in a later iteration does
+            // not free the same array a second time at the out label.
+            uvm_kvfree(pushes);
+            pushes = NULL;
+        }
+    }
+
+out:
+    // NOTE(review): pushes that were begun but not ended when a check fails
+    // are left outstanding, as before - confirm whether they should be ended.
+    uvm_thread_context_lock_enable_tracking();
+    uvm_kvfree(pushes);
+
+    return status;
+}
+
 NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
 {
    uvm_gpu_t *gpu;
@@ -682,6 +757,14 @@ NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

+            // Skip LCIC channels as those can't accept any pushes
+            if (uvm_channel_pool_is_lcic(pool))
+                continue;
+
+            // Skip WLC channels as those can't accept ctrl gpfifos
+            // after their schedule is set up
+            if (uvm_channel_pool_is_wlc(pool))
+                continue;
            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;

@@ -713,6 +796,14 @@ NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

+            // Skip LCIC channels as those can't accept any pushes
+            if (uvm_channel_pool_is_lcic(pool))
+                continue;
+
+            // Skip WLC channels as those can't accept ctrl gpfifos
+            // after their schedule is set up
+            if (uvm_channel_pool_is_wlc(pool))
+                continue;
            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;
                uvm_push_t push;
@@ -742,22 +833,6 @@ NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
    return NV_OK;
 }

-static NvU32 get_available_gpfifo_entries(uvm_channel_t *channel)
-{
-    NvU32 pending_entries;
-
-    uvm_channel_pool_lock(channel->pool);
-
-    if (channel->cpu_put >= channel->gpu_get)
-        pending_entries = channel->cpu_put - channel->gpu_get;
-    else
-        pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
-
-    uvm_channel_pool_unlock(channel->pool);
-
-    return channel->num_gpfifo_entries - pending_entries - 1;
-}
-
 NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
 {
    NV_STATUS status = NV_OK;
@@ -770,9 +845,15 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
    NvU64 entry;
    uvm_push_t push;

+    gpu = uvm_va_space_find_first_gpu(va_space);
+
+    // TODO: Bug 3839176: the test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_manager_t *manager = gpu->channel_manager;
-        gpu = manager->gpu;

        TEST_NV_CHECK_RET(uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(*cpu_ptr), 0, &mem));
        cpu_ptr = uvm_rm_mem_get_cpu_va(mem);
@@ -790,6 +871,12 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
        gpu->parent->host_hal->semaphore_acquire(&push, gpu_va, 1);
        uvm_push_end(&push);

+        // Flush all completed entries from the GPFIFO ring buffer. This test
+        // requires this flush because we verify (below with
+        // uvm_channel_get_available_gpfifo_entries) the number of free entries
+        // in the channel.
+        uvm_channel_update_progress_all(channel);
+
        // Populate the remaining GPFIFO entries, leaving 2 slots available.
        // 2 available entries + 1 semaphore acquire (above) + 1 spare entry to
        // indicate a terminal condition for the GPFIFO ringbuffer, therefore we
@@ -799,7 +886,7 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
            uvm_push_end(&push);
        }

-        TEST_CHECK_GOTO(get_available_gpfifo_entries(channel) == 2, error);
+        TEST_CHECK_GOTO(uvm_channel_get_available_gpfifo_entries(channel) == 2, error);

        // We should have room for the control GPFIFO and the subsequent
        // semaphore release.
@@ -858,6 +945,9 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
        uvm_for_each_pool(pool, manager) {
            uvm_channel_t *channel;

+            // Skip LCIC channels as those can't accept any pushes
+            if (uvm_channel_pool_is_lcic(pool))
+                continue;
            uvm_for_each_channel_in_pool(channel, pool) {
                NvU32 i;
                uvm_push_t push;
@@ -905,6 +995,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
    if (status != NV_OK)
        goto done;

+    status = test_conf_computing_channel_selection(va_space);
+    if (status != NV_OK)
+        goto done;
+
    // The following tests have side effects, they reset the GPU's
    // channel_manager.
    status = test_channel_pushbuffer_extension_base(va_space);
@@ -935,7 +1029,7 @@ done:
 static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
                                                const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
 {
-    NV_STATUS status;
+    NV_STATUS status = NV_OK;

    if (params->iterations == 0 || params->num_streams == 0)
        return NV_ERR_INVALID_PARAMETER;
@@ -945,13 +1039,16 @@ static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
    uvm_mutex_lock(&g_uvm_global.global_lock);
    uvm_va_space_down_read_rm(va_space);

+    // TODO: Bug 3839176: the test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(uvm_va_space_find_first_gpu(va_space)))
+        goto done;
+
    status = stress_test_all_gpus_in_va(va_space,
                                        params->num_streams,
                                        params->iterations,
                                        params->seed,
                                        params->verbose);
-    if (status != NV_OK)
-        goto done;

 done:
    uvm_va_space_up_read_rm(va_space);
--- a/kernel-open/nvidia-uvm/uvm_common.h
+++ b/kernel-open/nvidia-uvm/uvm_common.h
@@ -211,6 +211,11 @@ static inline NvBool uvm_uuid_is_cpu(const NvProcessorUuid *uuid)
 {
    return memcmp(uuid, &NV_PROCESSOR_UUID_CPU_DEFAULT, sizeof(*uuid)) == 0;
 }
+// Binary size units in bytes. The ULL suffix on the base unit makes every
+// derived constant an unsigned long long expression.
+#define UVM_SIZE_1KB (1024ULL)
+#define UVM_SIZE_1MB (1024 * UVM_SIZE_1KB)
+#define UVM_SIZE_1GB (1024 * UVM_SIZE_1MB)
+#define UVM_SIZE_1TB (1024 * UVM_SIZE_1GB)
+#define UVM_SIZE_1PB (1024 * UVM_SIZE_1TB)

 #define UVM_ALIGN_DOWN(x, a) ({         \
        typeof(x) _a = a;               \
@@ -347,6 +352,22 @@ typedef struct
    NvHandle user_object;
 } uvm_rm_user_object_t;

+// Kind of data stored in a UVM file's private_data (see uvm_fd_type()).
+// UVM_FD_COUNT is the number of valid types, not a type itself.
+typedef enum
+{
+    UVM_FD_UNINITIALIZED,
+    UVM_FD_INITIALIZING,
+    UVM_FD_VA_SPACE,
+    UVM_FD_MM,
+    UVM_FD_COUNT
+} uvm_fd_type_t;
+
+// This should be large enough to fit the valid values from uvm_fd_type_t above.
+// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
+// coverage tool fails when the preprocessor expands that to a huge mess of
+// ternary operators.
+#define UVM_FD_TYPE_BITS 2
+#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)
+
 // Macro used to compare two values for types that support less than operator.
 // It returns -1 if a < b, 1 if a > b and 0 if a == 0
 #define UVM_CMP_DEFAULT(a,b)              \
@@ -369,6 +390,14 @@ typedef struct
 // file. A NULL input returns false.
 bool uvm_file_is_nvidia_uvm(struct file *filp);

+// Returns the type of data that filp->private_data contains and, if ptr_val !=
+// NULL, also returns the value of the pointer in *ptr_val.
+uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);
+
+// Returns the pointer stored in filp->private_data if the type
+// matches, otherwise returns NULL.
+void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);
+
 // Reads the first word in the supplied struct page.
 static inline void uvm_touch_page(struct page *page)
 {
@@ -381,4 +410,7 @@ static inline void uvm_touch_page(struct page *page)
    kunmap(page);
 }

+// Return true if the VMA is one used by UVM managed allocations.
+bool uvm_vma_is_managed(struct vm_area_struct *vma);
+
 #endif /* _UVM_COMMON_H */
--- a/kernel-open/nvidia-uvm/uvm_conf_computing.c
+++ b/kernel-open/nvidia-uvm/uvm_conf_computing.c
@@ -0,0 +1,501 @@
+/*******************************************************************************
+    Copyright (c) 2021-2023 NVIDIA Corporation
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to
+    deal in the Software without restriction, including without limitation the
+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+        The above copyright notice and this permission notice shall be
+        included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+#include "uvm_common.h"
+#include "uvm_global.h"
+#include "uvm_conf_computing.h"
+#include "uvm_kvmalloc.h"
+#include "uvm_gpu.h"
+#include "uvm_hal.h"
+#include "uvm_mem.h"
+#include "uvm_processors.h"
+#include "uvm_tracker.h"
+#include "nv_uvm_interface.h"
+#include "uvm_va_block.h"
+
+
+// Fetch the Confidential Computing mode recorded in the parent GPU's rm_info
+// capabilities.
+static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
+{
+    const UvmGpuConfComputeMode mode = parent->rm_info.gpuConfComputeCaps.mode;
+
+    return mode;
+}
+
+// True when the parent GPU reports any Confidential Computing mode other than
+// UVM_GPU_CONF_COMPUTE_MODE_NONE.
+bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent)
+{
+    if (uvm_conf_computing_get_mode(parent) == UVM_GPU_CONF_COMPUTE_MODE_NONE)
+        return false;
+
+    return true;
+}
+
+// Per-GPU convenience wrapper: a GPU takes its Confidential Computing state
+// from its parent.
+bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu)
+{
+    const uvm_parent_gpu_t *parent = gpu->parent;
+
+    return uvm_conf_computing_mode_enabled_parent(parent);
+}
+
+// True when the GPU's parent reports exactly UVM_GPU_CONF_COMPUTE_MODE_HCC.
+bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
+{
+    const UvmGpuConfComputeMode mode = uvm_conf_computing_get_mode(gpu->parent);
+
+    return mode == UVM_GPU_CONF_COMPUTE_MODE_HCC;
+}
+
+// Sanity-check (assert only) the Confidential Computing state of a parent GPU
+// against the system-wide setting and against the GPUs already retained.
+// Must be called with the global lock held.
+void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
+{
+    uvm_gpu_t *retained_gpu;
+
+    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
+
+    // The GPU and the system must agree on whether Confidential Computing is
+    // enabled.
+    UVM_ASSERT(uvm_conf_computing_mode_enabled_parent(parent) == g_uvm_global.conf_computing_enabled);
+
+    // TODO: Bug 2844714: since we have no routine to traverse parent GPUs,
+    // find first child GPU and get its parent.
+    retained_gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
+
+    if (retained_gpu != NULL) {
+        // GPUs inherit the Confidential Computing status of their parent, and
+        // current policy requires every parent GPU to have the same status.
+        UVM_ASSERT(uvm_conf_computing_get_mode(parent) == uvm_conf_computing_get_mode(retained_gpu->parent));
+    }
+}
+
+// Unlink a DMA buffer from whatever list it is on, wait for the work tracked
+// by it, and release both of its allocations and the descriptor itself.
+//
+// The pool lock must be held by the caller. The function does not update
+// dma_buffer_pool->num_dma_buffers; callers account for that themselves.
+static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
+                                      uvm_conf_computing_dma_buffer_t *dma_buffer)
+{
+    uvm_assert_mutex_locked(&dma_buffer_pool->lock);
+
+    list_del(&dma_buffer->node);
+
+    // Wait for pending work before freeing the memory it may reference
+    uvm_tracker_wait_deinit(&dma_buffer->tracker);
+
+    // Either allocation may be NULL when called from dma_buffer_create()'s
+    // error path.
+    uvm_mem_free(dma_buffer->alloc);
+    uvm_mem_free(dma_buffer->auth_tag);
+    uvm_kvfree(dma_buffer);
+}
+
+// Recover the GPU that embeds the given pool as conf_computing.dma_buffer_pool.
+// Only meaningful for pools that are that member of a uvm_gpu_t.
+static uvm_gpu_t *dma_buffer_pool_to_gpu(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
+{
+    return container_of(dma_buffer_pool, uvm_gpu_t, conf_computing.dma_buffer_pool);
+}
+
+// Allocate and map a new DMA stage buffer to CPU and GPU (VA)
+//
+// Two sysmem DMA allocations are made, both owned by the pool's GPU and mapped
+// in the CPU and GPU kernel address spaces: the data buffer
+// (UVM_CONF_COMPUTING_DMA_BUFFER_SIZE bytes) and the authentication tag buffer
+// (one UVM_CONF_COMPUTING_AUTH_TAG_SIZE tag per PAGE_SIZE page of the data
+// buffer).
+//
+// On success the new buffer is returned in *dma_buffer_out; it is not added to
+// the pool's free list. On failure everything allocated so far is released and
+// *dma_buffer_out is left untouched.
+//
+// The caller must hold the pool lock: the error path goes through
+// dma_buffer_destroy_locked(), which asserts it.
+static NV_STATUS dma_buffer_create(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
+                                   uvm_conf_computing_dma_buffer_t **dma_buffer_out)
+{
+    uvm_gpu_t *dma_owner;
+    uvm_conf_computing_dma_buffer_t *dma_buffer;
+    uvm_mem_t *alloc = NULL;
+    NV_STATUS status = NV_OK;
+    size_t auth_tags_size = (UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
+
+    dma_buffer = uvm_kvmalloc_zero(sizeof(*dma_buffer));
+    if (!dma_buffer)
+        return NV_ERR_NO_MEMORY;
+
+    dma_owner = dma_buffer_pool_to_gpu(dma_buffer_pool);
+    uvm_tracker_init(&dma_buffer->tracker);
+
+    // Initialize the node so that the list_del() in the error path is valid
+    // even though the buffer was never linked.
+    INIT_LIST_HEAD(&dma_buffer->node);
+
+    status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE, dma_owner, NULL, &alloc);
+    if (status != NV_OK)
+        goto err;
+
+    // Store the allocation before mapping it so that the error path frees it
+    dma_buffer->alloc = alloc;
+
+    status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
+    if (status != NV_OK)
+        goto err;
+
+    status = uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(auth_tags_size, dma_owner, NULL, &alloc);
+    if (status != NV_OK)
+        goto err;
+
+    dma_buffer->auth_tag = alloc;
+
+    status = uvm_mem_map_gpu_kernel(alloc, dma_owner);
+    if (status != NV_OK)
+        goto err;
+
+    *dma_buffer_out = dma_buffer;
+
+    return status;
+
+err:
+    dma_buffer_destroy_locked(dma_buffer_pool, dma_buffer);
+    return status;
+}
+
+// Wait on the tracker of every buffer currently on the pool's free list.
+// Does nothing for a pool that holds no buffers.
+void uvm_conf_computing_dma_buffer_pool_sync(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
+{
+    uvm_conf_computing_dma_buffer_t *free_buffer;
+
+    if (dma_buffer_pool->num_dma_buffers != 0) {
+        uvm_mutex_lock(&dma_buffer_pool->lock);
+
+        list_for_each_entry(free_buffer, &dma_buffer_pool->free_dma_buffers, node) {
+            uvm_tracker_wait(&free_buffer->tracker);
+        }
+
+        uvm_mutex_unlock(&dma_buffer_pool->lock);
+    }
+}
+
+// Destroy every buffer on the pool's free list. All buffers are expected to
+// have been returned to the pool by now. A pool with a zero buffer count is
+// left untouched.
+static void conf_computing_dma_buffer_pool_deinit(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
+{
+    uvm_conf_computing_dma_buffer_t *cur;
+    uvm_conf_computing_dma_buffer_t *tmp;
+
+    if (dma_buffer_pool->num_dma_buffers == 0)
+        return;
+
+    // Because the pool is torn down at the same time the GPU is unregistered,
+    // the lock is required only to quiet assertions, not for functional
+    // reasons (see dma_buffer_destroy_locked()).
+    uvm_mutex_lock(&dma_buffer_pool->lock);
+
+    list_for_each_entry_safe(cur, tmp, &dma_buffer_pool->free_dma_buffers, node) {
+        dma_buffer_destroy_locked(dma_buffer_pool, cur);
+        --dma_buffer_pool->num_dma_buffers;
+    }
+
+    UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);
+    UVM_ASSERT(list_empty(&dma_buffer_pool->free_dma_buffers));
+
+    uvm_mutex_unlock(&dma_buffer_pool->lock);
+}
+
+// Append a DMA buffer to the tail of the pool's free list. The pool lock must
+// be held. The pool's buffer count is not modified here.
+static void dma_buffer_pool_add(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
+                               uvm_conf_computing_dma_buffer_t *dma_buffer)
+{
+    uvm_assert_mutex_locked(&dma_buffer_pool->lock);
+    list_add_tail(&dma_buffer->node, &dma_buffer_pool->free_dma_buffers);
+}
+
+// Initialize the DMA buffer pool embedded in a GPU and pre-populate it with an
+// initial set of free buffers.
+//
+// Returns NV_OK when all initial buffers were created. If any creation fails,
+// the buffers created so far are destroyed, the pool is left with a buffer
+// count of zero, and the failing status is returned.
+//
+// num_dma_buffers is only published once the buffers exist, and reflects the
+// number actually created. Publishing the target count up front would leave a
+// stale non-zero count after a partial failure: the teardown below removes
+// only the buffers that were created, so its "count reached zero" assertion
+// would fire and later deinit calls would treat the empty pool as populated.
+static NV_STATUS conf_computing_dma_buffer_pool_init(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
+{
+    size_t i;
+    uvm_gpu_t *gpu;
+    const size_t num_dma_buffers = 32;
+    NV_STATUS status = NV_OK;
+
+    UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);
+
+    gpu = dma_buffer_pool_to_gpu(dma_buffer_pool);
+
+    UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
+
+    INIT_LIST_HEAD(&dma_buffer_pool->free_dma_buffers);
+    uvm_mutex_init(&dma_buffer_pool->lock, UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
+
+    uvm_mutex_lock(&dma_buffer_pool->lock);
+    for (i = 0; i < num_dma_buffers; i++) {
+        uvm_conf_computing_dma_buffer_t *dma_buffer;
+
+        status = dma_buffer_create(dma_buffer_pool, &dma_buffer);
+        if (status != NV_OK)
+            break;
+
+        dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
+    }
+
+    // Account only for the buffers that really are on the free list
+    dma_buffer_pool->num_dma_buffers = i;
+    uvm_mutex_unlock(&dma_buffer_pool->lock);
+
+    // Partial population is treated as a failure: release what was created.
+    // With i == 0 this is a no-op, since the count is zero.
+    if (i < num_dma_buffers)
+        conf_computing_dma_buffer_pool_deinit(dma_buffer_pool);
+
+    return status;
+}
+
+// Grow the pool by as many buffers as it currently owns (i.e. try to double
+// it), adding the new buffers to the free list.
+//
+// The expansion is best-effort: NV_OK is returned as long as at least one new
+// buffer could be created, and the pool's count is increased by the number of
+// buffers actually added. Only when no buffer at all could be created is the
+// creation error returned.
+//
+// The caller must hold the pool lock (asserted by dma_buffer_pool_add()).
+static NV_STATUS dma_buffer_pool_expand_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
+{
+    size_t i;
+    size_t nb_to_alloc;
+    NV_STATUS status = NV_OK;
+
+    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);
+
+    nb_to_alloc = dma_buffer_pool->num_dma_buffers;
+    for (i = 0; i < nb_to_alloc; ++i) {
+        uvm_conf_computing_dma_buffer_t *dma_buffer;
+
+        status = dma_buffer_create(dma_buffer_pool, &dma_buffer);
+        if (status != NV_OK)
+            break;
+
+        dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
+    }
+
+    dma_buffer_pool->num_dma_buffers += i;
+
+    // Nothing was added: report why
+    if (i == 0)
+        return status;
+
+    return NV_OK;
+}
+
+// Take a DMA buffer from the pool's free list, expanding the pool first if the
+// list is empty.
+//
+// Work tracked by the buffer for GPUs other than the buffer's DMA owner is
+// waited for here. The remaining tracker entries are either handed to the
+// caller through out_tracker or, when out_tracker is NULL, waited for before
+// returning.
+//
+// On success the buffer's encrypted_page_mask is cleared and the buffer is
+// returned in *dma_buffer_out. On failure the buffer is put back on the free
+// list and *dma_buffer_out is not written.
+NV_STATUS uvm_conf_computing_dma_buffer_alloc(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
+                                              uvm_conf_computing_dma_buffer_t **dma_buffer_out,
+                                              uvm_tracker_t *out_tracker)
+{
+    uvm_conf_computing_dma_buffer_t *dma_buffer = NULL;
+    NV_STATUS status;
+
+    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);
+
+    // TODO: Bug 3385623: Heuristically expand DMA memory pool
+    uvm_mutex_lock(&dma_buffer_pool->lock);
+    if (list_empty(&dma_buffer_pool->free_dma_buffers)) {
+        status = dma_buffer_pool_expand_locked(dma_buffer_pool);
+
+        if (status != NV_OK) {
+            uvm_mutex_unlock(&dma_buffer_pool->lock);
+            return status;
+        }
+    }
+
+    // We're guaranteed that at least one DMA stage buffer is available at this
+    // point.
+    dma_buffer = list_first_entry(&dma_buffer_pool->free_dma_buffers, uvm_conf_computing_dma_buffer_t, node);
+
+    // list_del_init() leaves the node self-linked, so the buffer can later be
+    // re-added to the free list.
+    list_del_init(&dma_buffer->node);
+    uvm_mutex_unlock(&dma_buffer_pool->lock);
+
+    // The waits below happen outside the pool lock; the buffer is no longer
+    // reachable through the free list.
+    status = uvm_tracker_wait_for_other_gpus(&dma_buffer->tracker, dma_buffer->alloc->dma_owner);
+    if (status != NV_OK)
+        goto error;
+
+    if (out_tracker)
+        status = uvm_tracker_add_tracker_safe(out_tracker, &dma_buffer->tracker);
+    else
+        status = uvm_tracker_wait(&dma_buffer->tracker);
+
+    if (status != NV_OK)
+        goto error;
+
+    uvm_page_mask_zero(&dma_buffer->encrypted_page_mask);
+    *dma_buffer_out = dma_buffer;
+
+    return status;
+
+error:
+    // NOTE(review): the tracker is deinitialized without waiting for its
+    // remaining entries before the buffer goes back to the free list; confirm
+    // this is only reachable when the tracked work can no longer matter.
+    uvm_tracker_deinit(&dma_buffer->tracker);
+    uvm_conf_computing_dma_buffer_free(dma_buffer_pool, dma_buffer, NULL);
+    return status;
+}
+
+// Return a DMA buffer to the pool's free list. A NULL buffer is ignored.
+//
+// When a tracker is supplied, its entries that have not completed yet are
+// merged into the buffer's own tracker, so that whoever grabs the buffer next
+// can synchronize with them.
+void uvm_conf_computing_dma_buffer_free(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
+                                        uvm_conf_computing_dma_buffer_t *dma_buffer,
+                                        uvm_tracker_t *tracker)
+{
+    if (dma_buffer == NULL)
+        return;
+
+    UVM_ASSERT(dma_buffer_pool->num_dma_buffers > 0);
+
+    // Drop completed entries first to keep the buffer's tracker small
+    uvm_tracker_remove_completed(&dma_buffer->tracker);
+
+    if (tracker != NULL) {
+        NV_STATUS add_status;
+
+        uvm_tracker_remove_completed(tracker);
+        add_status = uvm_tracker_add_tracker_safe(&dma_buffer->tracker, tracker);
+
+        // The only accepted failure is the one matching the global status
+        if (add_status != NV_OK) {
+            UVM_ASSERT(add_status == uvm_global_get_status());
+        }
+    }
+
+    uvm_mutex_lock(&dma_buffer_pool->lock);
+    dma_buffer_pool_add(dma_buffer_pool, dma_buffer);
+    uvm_mutex_unlock(&dma_buffer_pool->lock);
+}
+
+// Release the GPU's dummy IV allocation, if any.
+//
+// The pointer is cleared after the free so that the function can be called
+// more than once. This matters on the failure path of dummy_iv_mem_init():
+// it frees iv_mem here, and uvm_conf_computing_gpu_init() then calls
+// uvm_conf_computing_gpu_deinit(), which ends up here a second time. Without
+// the reset that second call would free the same allocation again.
+//
+// Relies on uvm_mem_free() accepting NULL, as dma_buffer_create()'s error path
+// in this file already does.
+static void dummy_iv_mem_deinit(uvm_gpu_t *gpu)
+{
+    uvm_mem_free(gpu->conf_computing.iv_mem);
+    gpu->conf_computing.iv_mem = NULL;
+}
+
+// Allocate the GPU's dummy IV buffer (sizeof(UvmCslIv) bytes of sysmem DMA
+// memory) and map it in the GPU's kernel address space. Only done in HCC mode;
+// for any other mode the function succeeds without allocating anything.
+static NV_STATUS dummy_iv_mem_init(uvm_gpu_t *gpu)
+{
+    NV_STATUS status = NV_OK;
+
+    if (uvm_conf_computing_mode_is_hcc(gpu)) {
+        status = uvm_mem_alloc_sysmem_dma(sizeof(UvmCslIv), gpu, NULL, &gpu->conf_computing.iv_mem);
+
+        if (status == NV_OK) {
+            status = uvm_mem_map_gpu_kernel(gpu->conf_computing.iv_mem, gpu);
+
+            // Undo the allocation if it could not be mapped
+            if (status != NV_OK)
+                dummy_iv_mem_deinit(gpu);
+        }
+    }
+
+    return status;
+}
+
+// Set up the per-GPU Confidential Computing state: the DMA buffer pool first,
+// then the dummy IV memory. Succeeds immediately when Confidential Computing
+// is not enabled for the GPU.
+NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
+{
+    NV_STATUS status;
+
+    if (!uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
+    // Nothing to unwind if the pool itself cannot be created
+    status = conf_computing_dma_buffer_pool_init(&gpu->conf_computing.dma_buffer_pool);
+    if (status != NV_OK)
+        return status;
+
+    // A failure past this point has to tear the pool down again
+    status = dummy_iv_mem_init(gpu);
+    if (status != NV_OK)
+        uvm_conf_computing_gpu_deinit(gpu);
+
+    return status;
+}
+
+// Tear down the per-GPU Confidential Computing state in the reverse order of
+// uvm_conf_computing_gpu_init(): the dummy IV memory first, then the DMA
+// buffer pool. The pool teardown is a no-op when the pool holds no buffers.
+void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
+{
+    dummy_iv_mem_deinit(gpu);
+    conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
+}
+
+// Advance the channel's CSL decrypt IV by one, under the channel's CSL context
+// lock, and hand the IV back through iv.
+void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
+{
+    NV_STATUS csl_status;
+
+    uvm_mutex_lock(&channel->csl.ctx_lock);
+
+    csl_status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
+
+    uvm_mutex_unlock(&channel->csl.ctx_lock);
+
+    // TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
+    // NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
+    // nvUvmInterfaceCslRotateIv.
+    UVM_ASSERT(csl_status == NV_OK);
+}
+
+// Advance the channel's CSL encrypt IV by one, under the channel's CSL context
+// lock, and hand the IV back through iv.
+void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
+{
+    NV_STATUS csl_status;
+
+    uvm_mutex_lock(&channel->csl.ctx_lock);
+
+    csl_status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, 1, iv);
+
+    uvm_mutex_unlock(&channel->csl.ctx_lock);
+
+    // TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
+    // NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
+    // nvUvmInterfaceCslRotateIv.
+    UVM_ASSERT(csl_status == NV_OK);
+}
+
+// Encrypt size bytes from src_plain into dst_cipher on the CPU using the
+// channel's CSL context, writing the authentication tag to auth_tag_buffer.
+// size must be non-zero. The CSL call is made under the channel's CSL context
+// lock.
+void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
+                                    void *dst_cipher,
+                                    const void *src_plain,
+                                    UvmCslIv *encrypt_iv,
+                                    size_t size,
+                                    void *auth_tag_buffer)
+{
+    NV_STATUS csl_status;
+    NvU8 const *plain_bytes = (NvU8 const *) src_plain;
+    NvU8 *cipher_bytes = (NvU8 *) dst_cipher;
+    NvU8 *tag_bytes = (NvU8 *) auth_tag_buffer;
+
+    UVM_ASSERT(size);
+
+    uvm_mutex_lock(&channel->csl.ctx_lock);
+
+    csl_status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx, size, plain_bytes, encrypt_iv, cipher_bytes, tag_bytes);
+
+    uvm_mutex_unlock(&channel->csl.ctx_lock);
+
+    // nvUvmInterfaceCslEncrypt fails when a 64-bit encryption counter
+    // overflows. This is not supposed to happen on CC.
+    UVM_ASSERT(csl_status == NV_OK);
+}
+
+// Decrypt size bytes from src_cipher into dst_plain on the CPU using the
+// channel's CSL context, the given IV and the authentication tag in
+// auth_tag_buffer. No additional data is passed to CSL (NULL, 0). The CSL
+// status is returned to the caller unchanged.
+NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
+                                         void *dst_plain,
+                                         const void *src_cipher,
+                                         const UvmCslIv *src_iv,
+                                         size_t size,
+                                         const void *auth_tag_buffer)
+{
+    NV_STATUS csl_status;
+    const NvU8 *cipher_bytes = (const NvU8 *) src_cipher;
+    const NvU8 *tag_bytes = (const NvU8 *) auth_tag_buffer;
+    NvU8 *plain_bytes = (NvU8 *) dst_plain;
+
+    uvm_mutex_lock(&channel->csl.ctx_lock);
+
+    csl_status = nvUvmInterfaceCslDecrypt(&channel->csl.ctx, size, cipher_bytes, src_iv, plain_bytes, NULL, 0, tag_bytes);
+
+    uvm_mutex_unlock(&channel->csl.ctx_lock);
+
+    return csl_status;
+}
+
+// Decrypt one replayable fault entry from src_cipher into dst_plain using the
+// CSL context RM provided for replayable faults. The number of bytes decrypted
+// is the fault buffer HAL's entry size.
+//
+// Must be called with the replayable faults service lock held, and only when
+// the replayable fault buffer is not owned by UVM (both are asserted).
+//
+// Returns the CSL status; a failure is also logged with the GPU name.
+NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
+                                           void *dst_plain,
+                                           const void *src_cipher,
+                                           const void *auth_tag_buffer,
+                                           NvU8 valid)
+{
+    NV_STATUS status;
+
+    // There is no dedicated lock for the CSL context associated with replayable
+    // faults. The mutual exclusion required by the RM CSL API is enforced by
+    // relying on the GPU replayable service lock (ISR lock), since fault
+    // decryption is invoked as part of fault servicing.
+    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
+
+    UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));
+
+    // No IV is passed (NULL). The plaintext valid byte is passed together with
+    // its size - presumably as additional authenticated data; confirm against
+    // the nvUvmInterfaceCslDecrypt documentation.
+    status = nvUvmInterfaceCslDecrypt(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
+                                      parent_gpu->fault_buffer_hal->entry_size(parent_gpu),
+                                      (const NvU8 *) src_cipher,
+                                      NULL,
+                                      (NvU8 *) dst_plain,
+                                      &valid,
+                                      sizeof(valid),
+                                      (const NvU8 *) auth_tag_buffer);
+
+    if (status != NV_OK)
+        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
+
+    return status;
+}
+
+// Advance the decrypt IV of the replayable faults CSL context by the given
+// increment. The resulting IV is not returned (NULL is passed to CSL).
+//
+// Same preconditions as uvm_conf_computing_fault_decrypt(): the replayable
+// faults service lock is held and the fault buffer is not owned by UVM.
+// A CSL failure is only caught by an assertion.
+void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment)
+{
+    NV_STATUS status;
+
+    // See comment in uvm_conf_computing_fault_decrypt
+    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
+
+    UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));
+
+    status = nvUvmInterfaceCslIncrementIv(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
+                                          UVM_CSL_OPERATION_DECRYPT,
+                                          increment,
+                                          NULL);
+
+    UVM_ASSERT(status == NV_OK);
+}
--- a/kernel-open/nvidia-uvm/uvm_conf_computing.h
+++ b/kernel-open/nvidia-uvm/uvm_conf_computing.h
@@ -0,0 +1,202 @@
+/*******************************************************************************
+    Copyright (c) 2021-2023 NVIDIA Corporation
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to
+    deal in the Software without restriction, including without limitation the
+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+        The above copyright notice and this permission notice shall be
+        included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+#ifndef __UVM_CONF_COMPUTING_H__
+#define __UVM_CONF_COMPUTING_H__
+
+#include "nv_uvm_types.h"
+#include "uvm_forward_decl.h"
+#include "uvm_lock.h"
+#include "uvm_tracker.h"
+#include "uvm_va_block_types.h"
+
+#include "linux/list.h"
+
+#define UVM_CONF_COMPUTING_AUTH_TAG_SIZE (UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES)
+
+// An authentication tag pointer is required by HW to be 16-bytes aligned.
+#define UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT 16
+
+// An IV pointer is required by HW to be 16-bytes aligned.
+//
+// Use sizeof(UvmCslIv) to refer to the IV size.
+#define UVM_CONF_COMPUTING_IV_ALIGNMENT 16
+
+// SEC2 decrypt operation buffers are required to be 16-bytes aligned.
+#define UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT 16
+
+// CE encrypt/decrypt can be unaligned if the entire buffer lies in a single
+// 32B segment. Otherwise, it needs to be 32B aligned.
+#define UVM_CONF_COMPUTING_BUF_ALIGNMENT 32
+
+#define UVM_CONF_COMPUTING_DMA_BUFFER_SIZE UVM_VA_BLOCK_SIZE
+
+// SEC2 supports at most a stream of 64 entries in the method stream for
+// signing. Each entry is made of the method address and method data, therefore
+// the maximum buffer size is: UVM_METHOD_SIZE * 2 * 64 = 512.
+// UVM, however, won't use this many entries: in the worst-case scenario, we
+// push a semaphore_release or a decrypt. A SEC2 semaphore_release uses 6 1U
+// entries, whereas a SEC2 decrypt uses 10 1U entries. For 10 entries,
+// UVM_METHOD_SIZE * 2 * 10 = 80.
+#define UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE 80
+
+void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);
+
+bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent);
+bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu);
+bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);
+
+// Pool of DMA stage buffers owned by a GPU.
+typedef struct
+{
+    // List of free DMA buffers (uvm_conf_computing_dma_buffer_t).
+    // A free DMA buffer can be grabbed anytime, though the tracker
+    // inside it may still have pending work.
+    struct list_head free_dma_buffers;
+
+    // Number of DMA buffers accounted to the pool, whether they are currently
+    // free or handed out. Zero means the pool holds no buffers, in which case
+    // sync and deinit return without touching the lock or the list. When the
+    // free list runs empty, the pool tries to grow by this many buffers.
+    size_t num_dma_buffers;
+
+    // Lock protecting the dma_buffer_pool
+    uvm_mutex_t lock;
+} uvm_conf_computing_dma_buffer_pool_t;
+
+// A single DMA stage buffer managed by uvm_conf_computing_dma_buffer_pool_t.
+typedef struct
+{
+    // Backing DMA allocation
+    uvm_mem_t *alloc;
+
+    // Used internally by the pool management code to track the state of
+    // a free buffer.
+    uvm_tracker_t tracker;
+
+    // When the DMA buffer is used as the destination of a GPU encryption, SEC2
+    // writes the authentication tag here. Later when the buffer is decrypted
+    // on the CPU the authentication tag is used again (read) for CSL to verify
+    // the authenticity. The allocation is big enough for one authentication
+    // tag per PAGE_SIZE page in the alloc buffer.
+    uvm_mem_t *auth_tag;
+
+    // CSL supports out-of-order decryption, the decrypt IV is used similarly
+    // to the authentication tag. The allocation is big enough for one IV per
+    // PAGE_SIZE page in the alloc buffer. The granularity between the decrypt
+    // IV and authentication tag must match.
+    UvmCslIv decrypt_iv[(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE)];
+
+    // Bitmap of the encrypted pages in the backing allocation
+    uvm_page_mask_t encrypted_page_mask;
+
+    // Linkage into uvm_conf_computing_dma_buffer_pool_t.free_dma_buffers while
+    // the buffer is free.
+    struct list_head node;
+} uvm_conf_computing_dma_buffer_t;
+
+// Retrieve a DMA buffer from the given DMA allocation pool.
+// NV_OK                Stage buffer successfully retrieved
+// NV_ERR_NO_MEMORY     No free DMA buffers are available for grab, and
+//                      expanding the memory pool to get new ones failed.
+//
+// out_dma_buffer is only valid if NV_OK is returned. The caller is responsible
+// for calling uvm_conf_computing_dma_buffer_free once the operations on this
+// buffer are done.
+// When out_tracker is passed to the function, the buffer's dependencies are
+// added to the tracker. The caller is guaranteed that all pending tracker
+// entries come from the same GPU as the pool's owner. Before being able to use
+// the DMA buffer, the caller is responsible for either acquiring or waiting
+// on out_tracker. If out_tracker is NULL, the wait happens in the allocation
+// itself.
+// Upon success the encrypted_page_mask is cleared as part of the allocation.
+NV_STATUS uvm_conf_computing_dma_buffer_alloc(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
+                                              uvm_conf_computing_dma_buffer_t **out_dma_buffer,
+                                              uvm_tracker_t *out_tracker);
+
+// Free a DMA buffer to the DMA allocation pool. All DMA buffers must be freed
+// prior to GPU deinit.
+//
+// The tracker is optional and a NULL tracker indicates that no new operation
+// has been pushed for the buffer. A non-NULL tracker indicates any additional
+// pending operations on the buffer pushed by the caller that need to be
+// synchronized before freeing or re-using the buffer.
+void uvm_conf_computing_dma_buffer_free(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
+                                        uvm_conf_computing_dma_buffer_t *dma_buffer,
+                                        uvm_tracker_t *tracker);
+
+// Synchronize trackers in all entries in the GPU's DMA pool
+void uvm_conf_computing_dma_buffer_pool_sync(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool);
+
+
+// Initialization and deinitialization of Confidential Computing data structures
+// for the given GPU.
+NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu);
+void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu);
+
+// Logs encryption information from the GPU and returns the IV.
+void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv);
+
+// Acquires next CPU encryption IV and returns it.
+void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv);
+
+// CPU side encryption helper with explicit IV, which is obtained from
+// uvm_conf_computing_acquire_encryption_iv. Without an explicit IV
+// the function uses the next IV in order. Encrypts data in src_plain and
+// writes the cipher text in dst_cipher. src_plain and dst_cipher can't overlap.
+// The IV is invalidated and can't be used again after this operation.
+void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
+                                    void *dst_cipher,
+                                    const void *src_plain,
+                                    UvmCslIv *encrypt_iv,
+                                    size_t size,
+                                    void *auth_tag_buffer);
+
+// CPU side decryption helper. Decrypts data from src_cipher and writes the
+// plain text in dst_plain. src_cipher and dst_plain can't overlap. IV obtained
+// from uvm_conf_computing_log_gpu_encryption() needs to be passed to src_iv.
+NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
+                                         void *dst_plain,
+                                         const void *src_cipher,
+                                         const UvmCslIv *src_iv,
+                                         size_t size,
+                                         const void *auth_tag_buffer);
+
+// CPU decryption of a single replayable fault, encrypted by GSP-RM.
+//
+// Replayable fault decryption depends not only on the encrypted fault contents,
+// and the authentication tag, but also on the plaintext valid bit associated
+// with the fault.
+//
+// When decrypting data previously encrypted by the Copy Engine, use
+// uvm_conf_computing_cpu_decrypt instead.
+//
+// Locking: this function must be invoked while holding the replayable ISR lock.
+NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
+                                           void *dst_plain,
+                                           const void *src_cipher,
+                                           const void *auth_tag_buffer,
+                                           NvU8 valid);
+
+// Increment the CPU-side decrypt IV of the CSL context associated with
+// replayable faults. The function is a no-op if the given increment is zero.
+//
+// The IV associated with a fault CSL context is a 64-bit counter.
+//
+// Locking: this function must be invoked while holding the replayable ISR lock.
+void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment);
+#endif // __UVM_CONF_COMPUTING_H__
--- a/kernel-open/nvidia-uvm/uvm_forward_decl.h
+++ b/kernel-open/nvidia-uvm/uvm_forward_decl.h
@@ -28,6 +28,8 @@ typedef struct uvm_global_struct uvm_global_t;

 typedef struct uvm_gpu_struct uvm_gpu_t;
 typedef struct uvm_parent_gpu_struct uvm_parent_gpu_t;
+typedef struct uvm_gpu_chunk_struct uvm_gpu_chunk_t;
+typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
 typedef struct uvm_rm_mem_struct uvm_rm_mem_t;
 typedef struct uvm_mem_struct uvm_mem_t;
 typedef struct uvm_host_hal_struct uvm_host_hal_t;
@@ -35,6 +37,7 @@ typedef struct uvm_ce_hal_struct uvm_ce_hal_t;
 typedef struct uvm_arch_hal_struct uvm_arch_hal_t;
 typedef struct uvm_fault_buffer_hal_struct uvm_fault_buffer_hal_t;
 typedef struct uvm_access_counter_buffer_hal_struct uvm_access_counter_buffer_hal_t;
+typedef struct uvm_sec2_hal_struct uvm_sec2_hal_t;
 typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
 typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
 typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;
@@ -47,6 +50,7 @@ typedef struct uvm_channel_struct uvm_channel_t;
 typedef struct uvm_user_channel_struct uvm_user_channel_t;
 typedef struct uvm_push_struct uvm_push_t;
 typedef struct uvm_push_info_struct uvm_push_info_t;
+typedef struct uvm_push_crypto_bundle_struct uvm_push_crypto_bundle_t;
 typedef struct uvm_push_acquire_info_struct uvm_push_acquire_info_t;
 typedef struct uvm_pushbuffer_struct uvm_pushbuffer_t;
 typedef struct uvm_gpfifo_entry_struct uvm_gpfifo_entry_t;
@@ -56,6 +60,7 @@ typedef struct uvm_va_range_struct uvm_va_range_t;
 typedef struct uvm_va_block_struct uvm_va_block_t;
 typedef struct uvm_va_block_test_struct uvm_va_block_test_t;
 typedef struct uvm_va_block_wrapper_struct uvm_va_block_wrapper_t;
+typedef struct uvm_va_block_retry_struct uvm_va_block_retry_t;
 typedef struct uvm_va_space_struct uvm_va_space_t;
 typedef struct uvm_va_space_mm_struct uvm_va_space_mm_t;

--- a/kernel-open/nvidia-uvm/uvm_global.c
+++ b/kernel-open/nvidia-uvm/uvm_global.c
@@ -71,11 +71,6 @@ static void uvm_unregister_callbacks(void)
    }
 }

-static void sev_init(const UvmPlatformInfo *platform_info)
-{
-    g_uvm_global.sev_enabled = platform_info->sevEnabled;
-}
-
 NV_STATUS uvm_global_init(void)
 {
    NV_STATUS status;
@@ -124,8 +119,7 @@ NV_STATUS uvm_global_init(void)

    uvm_ats_init(&platform_info);
    g_uvm_global.num_simulated_devices = 0;
-
-    sev_init(&platform_info);
+    g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;

    status = uvm_gpu_init();
    if (status != NV_OK) {
--- a/kernel-open/nvidia-uvm/uvm_global.h
+++ b/kernel-open/nvidia-uvm/uvm_global.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2021 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -143,11 +143,16 @@ struct uvm_global_struct
        struct page *page;
    } unload_state;

-    // AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
-    // enabled. This field is set once during global initialization
-    // (uvm_global_init), and can be read afterwards without acquiring any
-    // locks.
-    bool sev_enabled;
+    // True if the VM has AMD's SEV, or equivalent HW security extensions such
+    // as Intel's TDX, enabled. The flag is always false on the host.
+    //
+    // This value moves in tandem with that of Confidential Computing in the
+    // GPU(s) in all supported configurations, so it is used as a proxy for the
+    // Confidential Computing state.
+    //
+    // This field is set once during global initialization (uvm_global_init),
+    // and can be read afterwards without acquiring any locks.
+    bool conf_computing_enabled;
 };

 // Initialize global uvm state
@@ -191,6 +196,16 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
    g_uvm_global.parent_gpus[gpu_index] = NULL;
 }

+// Get a parent gpu by its id.
+// Returns a pointer to the parent GPU object, or NULL if not found.
+//
+// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
+// retained at least one of the child GPUs.
+static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_gpu_id_t id)
+{
+    return g_uvm_global.parent_gpus[uvm_id_gpu_index(id)];
+}
+
 // Get a gpu by its global id.
 // Returns a pointer to the GPU object, or NULL if not found.
 //
--- a/kernel-open/nvidia-uvm/uvm_gpu.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -41,6 +41,7 @@
 #include "uvm_gpu_access_counters.h"
 #include "uvm_ats.h"
 #include "uvm_test.h"
+#include "uvm_conf_computing.h"

 #include "uvm_linux.h"

@@ -66,21 +67,6 @@ static uvm_user_channel_t *get_user_channel(uvm_rb_tree_node_t *node)
    return container_of(node, uvm_user_channel_t, instance_ptr.node);
 }

-static void fill_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_info)
-{
-    char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
-
-    parent_gpu->rm_info = *gpu_info;
-
-    format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
-    snprintf(parent_gpu->name,
-             sizeof(parent_gpu->name),
-             "ID %u: %s: %s",
-             uvm_id_value(parent_gpu->id),
-             parent_gpu->rm_info.name,
-             uuid_buffer);
-}
-
 static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
 {
    switch (link_type) {
@@ -101,39 +87,61 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
    }
 }

-static NV_STATUS get_gpu_caps(uvm_parent_gpu_t *parent_gpu)
+static void fill_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_info)
+{
+    char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
+
+    parent_gpu->rm_info = *gpu_info;
+
+    parent_gpu->system_bus.link = get_gpu_link_type(gpu_info->sysmemLink);
+    UVM_ASSERT(parent_gpu->system_bus.link != UVM_GPU_LINK_INVALID);
+
+    parent_gpu->system_bus.link_rate_mbyte_per_s = gpu_info->sysmemLinkRateMBps;
+
+    if (gpu_info->systemMemoryWindowSize > 0) {
+        // memory_window_end is inclusive but uvm_gpu_is_coherent() checks
+        // memory_window_end > memory_window_start as its condition.
+        UVM_ASSERT(gpu_info->systemMemoryWindowSize > 1);
+        parent_gpu->system_bus.memory_window_start = gpu_info->systemMemoryWindowStart;
+        parent_gpu->system_bus.memory_window_end   = gpu_info->systemMemoryWindowStart +
+                                                     gpu_info->systemMemoryWindowSize - 1;
+    }
+
+    parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_info->connectedToSwitch;
+
+    // nvswitch is routed via physical pages, where the upper 13-bits of the
+    // 47-bit address space holds the routing information for each peer.
+    // Currently, this is limited to a 16GB framebuffer window size.
+    if (parent_gpu->nvswitch_info.is_nvswitch_connected)
+        parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_info->nvswitchMemoryWindowStart;
+
+    format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
+    snprintf(parent_gpu->name,
+             sizeof(parent_gpu->name),
+             "ID %u: %s: %s",
+             uvm_id_value(parent_gpu->id),
+             parent_gpu->rm_info.name,
+             uuid_buffer);
+}
+
+static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
 {
    NV_STATUS status;
    UvmGpuCaps gpu_caps;

    memset(&gpu_caps, 0, sizeof(gpu_caps));

-    status = uvm_rm_locked_call(nvUvmInterfaceQueryCaps(parent_gpu->rm_device, &gpu_caps));
+    status = uvm_rm_locked_call(nvUvmInterfaceQueryCaps(uvm_gpu_device_handle(gpu), &gpu_caps));
    if (status != NV_OK)
        return status;

-    parent_gpu->sysmem_link = get_gpu_link_type(gpu_caps.sysmemLink);
-    UVM_ASSERT(parent_gpu->sysmem_link != UVM_GPU_LINK_INVALID);
-
-    parent_gpu->sysmem_link_rate_mbyte_per_s = gpu_caps.sysmemLinkRateMBps;
-    parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_caps.connectedToSwitch;
-
-    // nvswitch is routed via physical pages, where the upper 13-bits of the
-    // 47-bit address space holds the routing information for each peer.
-    // Currently, this is limited to a 16GB framebuffer window size.
-    if (parent_gpu->nvswitch_info.is_nvswitch_connected)
-        parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_caps.nvswitchMemoryWindowStart;
-
    if (gpu_caps.numaEnabled) {
-        parent_gpu->numa_info.enabled = true;
-        parent_gpu->numa_info.node_id = gpu_caps.numaNodeId;
-        parent_gpu->numa_info.system_memory_window_start = gpu_caps.systemMemoryWindowStart;
-        parent_gpu->numa_info.system_memory_window_end = gpu_caps.systemMemoryWindowStart +
-                                                         gpu_caps.systemMemoryWindowSize -
-                                                         1;
+        UVM_ASSERT(uvm_gpu_is_coherent(gpu->parent));
+        gpu->mem_info.numa.enabled = true;
+        gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
    }
    else {
-        UVM_ASSERT(!g_uvm_global.ats.enabled);
+        UVM_ASSERT(!uvm_gpu_is_coherent(gpu->parent));
    }

    return NV_OK;
@@ -210,27 +218,12 @@ static bool gpu_supports_uvm(uvm_parent_gpu_t *parent_gpu)
    return parent_gpu->rm_info.subdeviceCount == 1;
 }

-static bool parent_gpu_uses_canonical_form_address(uvm_parent_gpu_t *parent_gpu)
+static bool platform_uses_canonical_form_address(void)
 {
-    NvU64 gpu_addr_shift;
-    NvU64 cpu_addr_shift;
-
-    // PPC64LE doesn't use canonical form addresses.
    if (NVCPU_IS_PPC64LE)
        return false;

-    // We use big_page_size as UVM_PAGE_SIZE_64K because num_va_bits() is
-    // big_page_size invariant in the MMU HAL.
-    UVM_ASSERT(!parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_128K) ||
-               (parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits() ==
-                parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_128K)->num_va_bits()));
-
-    gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
-    cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
-
-    // Refer to the comments and diagram in uvm_gpu.c:uvm_gpu_can_address().
-    return gpu_addr_shift >= cpu_addr_shift;
-
+    return true;
 }

 bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
@@ -239,6 +232,9 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
    // the canonical address form.
    NvU64 max_va_lower;
    NvU64 addr_end = addr + size - 1;
+    NvU8 gpu_addr_shift;
+    NvU8 cpu_addr_shift;
+    NvU8 addr_shift;

    // Watch out for calling this too early in init
    UVM_ASSERT(gpu->address_space_tree.hal);
@@ -246,6 +242,10 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
    UVM_ASSERT(addr <= addr_end);
    UVM_ASSERT(size > 0);

+    gpu_addr_shift = gpu->address_space_tree.hal->num_va_bits();
+    cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
+    addr_shift = gpu_addr_shift;
+
    // Pascal+ GPUs are capable of accessing kernel pointers in various modes
    // by applying the same upper-bit checks that x86, ARM, and Power
    // processors do. x86 and ARM use canonical form addresses. For ARM, even
@@ -255,13 +255,15 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
    // mapped (or addressed) by the GPU/CPU when the CPU uses canonical form.
    // (C) regions are only accessible by the CPU. Similarly, (G) regions
    // are only accessible by the GPU. (X) regions are not addressible.
+    // Note that we only consider (V) regions, i.e., address ranges that are
+    // addressable by both the CPU and the GPU.
    //
    //               GPU MAX VA < CPU MAX VA           GPU MAX VA >= CPU MAX VA
    //          0xF..F +----------------+          0xF..F +----------------+
-    //                 |CCCCCCCCCCCCCCCC|                 |VVVVVVVVVVVVVVVV|
-    //                 |CCCCCCCCCCCCCCCC|                 |VVVVVVVVVVVVVVVV|
-    //                 |CCCCCCCCCCCCCCCC|                 |VVVVVVVVVVVVVVVV|
-    //                 |CCCCCCCCCCCCCCCC| CPU MIN UPPER VA|----------------|
+    //                 |VVVVVVVVVVVVVVVV|                 |VVVVVVVVVVVVVVVV|
+    //                 |VVVVVVVVVVVVVVVV|                 |VVVVVVVVVVVVVVVV|
+    //                 |VVVVVVVVVVVVVVVV|                 |VVVVVVVVVVVVVVVV|
+    // GPU MIN UPPER VA|----------------| CPU MIN UPPER VA|----------------|
    //                 |CCCCCCCCCCCCCCCC|                 |GGGGGGGGGGGGGGGG|
    //                 |CCCCCCCCCCCCCCCC|                 |GGGGGGGGGGGGGGGG|
    // CPU MIN UPPER VA|----------------| GPU MIN UPPER VA|----------------|
@@ -270,32 +272,83 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
    // CPU MAX LOWER VA|----------------| GPU MAX LOWER VA|----------------|
    //                 |CCCCCCCCCCCCCCCC|                 |GGGGGGGGGGGGGGGG|
    //                 |CCCCCCCCCCCCCCCC|                 |GGGGGGGGGGGGGGGG|
-    //       GPU MAX VA|----------------| CPU MAX LOWER VA|----------------|
+    // GPU MAX LOWER VA|----------------| CPU MAX LOWER VA|----------------|
    //                 |VVVVVVVVVVVVVVVV|                 |VVVVVVVVVVVVVVVV|
    //                 |VVVVVVVVVVVVVVVV|                 |VVVVVVVVVVVVVVVV|
    //                 |VVVVVVVVVVVVVVVV|                 |VVVVVVVVVVVVVVVV|
    //               0 +----------------+               0 +----------------+

-    if (parent_gpu_uses_canonical_form_address(gpu->parent)) {
-        NvU64 min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - gpu->address_space_tree.hal->num_va_bits()));
-        max_va_lower = 1ULL << (gpu->address_space_tree.hal->num_va_bits() - 1);
+    // On canonical form address platforms and Pascal+ GPUs.
+    if (platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
+        NvU64 min_va_upper;
+
+        // On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
+        // 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
+        // wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
+        // See uvm_parent_gpu_canonical_address() for more details.
+        if (cpu_addr_shift > gpu_addr_shift)
+            addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
+        else if (gpu_addr_shift == 57)
+            addr_shift = gpu_addr_shift;
+        else
+            addr_shift = cpu_addr_shift;
+
+        min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - addr_shift));
+        max_va_lower = 1ULL << (addr_shift - 1);
        return (addr_end < max_va_lower) || (addr >= min_va_upper);
    }
    else {
-        max_va_lower = 1ULL << gpu->address_space_tree.hal->num_va_bits();
+        max_va_lower = 1ULL << addr_shift;
        return addr_end < max_va_lower;
    }
 }

+// The internal UVM VAS does not use canonical form addresses.
+bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
+{
+    NvU64 addr_end = addr + size - 1;
+    NvU64 max_gpu_va;
+
+    // Watch out for calling this too early in init
+    UVM_ASSERT(gpu->address_space_tree.hal);
+    UVM_ASSERT(gpu->address_space_tree.hal->num_va_bits() < 64);
+    UVM_ASSERT(addr <= addr_end);
+    UVM_ASSERT(size > 0);
+
+    max_gpu_va = 1ULL << gpu->address_space_tree.hal->num_va_bits();
+    return addr_end < max_gpu_va;
+}
+
 NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
 {
-    NvU32 gpu_va_bits;
-    NvU32 shift;
+    NvU8 gpu_addr_shift;
+    NvU8 cpu_addr_shift;
+    NvU8 addr_shift;
+    NvU64 input_addr = addr;

-    if (parent_gpu_uses_canonical_form_address(parent_gpu)) {
-        gpu_va_bits =  parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
-        shift = 64 - gpu_va_bits;
-        addr = (NvU64)((NvS64)(addr << shift) >> shift);
+    if (platform_uses_canonical_form_address()) {
+        // When the CPU VA width is larger than GPU's, it means that:
+        // On ARM: the CPU is in LVA mode and the GPU is pre-Hopper.
+        // On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
+        // We sign-extend from bit 48 on ARM and from bit 47 on x86 to mirror
+        // the behavior of CPUs with smaller (than GPU) VA widths.
+        gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
+        cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
+
+        if (cpu_addr_shift > gpu_addr_shift)
+            addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
+        else if (gpu_addr_shift == 57)
+            addr_shift = gpu_addr_shift;
+        else
+            addr_shift = cpu_addr_shift;
+
+        addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
+
+        // This protection applies when the address is not covered by the
+        // GPU's OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
+        // permissive (NO_CHECK) mode.
+        if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
+            return input_addr;
    }

    return addr;
@@ -304,26 +357,30 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
 static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
 {
    NvU32 i;
-    UvmGpuCopyEnginesCaps ces_caps;
+    UvmGpuCopyEnginesCaps *ces_caps;
    NV_STATUS status;

-    memset(&ces_caps, 0, sizeof(ces_caps));
-    status = uvm_rm_locked_call(nvUvmInterfaceQueryCopyEnginesCaps(uvm_gpu_device_handle(gpu), &ces_caps));
+    ces_caps = uvm_kvmalloc_zero(sizeof(*ces_caps));
+    if (!ces_caps) {
+        UVM_SEQ_OR_DBG_PRINT(s, "supported_ces: unavailable (no memory)\n");
+        return;
+    }

+    status = uvm_rm_locked_call(nvUvmInterfaceQueryCopyEnginesCaps(uvm_gpu_device_handle(gpu), ces_caps));
    if (status != NV_OK) {
        UVM_SEQ_OR_DBG_PRINT(s, "supported_ces: unavailable (query failed)\n");
-        return;
+        goto out;
    }

    UVM_SEQ_OR_DBG_PRINT(s, "supported_ces:\n");
    for (i = 0; i < UVM_COPY_ENGINE_COUNT_MAX; ++i) {
-        UvmGpuCopyEngineCaps *ce_caps = ces_caps.copyEngineCaps + i;
+        UvmGpuCopyEngineCaps *ce_caps = ces_caps->copyEngineCaps + i;

        if (!ce_caps->supported)
            continue;

-        UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u nvlink p2p %u "
-                             "p2p %u\n",
+        UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u "
+                             "nvlink p2p %u p2p %u\n",
                             i,
                             ce_caps->cePceMask,
                             ce_caps->grce,
@@ -334,6 +391,9 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
                             ce_caps->nvlinkP2p,
                             ce_caps->p2p);
    }
+
+out:
+    uvm_kvfree(ces_caps);
 }

 static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
@@ -368,7 +428,6 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
 static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
 {
    const UvmGpuInfo *gpu_info = &gpu->parent->rm_info;
-    uvm_numa_info_t *numa_info = &gpu->parent->numa_info;
    NvU64 num_pages_in;
    NvU64 num_pages_out;
    NvU64 mapped_cpu_pages_size;
@@ -387,9 +446,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
        return;

    UVM_SEQ_OR_DBG_PRINT(s, "CPU link type                          %s\n",
-                         uvm_gpu_link_type_string(gpu->parent->sysmem_link));
+                         uvm_gpu_link_type_string(gpu->parent->system_bus.link));
    UVM_SEQ_OR_DBG_PRINT(s, "CPU link bandwidth                     %uMBps\n",
-                         gpu->parent->sysmem_link_rate_mbyte_per_s);
+                         gpu->parent->system_bus.link_rate_mbyte_per_s);

    UVM_SEQ_OR_DBG_PRINT(s, "architecture                           0x%X\n", gpu_info->gpuArch);
    UVM_SEQ_OR_DBG_PRINT(s, "implementation                         0x%X\n", gpu_info->gpuImplementation);
@@ -411,13 +470,13 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
                         gpu->mem_info.max_allocatable_address,
                         gpu->mem_info.max_allocatable_address / (1024 * 1024));

-    if (numa_info->enabled) {
-        NvU64 window_size = numa_info->system_memory_window_end - numa_info->system_memory_window_start + 1;
-        UVM_SEQ_OR_DBG_PRINT(s, "numa_node_id                           %u\n", numa_info->node_id);
-        UVM_SEQ_OR_DBG_PRINT(s, "system_memory_window_start             0x%llx\n",
-                             numa_info->system_memory_window_start);
-        UVM_SEQ_OR_DBG_PRINT(s, "system_memory_window_end               0x%llx\n",
-                             numa_info->system_memory_window_end);
+    if (gpu->mem_info.numa.enabled) {
+        NvU64 window_size = gpu->parent->system_bus.memory_window_end - gpu->parent->system_bus.memory_window_start + 1;
+        UVM_SEQ_OR_DBG_PRINT(s, "numa_node_id                           %u\n", uvm_gpu_numa_node(gpu));
+        UVM_SEQ_OR_DBG_PRINT(s, "memory_window_start                    0x%llx\n",
+                             gpu->parent->system_bus.memory_window_start);
+        UVM_SEQ_OR_DBG_PRINT(s, "memory_window_end                      0x%llx\n",
+                             gpu->parent->system_bus.memory_window_end);
        UVM_SEQ_OR_DBG_PRINT(s, "system_memory_window_size              0x%llx (%llu MBs)\n",
                             window_size,
                             window_size / (1024 * 1024));
@@ -508,6 +567,10 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)

    gpu_info_print_ce_caps(gpu, s);

+    if (uvm_conf_computing_mode_enabled(gpu)) {
+        UVM_SEQ_OR_DBG_PRINT(s, "dma_buffer_pool_num_buffers             %lu\n",
+                             gpu->conf_computing.dma_buffer_pool.num_dma_buffers);
+    }
 }

 static void
@@ -801,7 +864,7 @@ static void deinit_procfs_peer_cap_files(uvm_gpu_peer_t *peer_caps)
    proc_remove(peer_caps->procfs.peer_file[1]);
 }

-static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
+static NV_STATUS init_semaphore_pools(uvm_gpu_t *gpu)
 {
    NV_STATUS status;
    uvm_gpu_t *other_gpu;
@@ -810,7 +873,17 @@ static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
    if (status != NV_OK)
        return status;

+    // When the Confidential Computing feature is enabled, a separate secure
+    // pool is created that holds pages allocated in the CPR of vidmem.
+    if (uvm_conf_computing_mode_enabled(gpu)) {
+        status = uvm_gpu_semaphore_secure_pool_create(gpu, &gpu->secure_semaphore_pool);
+        if (status != NV_OK)
+            return status;
+    }
+
    for_each_global_gpu(other_gpu) {
+        if (uvm_conf_computing_mode_enabled(gpu))
+            break;
        if (other_gpu == gpu)
            continue;
        status = uvm_gpu_semaphore_pool_map_gpu(other_gpu->semaphore_pool, gpu);
@@ -821,7 +894,7 @@ static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
    return NV_OK;
 }

-static void deinit_semaphore_pool(uvm_gpu_t *gpu)
+static void deinit_semaphore_pools(uvm_gpu_t *gpu)
 {
    uvm_gpu_t *other_gpu;

@@ -832,6 +905,7 @@ static void deinit_semaphore_pool(uvm_gpu_t *gpu)
    }

    uvm_gpu_semaphore_pool_destroy(gpu->semaphore_pool);
+    uvm_gpu_semaphore_pool_destroy(gpu->secure_semaphore_pool);
 }

 static NV_STATUS find_unused_global_gpu_id(uvm_parent_gpu_t *parent_gpu, uvm_global_gpu_id_t *out_id)
@@ -866,6 +940,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
                                  uvm_parent_gpu_t **parent_gpu_out)
 {
    uvm_parent_gpu_t *parent_gpu;
+    NV_STATUS status;

    parent_gpu = uvm_kvmalloc_zero(sizeof(*parent_gpu));
    if (!parent_gpu)
@@ -882,11 +957,14 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
    uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
    uvm_rb_tree_init(&parent_gpu->tsg_table);

+    // TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
+    status = errno_to_nv_status(nv_kthread_q_init(&parent_gpu->lazy_free_q, "vidmem lazy free"));
+
    nv_kref_init(&parent_gpu->gpu_kref);

    *parent_gpu_out = parent_gpu;

-    return NV_OK;
+    return status;
 }

 // Allocates a uvm_gpu_t struct and initializes the basic fields and leaves all
@@ -1021,6 +1099,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
        return status;
    }

+    uvm_conf_computing_check_parent_gpu(parent_gpu);
+
    parent_gpu->pci_dev = gpu_platform_info->pci_dev;
    parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);
    parent_gpu->dma_addressable_start = gpu_platform_info->dma_addressable_start;
@@ -1056,12 +1136,6 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,

    uvm_mmu_init_gpu_chunk_sizes(parent_gpu);

-    status = get_gpu_caps(parent_gpu);
-    if (status != NV_OK) {
-        UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
-        return status;
-    }
-
    status = uvm_ats_add_gpu(parent_gpu);
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_ats_add_gpu failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
@@ -1120,6 +1194,12 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
        return status;
    }

+    status = get_gpu_caps(gpu);
+    if (status != NV_OK) {
+        UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
+        return status;
+    }
+
    uvm_mmu_init_gpu_peer_addresses(gpu);

    status = alloc_and_init_address_space(gpu);
@@ -1152,7 +1232,7 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
        return status;
    }

-    status = init_semaphore_pool(gpu);
+    status = init_semaphore_pools(gpu);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to initialize the semaphore pool: %s, GPU %s\n",
                      nvstatusToString(status),
@@ -1182,6 +1262,14 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
        return status;
    }

+    status = uvm_conf_computing_gpu_init(gpu);
+    if (status != NV_OK) {
+        UVM_ERR_PRINT("Failed to initialize Confidential Compute: %s for GPU %s\n",
+                      nvstatusToString(status),
+                      uvm_gpu_name(gpu));
+        return status;
+    }
+
    status = init_procfs_files(gpu);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to init procfs files: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
@@ -1357,6 +1445,8 @@ static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
    // Sync all trackers in PMM
    uvm_pmm_gpu_sync(&gpu->pmm);

+    // Sync all trackers in the GPU's DMA allocation pool
+    uvm_conf_computing_dma_buffer_pool_sync(&gpu->conf_computing.dma_buffer_pool);
 }

 // Remove all references to the given GPU from its parent, since it is being
@@ -1439,7 +1529,7 @@ static void deinit_gpu(uvm_gpu_t *gpu)
    // pain during development.
    deconfigure_address_space(gpu);

-    deinit_semaphore_pool(gpu);
+    deinit_semaphore_pools(gpu);

    uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);

@@ -1490,6 +1580,13 @@ static void remove_gpu(uvm_gpu_t *gpu)
    if (free_parent)
        destroy_nvlink_peers(gpu);

+    // uvm_mem_free and other uvm_mem APIs invoked by the Confidential Compute
+    // deinitialization must be called before the GPU is removed from the global
+    // table.
+    //
+    // TODO: Bug 2008200: Add and remove the GPU in a more reasonable spot.
+    uvm_conf_computing_gpu_deinit(gpu);
+
    // TODO: Bug 2844714: If the parent is not being freed, the following
    // gpu_table_lock is only needed to protect concurrent
    // find_first_valid_gpu() in BH from the __clear_bit here. After
@@ -1539,6 +1636,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
    UVM_ASSERT(parent_gpu->num_retained_gpus == 0);
    UVM_ASSERT(bitmap_empty(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS));

+    nv_kthread_q_stop(&parent_gpu->lazy_free_q);
+
    for (sub_processor_index = 0; sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
        UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);

@@ -2180,8 +2279,8 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
    peer_caps->is_indirect_peer = (p2p_caps_params->indirectAccess == NV_TRUE);

    if (peer_caps->is_indirect_peer) {
-        UVM_ASSERT(gpu0->parent->numa_info.enabled);
-        UVM_ASSERT(gpu1->parent->numa_info.enabled);
+        UVM_ASSERT(gpu0->mem_info.numa.enabled);
+        UVM_ASSERT(gpu1->mem_info.numa.enabled);

        status = uvm_pmm_gpu_indirect_peer_init(&gpu0->pmm, gpu1);
        if (status != NV_OK)
@@ -2370,8 +2469,7 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)

        // Indirect peers are only supported when onlined as NUMA nodes, because
        // we want to use vm_insert_page and dma_map_page.
-        if (p2p_caps_params.indirectAccess &&
-            (!gpu->parent->numa_info.enabled || !other_gpu->parent->numa_info.enabled))
+        if (p2p_caps_params.indirectAccess && (!gpu->mem_info.numa.enabled || !other_gpu->mem_info.numa.enabled))
            continue;

        status = enable_nvlink_peer_access(gpu, other_gpu, &p2p_caps_params);
@@ -2553,7 +2651,13 @@ uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu
 uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu)
 {
    // See comment in page_tree_set_location
-    return uvm_gpu_is_virt_mode_sriov_heavy(gpu)? UVM_APERTURE_VID : UVM_APERTURE_DEFAULT;
+    if (uvm_gpu_is_virt_mode_sriov_heavy(gpu))
+        return UVM_APERTURE_VID;
+
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return UVM_APERTURE_VID;
+
+    return UVM_APERTURE_DEFAULT;
 }

 uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
@@ -2964,9 +3068,6 @@ NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
 exit_unlock:
    uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);

-    if (status == NV_OK)
-        UVM_ASSERT(uvm_va_space_initialized(*out_va_space) == NV_OK);
-
    return status;
 }

@@ -3005,9 +3106,6 @@ NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
 exit_unlock:
    uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);

-    if (status == NV_OK)
-        UVM_ASSERT(uvm_va_space_initialized(*out_va_space) == NV_OK);
-
    return status;
 }

--- a/kernel-open/nvidia-uvm/uvm_gpu.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu.h
@@ -46,6 +46,8 @@
 #include "uvm_rb_tree.h"
 #include "uvm_perf_prefetch.h"
 #include "nv-kthread-q.h"
+#include <linux/mmu_notifier.h>
+#include "uvm_conf_computing.h"

 // Buffer length to store uvm gpu id, RM device name and gpu uuid.
 #define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
@@ -133,6 +135,12 @@ struct uvm_service_block_context_struct

        // This is set if the page migrated to/from the GPU and CPU.
        bool did_migrate;
+
+        // Sequence number used to start a mmu notifier read side critical
+        // section.
+        unsigned long notifier_seq;
+
+        struct vm_fault *vmf;
    } cpu_fault;

    //
@@ -168,6 +176,71 @@ struct uvm_service_block_context_struct
    uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
 };

+typedef struct
+{
+    // Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
+    // VMA. Used for batching ATS faults in a vma.
+    uvm_page_mask_t read_fault_mask;
+
+    // Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
+    // SAM VMA. Used for batching ATS faults in a vma.
+    uvm_page_mask_t write_fault_mask;
+
+    // Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
+    // of a SAM VMA. Used to return ATS fault status.
+    uvm_page_mask_t faults_serviced_mask;
+
+    // Mask of successfully serviced read faults on pages in write_fault_mask.
+    uvm_page_mask_t reads_serviced_mask;
+
+    // Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
+    // SAM VMA. This is used as input to the prefetcher.
+    uvm_page_mask_t faulted_mask;
+
+    // Client type of the service requestor.
+    uvm_fault_client_type_t client_type;
+
+    // New residency ID of the faulting region.
+    uvm_processor_id_t residency_id;
+
+    // New residency NUMA node ID of the faulting region.
+    int residency_node;
+
+    struct
+    {
+        // True if preferred_location was set on this faulting region.
+        // UVM_VA_BLOCK_SIZE sized region in the faulting region bound by the
+        // VMA is prefetched if preferred_location was set and if first_touch
+        // is true.
+        bool has_preferred_location;
+
+        // True if the UVM_VA_BLOCK_SIZE sized region isn't resident on any
+        // node. False if any page in the region is resident somewhere.
+        bool first_touch;
+
+        // Mask of prefetched pages in a UVM_VA_BLOCK_SIZE aligned region of a
+        // SAM VMA.
+        uvm_page_mask_t prefetch_pages_mask;
+
+        // PFN info of the faulting region
+        unsigned long pfns[PAGES_PER_UVM_VA_BLOCK];
+
+        // Faulting/preferred processor residency mask of the faulting region.
+        uvm_page_mask_t residency_mask;
+
+#if defined(NV_MMU_INTERVAL_NOTIFIER)
+        // MMU notifier used to compute residency of this faulting region.
+        struct mmu_interval_notifier notifier;
+#endif
+
+        uvm_va_space_t *va_space;
+
+        // Prefetch temporary state.
+        uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
+    } prefetch_state;
+
+} uvm_ats_fault_context_t;
+
 struct uvm_fault_service_batch_context_struct
 {
    // Array of elements fetched from the GPU fault buffer. The number of
@@ -200,6 +273,8 @@ struct uvm_fault_service_batch_context_struct

    NvU32 num_replays;

+    uvm_ats_fault_context_t ats_context;
+
    // Unique id (per-GPU) generated for tools events recording
    NvU32 batch_id;

@@ -338,6 +413,9 @@ typedef struct
        // Unique id (per-GPU) generated for tools events recording
        NvU32 batch_id;

+        // Information required to service ATS faults.
+        uvm_ats_fault_context_t ats_context;
+
        // Information required to invalidate stale ATS PTEs from the GPU TLBs
        uvm_ats_fault_invalidate_t ats_invalidate;
    } non_replayable;
@@ -349,22 +427,6 @@ typedef struct
    NvU64 disable_prefetch_faults_timestamp;
 } uvm_fault_buffer_info_t;

-typedef struct
-{
-    // True if the platform supports HW coherence (P9) and RM has exposed the
-    // GPU's memory as a NUMA node to the kernel.
-    bool enabled;
-
-    // Range in the system physical address space where the memory of this GPU
-    // is mapped
-    NvU64 system_memory_window_start;
-    NvU64 system_memory_window_end;
-
-    NvU64 memblock_size;
-
-    unsigned node_id;
-} uvm_numa_info_t;
-
 struct uvm_access_counter_service_batch_context_struct
 {
    uvm_access_counter_buffer_entry_t *notification_cache;
@@ -386,7 +448,8 @@ struct uvm_access_counter_service_batch_context_struct
        // Virtual address notifications are always aligned to 64k. This means up to 16
        // different physical locations could have been accessed to trigger one notification.
        // The sub-granularity mask can correspond to any of them.
-        struct {
+        struct
+        {
            uvm_processor_id_t resident_processors[16];
            uvm_gpu_phys_address_t phys_addresses[16];
            uvm_access_counter_buffer_entry_t phys_entry;
@@ -501,6 +564,10 @@ typedef struct

    // Page tables with the mapping.
    uvm_page_table_range_vec_t *range_vec;
+
+    // Used during init to indicate whether the mapping has been fully
+    // initialized.
+    bool ready;
 } uvm_gpu_identity_mapping_t;

 // Root chunk mapping
@@ -581,6 +648,14 @@ struct uvm_gpu_struct
        // Max (inclusive) physical address of this GPU's memory that the driver
        // can allocate through PMM (PMA).
        NvU64 max_allocatable_address;
+
+        struct
+        {
+            // True if the platform supports HW coherence and the GPU's memory
+            // is exposed as a NUMA node to the kernel.
+            bool enabled;
+            unsigned int node_id;
+        } numa;
    } mem_info;

    struct
@@ -637,6 +712,8 @@ struct uvm_gpu_struct

    uvm_gpu_semaphore_pool_t *semaphore_pool;

+    uvm_gpu_semaphore_pool_t *secure_semaphore_pool;
+
    uvm_channel_manager_t *channel_manager;

    uvm_pmm_gpu_t pmm;
@@ -696,6 +773,25 @@ struct uvm_gpu_struct
    // mappings (instead of kernel), and it is used in most configurations.
    uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;

+    struct
+    {
+        uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
+
+        // Dummy memory used to store the IV contents during CE encryption.
+        // This memory location is also only available after CE channels
+        // because we use them to write PTEs for allocations such as this one.
+        // This location is used when physical addressing for the IV buffer
+        // is required. See uvm_hal_hopper_ce_encrypt().
+        uvm_mem_t *iv_mem;
+
+        // Dummy memory used to store the IV contents during CE encryption.
+        // Because of the limitations of `iv_mem', and the need to have such
+        // buffer at channel initialization, we use an RM allocation.
+        // This location is used when virtual addressing for the IV buffer
+        // is required. See uvm_hal_hopper_ce_encrypt().
+        uvm_rm_mem_t *iv_rm_mem;
+    } conf_computing;
+
    // ECC handling
    // In order to trap ECC errors as soon as possible the driver has the hw
    // interrupt register mapped directly. If an ECC interrupt is ever noticed
@@ -833,6 +929,10 @@ struct uvm_parent_gpu_struct
    uvm_arch_hal_t *arch_hal;
    uvm_fault_buffer_hal_t *fault_buffer_hal;
    uvm_access_counter_buffer_hal_t *access_counter_buffer_hal;
+    uvm_sec2_hal_t *sec2_hal;
+
+    // Whether CE supports physical addressing mode for writes to vidmem
+    bool ce_phys_vidmem_write_supported;

    uvm_gpu_peer_copy_mode_t peer_copy_mode;

@@ -954,8 +1054,9 @@ struct uvm_parent_gpu_struct
    // Fault buffer info. This is only valid if supports_replayable_faults is set to true
    uvm_fault_buffer_info_t fault_buffer_info;

-    // NUMA info, mainly for ATS
-    uvm_numa_info_t numa_info;
+    // PMM lazy free processing queue.
+    // TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
+    nv_kthread_q_t lazy_free_q;

    // Access counter buffer info. This is only valid if supports_access_counters is set to true
    uvm_access_counter_buffer_info_t access_counter_buffer_info;
@@ -1045,8 +1146,22 @@ struct uvm_parent_gpu_struct
        NvU64 fabric_memory_window_start;
    } nvswitch_info;

-    uvm_gpu_link_type_t sysmem_link;
-    NvU32 sysmem_link_rate_mbyte_per_s;
+    struct
+    {
+        // Note that this represents the link to system memory, not the link the
+        // system used to discover the GPU. There are some cases such as NVLINK2
+        // where the GPU is still on the PCIe bus, but it accesses memory over
+        // this link rather than PCIe.
+        uvm_gpu_link_type_t link;
+        NvU32 link_rate_mbyte_per_s;
+
+        // Range in the system physical address space where the memory of this
+        // GPU is exposed as coherent. memory_window_end is inclusive.
+        // memory_window_start == memory_window_end indicates that no window is
+        // present (coherence is not supported).
+        NvU64 memory_window_start;
+        NvU64 memory_window_end;
+    } system_bus;
 };

 static const char *uvm_gpu_name(uvm_gpu_t *gpu)
@@ -1120,7 +1235,8 @@ struct uvm_gpu_peer_struct
    // deletion.
    NvHandle p2p_handle;

-    struct {
+    struct
+    {
        struct proc_dir_entry *peer_file[2];
        struct proc_dir_entry *peer_symlink_file[2];

@@ -1141,23 +1257,20 @@ NV_STATUS uvm_gpu_init_va_space(uvm_va_space_t *va_space);

 void uvm_gpu_exit_va_space(uvm_va_space_t *va_space);

-static uvm_numa_info_t *uvm_gpu_numa_info(uvm_gpu_t *gpu)
+static unsigned int uvm_gpu_numa_node(uvm_gpu_t *gpu)
 {
-    UVM_ASSERT(gpu->parent->numa_info.enabled);
-
-    return &gpu->parent->numa_info;
+    UVM_ASSERT(gpu->mem_info.numa.enabled);
+    return gpu->mem_info.numa.node_id;
 }

 static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struct page *page)
 {
-    uvm_numa_info_t *numa_info = uvm_gpu_numa_info(gpu);
-
    unsigned long sys_addr = page_to_pfn(page) << PAGE_SHIFT;
-    unsigned long gpu_offset = sys_addr - numa_info->system_memory_window_start;
+    unsigned long gpu_offset = sys_addr - gpu->parent->system_bus.memory_window_start;

-    UVM_ASSERT(page_to_nid(page) == numa_info->node_id);
-    UVM_ASSERT(sys_addr >= numa_info->system_memory_window_start);
-    UVM_ASSERT(sys_addr + PAGE_SIZE - 1 <= numa_info->system_memory_window_end);
+    UVM_ASSERT(page_to_nid(page) == uvm_gpu_numa_node(gpu));
+    UVM_ASSERT(sys_addr >= gpu->parent->system_bus.memory_window_start);
+    UVM_ASSERT(sys_addr + PAGE_SIZE - 1 <= gpu->parent->system_bus.memory_window_end);

    return uvm_gpu_phys_address(UVM_APERTURE_VID, gpu_offset);
 }
@@ -1265,8 +1378,8 @@ static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
    uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);

    if (peer_caps->link_type != UVM_GPU_LINK_INVALID && peer_caps->is_indirect_peer) {
-        UVM_ASSERT(gpu0->parent->numa_info.enabled);
-        UVM_ASSERT(gpu1->parent->numa_info.enabled);
+        UVM_ASSERT(gpu0->mem_info.numa.enabled);
+        UVM_ASSERT(gpu1->mem_info.numa.enabled);
        UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_PCIE);
        UVM_ASSERT(!uvm_gpus_are_nvswitch_connected(gpu0, gpu1));
        return true;
@@ -1286,6 +1399,9 @@ static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu
    UVM_ASSERT(uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu));
    UVM_ASSERT(pa <= gpu->mem_info.max_allocatable_address);

+    if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu))
+        UVM_ASSERT(gpu->static_flat_mapping.ready);
+
    return uvm_gpu_address_virtual(gpu->parent->flat_vidmem_va_base + pa);
 }

@@ -1303,6 +1419,23 @@ static uvm_gpu_address_t uvm_gpu_address_virtual_from_sysmem_phys(uvm_gpu_t *gpu
    return uvm_gpu_address_virtual(gpu->parent->flat_sysmem_va_base + pa);
 }

+// Given a GPU or CPU physical address (not peer), retrieve an address suitable
+// for CE access.
+static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phys_addr)
+{
+    UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);
+
+    if (phys_addr.aperture == UVM_APERTURE_VID) {
+        if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu))
+            return uvm_gpu_address_virtual_from_vidmem_phys(gpu, phys_addr.address);
+    }
+    else if (uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu)) {
+        return uvm_gpu_address_virtual_from_sysmem_phys(gpu, phys_addr.address);
+    }
+
+    return uvm_gpu_address_from_phys(phys_addr);
+}
+
 static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_gpu_id_t peer_id)
 {
    return &gpu->peer_mappings[uvm_id_gpu_index(peer_id)];
@@ -1364,10 +1497,25 @@ void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_add
 // The GPU must be initialized before calling this function.
 bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

+// Returns whether the given range is within the GPU's addressable VA ranges in
+// the internal GPU VA "kernel" address space, which is a linear address space.
+// Therefore, the input 'addr' must not be in canonical form, even on platforms
+// that use canonical form addresses, e.g., ARM64 and x86.
+// Warning: This only checks whether the GPU's MMU can support the given
+// address. Some HW units on that GPU might only support a smaller range.
+//
+// The GPU must be initialized before calling this function.
+bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
+
 // Returns addr's canonical form for host systems that use canonical form
 // addresses.
 NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);

+static bool uvm_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
+{
+    return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
+}
+
 static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
 {
    return gpu->parent->max_host_va > (1ull << 40);
@@ -1431,6 +1579,7 @@ typedef enum
 {
    UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT,
    UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
+    UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
 } uvm_gpu_buffer_flush_mode_t;

 #endif // __UVM_GPU_H__
--- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
@@ -210,13 +210,13 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
            *bytes = 64 * 1024ULL;
            break;
        case UVM_ACCESS_COUNTER_GRANULARITY_2M:
-            *bytes = 2 * 1024 * 1024ULL;
+            *bytes = 2 * UVM_SIZE_1MB;
            break;
        case UVM_ACCESS_COUNTER_GRANULARITY_16M:
-            *bytes = 16 * 1024 * 1024ULL;
+            *bytes = 16 * UVM_SIZE_1MB;
            break;
        case UVM_ACCESS_COUNTER_GRANULARITY_16G:
-            *bytes = 16 * 1024 * 1024 * 1024ULL;
+            *bytes = 16 * UVM_SIZE_1GB;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
@@ -404,7 +404,8 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);

    status = uvm_rm_locked_call(nvUvmInterfaceInitAccessCntrInfo(parent_gpu->rm_device,
-                                                                 &access_counters->rm_info));
+                                                                 &access_counters->rm_info,
+                                                                 0));
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to init notify buffer info from RM: %s, GPU %s\n",
                      nvstatusToString(status),
@@ -707,6 +708,7 @@ static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_fl
    UVM_ASSERT(gpu->parent->access_counters_supported);

    // Read PUT pointer from the GPU if requested
+    UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
    if (flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT)
        access_counters->cached_put = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferPut);

@@ -1007,6 +1009,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
        NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);
        bool read_duplicate = false;
        uvm_processor_id_t new_residency;
+        const uvm_va_policy_t *policy;

        // Ensure that the migratability iterator covers the current address
        while (iter.end < address)
@@ -1033,21 +1036,23 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,

        // If the underlying VMA is gone, skip HMM migrations.
        if (uvm_va_block_is_hmm(va_block)) {
-            status = uvm_hmm_find_vma(&service_context->block_context, address);
+            status = uvm_hmm_find_vma(service_context->block_context.mm,
+                                      &service_context->block_context.hmm.vma,
+                                      address);
            if (status == NV_ERR_INVALID_ADDRESS)
                continue;

            UVM_ASSERT(status == NV_OK);
        }

-        service_context->block_context.policy = uvm_va_policy_get(va_block, address);
+        policy = uvm_va_policy_get(va_block, address);

        new_residency = uvm_va_block_select_residency(va_block,
                                                      &service_context->block_context,
                                                      page_index,
                                                      processor,
                                                      uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
-                                                      service_context->block_context.policy,
+                                                      policy,
                                                      &thrashing_hint,
                                                      UVM_SERVICE_OPERATION_ACCESS_COUNTERS,
                                                      &read_duplicate);
@@ -1092,12 +1097,17 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
        if (!uvm_processor_mask_empty(&service_context->resident_processors)) {
            while (first_page_index <= last_page_index) {
                uvm_page_index_t outer = last_page_index + 1;
+                const uvm_va_policy_t *policy;

                if (uvm_va_block_is_hmm(va_block)) {
-                    status = uvm_hmm_find_policy_vma_and_outer(va_block,
-                                                               &service_context->block_context,
-                                                               first_page_index,
-                                                               &outer);
+                    status = NV_ERR_INVALID_ADDRESS;
+                    if (service_context->block_context.mm) {
+                        status = uvm_hmm_find_policy_vma_and_outer(va_block,
+                                                                   &service_context->block_context.hmm.vma,
+                                                                   first_page_index,
+                                                                   &policy,
+                                                                   &outer);
+                    }
                    if (status != NV_OK)
                        break;
                }
@@ -1198,6 +1208,11 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
        service_context->num_retries = 0;
        service_context->block_context.mm = mm;

+        if (uvm_va_block_is_hmm(va_block)) {
+            uvm_hmm_service_context_init(service_context);
+            uvm_hmm_migrate_begin_wait(va_block);
+        }
+
        uvm_mutex_lock(&va_block->lock);

        reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);
@@ -1211,6 +1226,9 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,

        uvm_mutex_unlock(&va_block->lock);

+        if (uvm_va_block_is_hmm(va_block))
+            uvm_hmm_migrate_finish(va_block);
+
        if (status == NV_OK)
            *out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
    }
--- a/kernel-open/nvidia-uvm/uvm_gpu_isr.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_isr.c
@@ -85,76 +85,86 @@ static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu);

 static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
 {
+    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
+
+    if (parent_gpu->isr.is_suspended)
+        return 0;
+
    // handling gets set to false for all handlers during removal, so quit if
    // the GPU is in the process of being removed.
-    if (parent_gpu->isr.replayable_faults.handling) {
+    if (!parent_gpu->isr.replayable_faults.handling)
+        return 0;

-        // Use raw call instead of UVM helper. Ownership will be recorded in the
-        // bottom half. See comment replayable_faults_isr_bottom_half().
-        if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) == 0) {
-            if (uvm_gpu_replayable_faults_pending(parent_gpu)) {
-                nv_kref_get(&parent_gpu->gpu_kref);
+    // Use raw call instead of UVM helper. Ownership will be recorded in the
+    // bottom half. See comment in replayable_faults_isr_bottom_half().
+    if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) != 0)
+        return 0;

-                // Interrupts need to be disabled here to avoid an interrupt
-                // storm
-                uvm_gpu_replayable_faults_intr_disable(parent_gpu);
-
-                // Schedule a bottom half, but do *not* release the GPU ISR
-                // lock. The bottom half releases the GPU ISR lock as part of
-                // its cleanup.
-                nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
-                                             &parent_gpu->isr.replayable_faults.bottom_half_q_item);
-                return 1;
-            }
-            else {
-                up(&parent_gpu->isr.replayable_faults.service_lock.sem);
-            }
-        }
+    if (!uvm_gpu_replayable_faults_pending(parent_gpu)) {
+        up(&parent_gpu->isr.replayable_faults.service_lock.sem);
+        return 0;
    }

-    return 0;
+    nv_kref_get(&parent_gpu->gpu_kref);
+
+    // Interrupts need to be disabled here to avoid an interrupt storm
+    uvm_gpu_replayable_faults_intr_disable(parent_gpu);
+
+    // Schedule a bottom half, but do *not* release the GPU ISR lock. The bottom
+    // half releases the GPU ISR lock as part of its cleanup.
+    nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
+                                 &parent_gpu->isr.replayable_faults.bottom_half_q_item);
+
+    return 1;
 }

 static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
 {
+    bool scheduled;
+
+    if (parent_gpu->isr.is_suspended)
+        return 0;
+
    // handling gets set to false for all handlers during removal, so quit if
    // the GPU is in the process of being removed.
-    if (parent_gpu->isr.non_replayable_faults.handling) {
-        // Non-replayable_faults are stored in a synchronized circular queue
-        // shared by RM/UVM. Therefore, we can query the number of pending
-        // faults. This type of faults are not replayed and since RM advances
-        // GET to PUT when copying the fault packets to the queue, no further
-        // interrupts will be triggered by the gpu and faults may stay
-        // unserviced. Therefore, if there is a fault in the queue, we schedule
-        // a bottom half unconditionally.
-        if (uvm_gpu_non_replayable_faults_pending(parent_gpu)) {
-            bool scheduled;
-            nv_kref_get(&parent_gpu->gpu_kref);
+    if (!parent_gpu->isr.non_replayable_faults.handling)
+        return 0;

-            scheduled = nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
-                                                     &parent_gpu->isr.non_replayable_faults.bottom_half_q_item) != 0;
+    // Non-replayable_faults are stored in a synchronized circular queue
+    // shared by RM/UVM. Therefore, we can query the number of pending
+    // faults. This type of faults are not replayed and since RM advances
+    // GET to PUT when copying the fault packets to the queue, no further
+    // interrupts will be triggered by the gpu and faults may stay
+    // unserviced. Therefore, if there is a fault in the queue, we schedule
+    // a bottom half unconditionally.
+    if (!uvm_gpu_non_replayable_faults_pending(parent_gpu))
+        return 0;

-            // If the q_item did not get scheduled because it was already
-            // queued, that instance will handle the pending faults. Just
-            // drop the GPU kref.
-            if (!scheduled)
-                uvm_parent_gpu_kref_put(parent_gpu);
+    nv_kref_get(&parent_gpu->gpu_kref);

-            return 1;
-        }
-    }
+    scheduled = nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
+                                             &parent_gpu->isr.non_replayable_faults.bottom_half_q_item) != 0;

-    return 0;
+    // If the q_item did not get scheduled because it was already
+    // queued, that instance will handle the pending faults. Just
+    // drop the GPU kref.
+    if (!scheduled)
+        uvm_parent_gpu_kref_put(parent_gpu);
+
+    return 1;
 }

 static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
 {
    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);

+    if (parent_gpu->isr.is_suspended)
+        return 0;
+
    if (!parent_gpu->isr.access_counters.handling_ref_count)
        return 0;

-    if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem))
+    if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
        return 0;

    if (!uvm_gpu_access_counters_pending(parent_gpu)) {
@@ -199,7 +209,7 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
 {
    uvm_parent_gpu_t *parent_gpu;
    unsigned num_handlers_scheduled = 0;
-    NV_STATUS status;
+    NV_STATUS status = NV_OK;

    if (!in_interrupt() && in_atomic()) {
        // Early-out if we're not in interrupt context, but memory allocations
@@ -238,18 +248,15 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)

    ++parent_gpu->isr.interrupt_count;

-    if (parent_gpu->isr.is_suspended) {
-        status = NV_ERR_NO_INTR_PENDING;
-    }
-    else {
-        num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
-        num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
-        num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
+    num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
+    num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
+    num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);

-        if (num_handlers_scheduled == 0)
-            status = NV_WARN_MORE_PROCESSING_REQUIRED;
+    if (num_handlers_scheduled == 0) {
+        if (parent_gpu->isr.is_suspended)
+            status = NV_ERR_NO_INTR_PENDING;
        else
-            status = NV_OK;
+            status = NV_WARN_MORE_PROCESSING_REQUIRED;
    }

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
@@ -511,6 +518,9 @@ static void replayable_faults_isr_bottom_half(void *args)
    uvm_gpu_replayable_faults_isr_unlock(parent_gpu);

 put_kref:
+    // It is OK to drop a reference on the parent GPU if a bottom half has
+    // been retriggered within uvm_gpu_replayable_faults_isr_unlock, because the
+    // rescheduling added an additional reference.
    uvm_parent_gpu_kref_put(parent_gpu);
 }

@@ -591,6 +601,51 @@ static void access_counters_isr_bottom_half_entry(void *args)
   UVM_ENTRY_VOID(access_counters_isr_bottom_half(args));
 }

+static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu)
+{
+    bool retrigger = false;
+
+    // When Confidential Computing is enabled, UVM does not (indirectly) trigger
+    // the replayable fault interrupt by updating GET. This is because, in this
+    // configuration, GET is a dummy register used to inform GSP-RM (the owner
+    // of the HW replayable fault buffer) of the latest entry consumed by the
+    // UVM driver. The real GET register is owned by GSP-RM.
+    //
+    // The retriggering of a replayable faults bottom half happens then
+    // manually, by scheduling a bottom half for later if there is any pending
+    // work in the fault buffer accessible by UVM. The retriggering addresses
+    // two problematic scenarios caused by GET updates not setting any
+    // interrupt:
+    //
+    //   (1) UVM didn't process all the entries up to cached PUT
+    //
+    //   (2) UVM did process all the entries up to cached PUT, but GSP-RM
+    //       added new entries such that cached PUT is out-of-date
+    //
+    // In both cases, re-enablement of interrupts would have caused the
+    // replayable fault to be triggered in a non-CC setup, because the updated
+    // value of GET is different from PUT. But this is not the case in Confidential
+    // Computing, so a bottom half needs to be manually scheduled in order to
+    // ensure that all faults are serviced.
+    //
+    // While in the typical case the retriggering happens within a replayable
+    // fault bottom half, it can also happen within a non-interrupt path such as
+    // uvm_gpu_fault_buffer_flush.
+    if (uvm_conf_computing_mode_enabled_parent(parent_gpu))
+        retrigger = true;
+
+    if (!retrigger)
+        return;
+
+    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
+
+    // If there is pending work, schedule a replayable faults bottom
+    // half. It is valid for a bottom half (q_item) to reschedule itself.
+    (void) schedule_replayable_faults_handler(parent_gpu);
+
+    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
+}
+
 void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
 {
    UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
@@ -632,9 +687,9 @@ void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
    // service_lock mutex is released.

    if (parent_gpu->isr.replayable_faults.handling) {
-        // Turn page fault interrupts back on, unless remove_gpu() has already removed this GPU
-        // from the GPU table. remove_gpu() indicates that situation by setting
-        // gpu->replayable_faults.handling to false.
+        // Turn page fault interrupts back on, unless remove_gpu() has already
+        // removed this GPU from the GPU table. remove_gpu() indicates that
+        // situation by setting gpu->replayable_faults.handling to false.
        //
        // This path can only be taken from the bottom half. User threads
        // calling this function must have previously retained the GPU, so they
@@ -671,6 +726,8 @@ void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
    uvm_up_out_of_order(&parent_gpu->isr.replayable_faults.service_lock);

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
+
+    replayable_faults_retrigger_bottom_half(parent_gpu);
 }

 void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
--- a/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c
@@ -177,31 +177,34 @@ bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
    return has_pending_faults == NV_TRUE;
 }

-static NvU32 fetch_non_replayable_fault_buffer_entries(uvm_gpu_t *gpu)
+static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu, NvU32 *cached_faults)
 {
    NV_STATUS status;
-    NvU32 i = 0;
-    NvU32 cached_faults = 0;
-    uvm_fault_buffer_entry_t *fault_cache;
-    NvU32 entry_size = gpu->parent->fault_buffer_hal->entry_size(gpu->parent);
-    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
+    NvU32 i;
+    NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
+    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
+    uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;

-    fault_cache = non_replayable_faults->fault_cache;
+    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
+    UVM_ASSERT(parent_gpu->non_replayable_faults_supported);

-    UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.non_replayable_faults.service_lock));
-    UVM_ASSERT(gpu->parent->non_replayable_faults_supported);
+    status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
+                                                  current_hw_entry,
+                                                  cached_faults);

-    status = nvUvmInterfaceGetNonReplayableFaults(&gpu->parent->fault_buffer_info.rm_info,
-                                                  non_replayable_faults->shadow_buffer_copy,
-                                                  &cached_faults);
-    UVM_ASSERT(status == NV_OK);
+    if (status != NV_OK) {
+        UVM_ERR_PRINT("nvUvmInterfaceGetNonReplayableFaults() failed: %s, GPU %s\n",
+                      nvstatusToString(status),
+                      parent_gpu->name);
+
+        uvm_global_set_fatal_error(status);
+        return status;
+    }

    // Parse all faults
-    for (i = 0; i < cached_faults; ++i) {
-        uvm_fault_buffer_entry_t *fault_entry = &non_replayable_faults->fault_cache[i];
-
-        gpu->parent->fault_buffer_hal->parse_non_replayable_entry(gpu->parent, current_hw_entry, fault_entry);
+    for (i = 0; i < *cached_faults; ++i) {
+        parent_gpu->fault_buffer_hal->parse_non_replayable_entry(parent_gpu, current_hw_entry, fault_entry);

        // The GPU aligns the fault addresses to 4k, but all of our tracking is
        // done in PAGE_SIZE chunks which might be larger.
@@ -226,9 +229,10 @@ static NvU32 fetch_non_replayable_fault_buffer_entries(uvm_gpu_t *gpu)
        }

        current_hw_entry += entry_size;
+        fault_entry++;
    }

-    return cached_faults;
+    return NV_OK;
 }

 // In SRIOV, the UVM (guest) driver does not have access to the privileged
@@ -339,6 +343,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
    bool read_duplicate;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
+    const uvm_va_policy_t *policy;

    UVM_ASSERT(!fault_entry->is_fatal);

@@ -348,7 +353,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
    UVM_ASSERT(fault_entry->fault_address >= va_block->start);
    UVM_ASSERT(fault_entry->fault_address <= va_block->end);

-    service_context->block_context.policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
+    policy = uvm_va_policy_get(va_block, fault_entry->fault_address);

    if (service_context->num_retries == 0) {
        // notify event to tools/performance heuristics. For now we use a
@@ -357,7 +362,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
        uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
                                        va_block,
                                        gpu->id,
-                                        service_context->block_context.policy->preferred_location,
+                                        policy->preferred_location,
                                        fault_entry,
                                        ++non_replayable_faults->batch_id,
                                        false);
@@ -392,7 +397,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
                                                  page_index,
                                                  gpu->id,
                                                  fault_entry->access_type_mask,
-                                                  service_context->block_context.policy,
+                                                  policy,
                                                  &thrashing_hint,
                                                  UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
                                                  &read_duplicate);
@@ -435,6 +440,11 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
    service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
    service_context->num_retries = 0;

+    if (uvm_va_block_is_hmm(va_block)) {
+        uvm_hmm_service_context_init(service_context);
+        uvm_hmm_migrate_begin_wait(va_block);
+    }
+
    uvm_mutex_lock(&va_block->lock);

    status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
@@ -449,6 +459,9 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,

    uvm_mutex_unlock(&va_block->lock);

+    if (uvm_va_block_is_hmm(va_block))
+        uvm_hmm_migrate_finish(va_block);
+
    return status == NV_OK? tracker_status: status;
 }

@@ -459,8 +472,6 @@ static void kill_channel_delayed(void *_user_channel)
    uvm_user_channel_t *user_channel = (uvm_user_channel_t *)_user_channel;
    uvm_va_space_t *va_space = user_channel->kill_channel.va_space;

-    UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
-
    uvm_va_space_down_read_rm(va_space);
    if (user_channel->gpu_va_space) {
        // RM handles the fault, which will do the correct fault reporting in the
@@ -514,6 +525,14 @@ static void schedule_kill_channel(uvm_gpu_t *gpu,
                                 &user_channel->kill_channel.kill_channel_q_item);
 }

+static void service_fault_fatal(uvm_fault_buffer_entry_t *fault_entry, NV_STATUS status)
+{
+    UVM_ASSERT(fault_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH);
+
+    fault_entry->is_fatal = true;
+    fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
+}
+
 static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
                                           struct mm_struct *mm,
                                           uvm_fault_buffer_entry_t *fault_entry,
@@ -523,6 +542,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
    NV_STATUS status = lookup_status;
+    NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;

    UVM_ASSERT(!fault_entry->is_fatal);

@@ -539,27 +559,63 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
        return status;

    if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
+        struct vm_area_struct *vma;
+        uvm_va_range_t *va_range_next;
+        NvU64 fault_address = fault_entry->fault_address;
+        uvm_fault_access_type_t fault_access_type = fault_entry->fault_access_type;
+        uvm_ats_fault_context_t *ats_context = &non_replayable_faults->ats_context;
+
+        uvm_page_mask_zero(&ats_context->read_fault_mask);
+        uvm_page_mask_zero(&ats_context->write_fault_mask);
+
+        ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;
+
        ats_invalidate->write_faults_in_batch = false;

-        // The VA isn't managed. See if ATS knows about it.
-        status = uvm_ats_service_fault_entry(gpu_va_space, fault_entry, ats_invalidate);
+        va_range_next = uvm_va_space_iter_first(gpu_va_space->va_space, fault_entry->fault_address, ~0ULL);

-        // Invalidate ATS TLB entries if needed
-        if (status == NV_OK) {
-            status = uvm_ats_invalidate_tlbs(gpu_va_space,
-                                             ats_invalidate,
-                                             &non_replayable_faults->fault_service_tracker);
+        // The VA isn't managed. See if ATS knows about it.
+        vma = find_vma_intersection(mm, fault_address, fault_address + 1);
+        if (!vma || uvm_ats_check_in_gmmu_region(gpu_va_space->va_space, fault_address, va_range_next)) {
+
+            // Do not return error due to logical errors in the application
+            status = NV_OK;
+        }
+        else {
+            NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(fault_address);
+            uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
+            uvm_page_index_t page_index = (fault_address - base) / PAGE_SIZE;
+            uvm_page_mask_t *fault_mask = (fault_access_type >= UVM_FAULT_ACCESS_TYPE_WRITE) ?
+                                                                                       &ats_context->write_fault_mask :
+                                                                                       &ats_context->read_fault_mask;
+
+            uvm_page_mask_set(fault_mask, page_index);
+
+            status = uvm_ats_service_faults(gpu_va_space, vma, base, ats_context);
+            if (status == NV_OK) {
+                // Invalidate ATS TLB entries if needed
+                if (uvm_page_mask_test(faults_serviced_mask, page_index)) {
+                    status = uvm_ats_invalidate_tlbs(gpu_va_space,
+                                                     ats_invalidate,
+                                                     &non_replayable_faults->fault_service_tracker);
+                    fatal_fault_status = NV_OK;
+                }
+            }
+            else {
+                fatal_fault_status = status;
+            }
        }
    }
    else {
-        UVM_ASSERT(fault_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH);
-        fault_entry->is_fatal = true;
-        fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
+        fatal_fault_status = status;

        // Do not return error due to logical errors in the application
        status = NV_OK;
    }

+    if (fatal_fault_status != NV_OK)
+        service_fault_fatal(fault_entry, fatal_fault_status);
+
    return status;
 }

@@ -623,10 +679,17 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
    fault_entry->fault_source.channel_id = user_channel->hw_channel_id;

    if (!fault_entry->is_fatal) {
-        status = uvm_va_block_find_create(fault_entry->va_space,
-                                          fault_entry->fault_address,
-                                          va_block_context,
-                                          &va_block);
+        if (mm) {
+            status = uvm_va_block_find_create(fault_entry->va_space,
+                                              fault_entry->fault_address,
+                                              &va_block_context->hmm.vma,
+                                              &va_block);
+        }
+        else {
+            status = uvm_va_block_find_create_managed(fault_entry->va_space,
+                                                      fault_entry->fault_address,
+                                                      &va_block);
+        }
        if (status == NV_OK)
            status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
        else
@@ -654,31 +717,35 @@ exit_no_channel:
    uvm_va_space_up_read(va_space);
    uvm_va_space_mm_release_unlock(va_space, mm);

+    if (status != NV_OK)
+        UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
+
    return status;
 }

 void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
 {
-    NV_STATUS status = NV_OK;
    NvU32 cached_faults;

    // If this handler is modified to handle fewer than all of the outstanding
    // faults, then special handling will need to be added to uvm_suspend()
    // to guarantee that fault processing has completed before control is
    // returned to the RM.
-    while ((cached_faults = fetch_non_replayable_fault_buffer_entries(gpu)) > 0) {
+    do {
+        NV_STATUS status;
        NvU32 i;

+        status = fetch_non_replayable_fault_buffer_entries(gpu->parent, &cached_faults);
+        if (status != NV_OK)
+            return;
+
        // Differently to replayable faults, we do not batch up and preprocess
        // non-replayable faults since getting multiple faults on the same
        // memory region is not very likely
        for (i = 0; i < cached_faults; ++i) {
            status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
            if (status != NV_OK)
-                break;
+                return;
        }
-    }
-
-    if (status != NV_OK)
-        UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
+    } while (cached_faults > 0);
 }
--- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
--- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.h
@@ -75,4 +75,7 @@ void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
 // only called from the ISR bottom half
 void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu);

+// Returns true if UVM owns the hardware replayable fault buffer
+bool uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(uvm_parent_gpu_t *parent_gpu);
+
 #endif // __UVM_GPU_PAGE_FAULT_H__
--- a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c
@@ -26,6 +26,7 @@
 #include "uvm_global.h"
 #include "uvm_kvmalloc.h"
 #include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
+#include "uvm_conf_computing.h"

 #define UVM_SEMAPHORE_SIZE 4
 #define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
@@ -44,6 +45,9 @@ struct uvm_gpu_semaphore_pool_struct
    // List of all the semaphore pages belonging to the pool
    struct list_head pages;

+    // Pages aperture.
+    uvm_aperture_t aperture;
+
    // Count of free semaphores among all the pages
    NvU32 free_semaphores_count;

@@ -66,11 +70,24 @@ struct uvm_gpu_semaphore_pool_page_struct
    DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
 };

+static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
+{
+    return uvm_conf_computing_mode_enabled(pool->gpu) && (pool->aperture == UVM_APERTURE_VID);
+}
+
+static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
+{
+    return gpu_semaphore_pool_is_secure(semaphore->page->pool);
+}
+
 static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
 {
    NvU32 offset;
    NvU32 index;

+    if (gpu_semaphore_is_secure(semaphore))
+        return semaphore->conf_computing.index;
+
    UVM_ASSERT(semaphore->payload != NULL);
    UVM_ASSERT(semaphore->page != NULL);

@@ -118,6 +135,14 @@ static bool is_canary(NvU32 val)
    return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
 }

+static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
+{
+    // Canary values are a debug-only aid. A pool allocated in the CPR of
+    // vidmem cannot be read/written from the CPU, so secure pools never use
+    // them.
+    return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
+}
+
 // Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
 // pool?
 static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
@@ -125,12 +150,34 @@ static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
    return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
 }

+// Secure semaphore pools are allocated in the CPR of vidmem and mapped only to
+// the owning GPU, as no other processor has access to them.
+static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
+                                        uvm_gpu_semaphore_pool_page_t *pool_page,
+                                        uvm_rm_mem_type_t memory_type)
+{
+    NV_STATUS status;
+
+    UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
+    status = uvm_rm_mem_alloc(pool->gpu,
+                              memory_type,
+                              UVM_SEMAPHORE_PAGE_SIZE,
+                              UVM_CONF_COMPUTING_BUF_ALIGNMENT,
+                              &pool_page->memory);
+
+    if (status != NV_OK)
+        return status;
+
+    return NV_OK;
+}
+
 static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
 {
    NV_STATUS status;
    uvm_gpu_semaphore_pool_page_t *pool_page;
    NvU32 *payloads;
    size_t i;
+    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;

    uvm_assert_mutex_locked(&pool->mutex);

@@ -141,13 +188,24 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)

    pool_page->pool = pool;

+    // Whenever the Confidential Computing feature is enabled, engines can
+    // access semaphores only in the CPR of vidmem. Mapping to other GPUs is
+    // also disabled.
+    if (gpu_semaphore_pool_is_secure(pool)) {
+        status = pool_alloc_secure_page(pool, pool_page, memory_type);
+
+        if (status != NV_OK)
+            goto error;
+    }
+    else {
    status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
-                                          UVM_RM_MEM_TYPE_SYS,
+                                          memory_type,
                                          UVM_SEMAPHORE_PAGE_SIZE,
                                          0,
                                          &pool_page->memory);
    if (status != NV_OK)
        goto error;
+    }

    // Verify the GPU can access the semaphore pool.
    UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
@@ -158,8 +216,7 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
    list_add(&pool_page->all_pages_node, &pool->pages);
    pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;

-    // Initialize the semaphore payloads to known values
-    if (UVM_IS_DEBUG()) {
+    if (semaphore_uses_canary(pool)) {
        payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            payloads[i] = make_canary(0);
@@ -175,8 +232,6 @@ error:
 static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
 {
    uvm_gpu_semaphore_pool_t *pool;
-    NvU32 *payloads;
-    size_t i;

    UVM_ASSERT(page);
    pool = page->pool;
@@ -189,9 +244,9 @@ static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
                   "count: %u\n",
                   pool->free_semaphores_count);

-    // Check for semaphore release-after-free
-    if (UVM_IS_DEBUG()) {
-        payloads = uvm_rm_mem_get_cpu_va(page->memory);
+    if (semaphore_uses_canary(pool)) {
+        size_t i;
+        NvU32 *payloads = uvm_rm_mem_get_cpu_va(page->memory);
        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            UVM_ASSERT(is_canary(payloads[i]));
    }
@@ -222,11 +277,18 @@ NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaph
        if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
            continue;

-        semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) + semaphore_index * UVM_SEMAPHORE_SIZE);
+        if (gpu_semaphore_pool_is_secure(pool)) {
+            semaphore->conf_computing.index = semaphore_index;
+        }
+        else {
+            semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
+                                                 semaphore_index * UVM_SEMAPHORE_SIZE);
+        }
+
        semaphore->page = page;

-        // Check for semaphore release-after-free
-        UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
+        if (semaphore_uses_canary(pool))
+            UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));

        uvm_gpu_semaphore_set_payload(semaphore, 0);

@@ -265,7 +327,7 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)

    // Write a known value lower than the current payload in an attempt to catch
    // release-after-free and acquire-after-free.
-    if (UVM_IS_DEBUG())
+    if (semaphore_uses_canary(pool))
        uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));

    uvm_mutex_lock(&pool->mutex);
@@ -293,12 +355,26 @@ NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t

    pool->free_semaphores_count = 0;
    pool->gpu = gpu;
+    pool->aperture = UVM_APERTURE_SYS;

    *pool_out = pool;

    return NV_OK;
 }

+NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
+{
+    NV_STATUS status;
+
+    UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
+
+    status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
+    if (status == NV_OK)
+        (*pool_out)->aperture = UVM_APERTURE_VID;
+
+    return status;
+}
+
 void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
 {
    uvm_gpu_semaphore_pool_page_t *page;
@@ -374,13 +450,16 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu
 NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
 {
    NvU32 index = get_index(semaphore);
-    NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space);
+    NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;

    return base_va + UVM_SEMAPHORE_SIZE * index;
 }

 NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
 {
+    if (gpu_semaphore_is_secure(semaphore))
+        return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);
+
    return UVM_GPU_READ_ONCE(*semaphore->payload);
 }

@@ -397,6 +476,10 @@ void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload
    // being optimized out on non-SMP configs (we need them for interacting with
    // the GPU correctly even on non-SMP).
    mb();
+
+    if (gpu_semaphore_is_secure(semaphore))
+            UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
+    else
    UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
 }

@@ -424,9 +507,22 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
    return true;
 }

+bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
+{
+    uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu;
+
+    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return true;
+
+    return false;
+}
+
+
 NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
 {
    NV_STATUS status;
+    uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;

    memset(tracking_sem, 0, sizeof(*tracking_sem));

@@ -436,7 +532,14 @@ NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_g

    UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);

-    uvm_spin_lock_init(&tracking_sem->lock, UVM_LOCK_ORDER_LEAF);
+    if (uvm_conf_computing_mode_enabled(pool->gpu))
+        order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;
+
+    if (tracking_semaphore_uses_mutex(tracking_sem))
+        uvm_mutex_init(&tracking_sem->m_lock, order);
+    else
+        uvm_spin_lock_init(&tracking_sem->s_lock, order);
+
    atomic64_set(&tracking_sem->completed_value, 0);
    tracking_sem->queued_value = 0;

@@ -448,15 +551,119 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
    uvm_gpu_semaphore_free(&tracking_sem->semaphore);
 }

+static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
+{
+    // Skip if the notifier is unchanged, or if it is odd: an odd value means
+    // the GPU is mid-update and the encrypted material would read as corrupt.
+    return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
+}
+
+static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
+{
+    UvmCslIv local_iv;
+    NvU32 local_payload;
+    NvU32 new_sem_value;
+    NvU32 gpu_notifier;
+    NvU32 last_observed_notifier;
+    NvU32 new_gpu_notifier = 0;
+    NvU32 iv_index = 0;
+
+    // A channel can have multiple entries pending and the tracking semaphore
+    // update of each entry can race with this function. Since the semaphore
+    // needs to be updated to release a used entry, we never need more
+    // than 'num_gpfifo_entries' re-tries.
+    unsigned tries_left = channel->num_gpfifo_entries;
+    NV_STATUS status = NV_OK;
+    NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
+    UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
+    void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
+    NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
+    NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
+    uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
+
+    UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
+    UVM_ASSERT(uvm_channel_is_ce(channel));
+
+    last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
+    gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
+    UVM_ASSERT(last_observed_notifier <= gpu_notifier);
+
+    if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
+        return;
+
+    do {
+        gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
+
+        // Odd notifier value means there's an update in progress.
+        if (gpu_notifier % 2)
+            continue;
+
+        // Make sure no memory accesses happen before we read the notifier
+        smp_mb__after_atomic();
+
+        iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
+        memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
+        local_payload = UVM_READ_ONCE(*payload_cpu_addr);
+        memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
+
+        // Make sure the second read of notifier happens after
+        // all memory accesses.
+        smp_mb__before_atomic();
+        new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
+        tries_left--;
+    } while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));
+
+    if (!tries_left) {
+        status = NV_ERR_INVALID_STATE;
+        goto error;
+    }
+
+    if (gpu_notifier == new_gpu_notifier) {
+        status = uvm_conf_computing_cpu_decrypt(channel,
+                                                &new_sem_value,
+                                                &local_payload,
+                                                &local_iv,
+                                                sizeof(new_sem_value),
+                                                &local_auth_tag);
+
+        if (status != NV_OK)
+            goto error;
+
+        uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
+        UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
+    }
+
+    return;
+
+error:
+    // Decryption failure is a fatal error, as is running out of tries.
+    // In testing, every decryption succeeded within one try, so anything
+    // that requires ten retries would be considered active tampering with
+    // the data structures.
+    uvm_global_set_fatal_error(status);
+}
+
 static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
 {
    NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
    // The semaphore value is the bottom 32 bits of completed_value
    NvU32 old_sem_value = (NvU32)old_value;
-    NvU32 new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
+    NvU32 new_sem_value;
    NvU64 new_value;

-    uvm_assert_spinlock_locked(&tracking_semaphore->lock);
+    if (tracking_semaphore_uses_mutex(tracking_semaphore))
+        uvm_assert_mutex_locked(&tracking_semaphore->m_lock);
+    else
+        uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);
+
+    if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
+        // TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
+        //                     mechanism to all semaphore
+        uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
+        uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
+    }
+
+    new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);

    // The following logic to update the completed value is very subtle, it
    // helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
@@ -465,7 +672,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
    if (old_sem_value == new_sem_value) {
        // No progress since the last update.
        // No additional memory barrier required in this case as completed_value
-        // is always updated under the spinlock that this thread just acquired.
+        // is always updated under the lock that this thread just acquired.
        // That guarantees full ordering with all the accesses the thread that
        // updated completed_value did under the lock including the GPU
        // semaphore read.
@@ -492,7 +699,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
                           (NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
                           old_value, new_value);

-    // Use an atomic write even though the spinlock is held so that the value can
+    // Use an atomic write even though the lock is held so that the value can
    // be (carefully) read atomically outside of the lock.
    //
    // atomic64_set() on its own doesn't imply any memory barriers and we need
@@ -520,9 +727,9 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
    // guarantees that no accesses will be ordered above the atomic (and hence
    // the GPU semaphore read).
    //
-    // Notably the soon following uvm_spin_unlock() is a release barrier that
-    // allows later memory accesses to be reordered above it and hence doesn't
-    // provide the necessary ordering with the GPU semaphore read.
+    // Notably the soon following unlock is a release barrier that allows later
+    // memory accesses to be reordered above it and hence doesn't provide the
+    // necessary ordering with the GPU semaphore read.
    //
    // Also notably this would still need to be handled if we ever switch to
    // atomic64_set_release() and atomic64_read_acquire() for accessing
@@ -539,11 +746,17 @@ NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semapho
    // Check that the GPU which owns the semaphore is still present
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));

-    uvm_spin_lock(&tracking_semaphore->lock);
+    if (tracking_semaphore_uses_mutex(tracking_semaphore))
+        uvm_mutex_lock(&tracking_semaphore->m_lock);
+    else
+        uvm_spin_lock(&tracking_semaphore->s_lock);

    completed = update_completed_value_locked(tracking_semaphore);

-    uvm_spin_unlock(&tracking_semaphore->lock);
+    if (tracking_semaphore_uses_mutex(tracking_semaphore))
+        uvm_mutex_unlock(&tracking_semaphore->m_lock);
+    else
+        uvm_spin_unlock(&tracking_semaphore->s_lock);

    return completed;
 }
--- a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.h
@@ -47,6 +47,16 @@ struct uvm_gpu_semaphore_struct

    // Pointer to the memory location
    NvU32 *payload;
+    struct {
+        NvU16 index;
+        NvU32 cached_payload;
+        uvm_rm_mem_t *encrypted_payload;
+        uvm_rm_mem_t *notifier;
+        uvm_rm_mem_t *auth_tag;
+        UvmCslIv *ivs;
+        NvU32 last_pushed_notifier;
+        NvU32 last_observed_notifier;
+    } conf_computing;
 };

 // A primitive used for tracking progress of the GPU
@@ -67,7 +77,10 @@ struct uvm_gpu_tracking_semaphore_struct
    atomic64_t completed_value;

    // Lock protecting updates to the completed_value
-    uvm_spinlock_t lock;
+    union {
+        uvm_spinlock_t s_lock;
+        uvm_mutex_t m_lock;
+    };

    // Last queued value
    // All accesses to the queued value should be handled by the user of the GPU
@@ -78,6 +91,12 @@ struct uvm_gpu_tracking_semaphore_struct
 // Create a semaphore pool for a GPU.
 NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);

+// When the Confidential Computing feature is enabled, semaphore pools
+// associated with CE channels are allocated in the CPR of vidmem and as such
+// have all the associated access restrictions. Because of this, they're called
+// secure pools and secure semaphores are allocated out of said secure pools.
+NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);
+
 // Destroy a semaphore pool
 // Locking:
 //  - Global lock needs to be held in read mode (for unmapping from all GPUs)
@@ -90,6 +109,9 @@ void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool);
 // Allocate a semaphore from the pool.
 // The semaphore will be mapped on all GPUs currently registered with the UVM
 // driver, and on all new GPUs which will be registered in the future.
+// The exception is when the Confidential Computing feature is enabled and the
+// pool is a secure pool: in that case, the semaphore is only mapped on the GPU
+// that holds the allocation.
 // The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
 // to the proxy address space.
 //
--- a/kernel-open/nvidia-uvm/uvm_hal.c
+++ b/kernel-open/nvidia-uvm/uvm_hal.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2021 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -44,11 +44,16 @@
 #include "clc86f.h"
 #include "clc8b5.h"

+static unsigned int uvm_downgrade_force_membar_sys = 1;
+module_param(uvm_downgrade_force_membar_sys, uint, 0644);
+MODULE_PARM_DESC(uvm_downgrade_force_membar_sys, "Force all TLB invalidation downgrades to use MEMBAR_SYS");
+
 #define CE_OP_COUNT (sizeof(uvm_ce_hal_t) / sizeof(void *))
 #define HOST_OP_COUNT (sizeof(uvm_host_hal_t) / sizeof(void *))
 #define ARCH_OP_COUNT (sizeof(uvm_arch_hal_t) / sizeof(void *))
 #define FAULT_BUFFER_OP_COUNT (sizeof(uvm_fault_buffer_hal_t) / sizeof(void *))
 #define ACCESS_COUNTER_BUFFER_OP_COUNT (sizeof(uvm_access_counter_buffer_hal_t) / sizeof(void *))
+#define SEC2_OP_COUNT (sizeof(uvm_sec2_hal_t) / sizeof(void *))

 // Table for copy engine functions.
 // Each entry is associated with a copy engine class through the 'class' field.
@@ -61,7 +66,7 @@ static uvm_hal_class_ops_t ce_table[] =
        .id = MAXWELL_DMA_COPY_A,
        .u.ce_ops = {
            .init = uvm_hal_maxwell_ce_init,
-            .method_validate = uvm_hal_method_validate_stub,
+            .method_is_valid = uvm_hal_method_is_valid_stub,
            .semaphore_release = uvm_hal_maxwell_ce_semaphore_release,
            .semaphore_timestamp = uvm_hal_maxwell_ce_semaphore_timestamp,
            .semaphore_reduction_inc = uvm_hal_maxwell_ce_semaphore_reduction_inc,
@@ -69,15 +74,18 @@ static uvm_hal_class_ops_t ce_table[] =
            .offset_in_out = uvm_hal_maxwell_ce_offset_in_out,
            .phys_mode = uvm_hal_maxwell_ce_phys_mode,
            .plc_mode = uvm_hal_maxwell_ce_plc_mode,
-            .memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
+            .memcopy_copy_type = uvm_hal_maxwell_ce_memcopy_copy_type,
+            .memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
            .memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
            .memcopy = uvm_hal_maxwell_ce_memcopy,
            .memcopy_v_to_v = uvm_hal_maxwell_ce_memcopy_v_to_v,
-            .memset_validate = uvm_hal_ce_memset_validate_stub,
+            .memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
            .memset_1 = uvm_hal_maxwell_ce_memset_1,
            .memset_4 = uvm_hal_maxwell_ce_memset_4,
            .memset_8 = uvm_hal_maxwell_ce_memset_8,
            .memset_v_4 = uvm_hal_maxwell_ce_memset_v_4,
+            .encrypt = uvm_hal_maxwell_ce_encrypt_unsupported,
+            .decrypt = uvm_hal_maxwell_ce_decrypt_unsupported,
        }
    },
    {
@@ -99,7 +107,15 @@ static uvm_hal_class_ops_t ce_table[] =
    {
        .id = VOLTA_DMA_COPY_A,
        .parent_id = PASCAL_DMA_COPY_B,
-        .u.ce_ops = {},
+        .u.ce_ops = {
+            .semaphore_release = uvm_hal_volta_ce_semaphore_release,
+            .semaphore_timestamp = uvm_hal_volta_ce_semaphore_timestamp,
+            .semaphore_reduction_inc = uvm_hal_volta_ce_semaphore_reduction_inc,
+            .memcopy = uvm_hal_volta_ce_memcopy,
+            .memset_1 = uvm_hal_volta_ce_memset_1,
+            .memset_4 = uvm_hal_volta_ce_memset_4,
+            .memset_8 = uvm_hal_volta_ce_memset_8,
+        },
    },
    {
        .id = TURING_DMA_COPY_A,
@@ -110,22 +126,22 @@ static uvm_hal_class_ops_t ce_table[] =
        .id = AMPERE_DMA_COPY_A,
        .parent_id = TURING_DMA_COPY_A,
        .u.ce_ops = {
-            .method_validate = uvm_hal_ampere_ce_method_validate_c6b5,
+            .method_is_valid = uvm_hal_ampere_ce_method_is_valid_c6b5,
            .phys_mode = uvm_hal_ampere_ce_phys_mode,
-            .memcopy_validate = uvm_hal_ampere_ce_memcopy_validate_c6b5,
+            .memcopy_is_valid = uvm_hal_ampere_ce_memcopy_is_valid_c6b5,
            .memcopy_patch_src = uvm_hal_ampere_ce_memcopy_patch_src_c6b5,
-            .memset_validate = uvm_hal_ampere_ce_memset_validate_c6b5,
+            .memset_is_valid = uvm_hal_ampere_ce_memset_is_valid_c6b5,
        },
    },
    {
        .id = AMPERE_DMA_COPY_B,
        .parent_id = AMPERE_DMA_COPY_A,
        .u.ce_ops = {
-            .method_validate = uvm_hal_method_validate_stub,
+            .method_is_valid = uvm_hal_method_is_valid_stub,
            .plc_mode = uvm_hal_ampere_ce_plc_mode_c7b5,
-            .memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
+            .memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
            .memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
-            .memset_validate = uvm_hal_ce_memset_validate_stub,
+            .memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
        },
    },
    {
@@ -137,9 +153,14 @@ static uvm_hal_class_ops_t ce_table[] =
            .semaphore_reduction_inc = uvm_hal_hopper_ce_semaphore_reduction_inc,
            .offset_out = uvm_hal_hopper_ce_offset_out,
            .offset_in_out = uvm_hal_hopper_ce_offset_in_out,
+            .memcopy_copy_type = uvm_hal_hopper_ce_memcopy_copy_type,
            .memset_1 = uvm_hal_hopper_ce_memset_1,
            .memset_4 = uvm_hal_hopper_ce_memset_4,
            .memset_8 = uvm_hal_hopper_ce_memset_8,
+            .memcopy_is_valid = uvm_hal_hopper_ce_memcopy_is_valid,
+            .memset_is_valid = uvm_hal_hopper_ce_memset_is_valid,
+            .encrypt = uvm_hal_hopper_ce_encrypt,
+            .decrypt = uvm_hal_hopper_ce_decrypt,
        },
    },
 };
@@ -152,8 +173,8 @@ static uvm_hal_class_ops_t host_table[] =
        .id = KEPLER_CHANNEL_GPFIFO_B,
        .u.host_ops = {
            .init = uvm_hal_maxwell_host_init_noop,
-            .method_validate = uvm_hal_method_validate_stub,
-            .sw_method_validate = uvm_hal_method_validate_stub,
+            .method_is_valid = uvm_hal_method_is_valid_stub,
+            .sw_method_is_valid = uvm_hal_method_is_valid_stub,
            .wait_for_idle = uvm_hal_maxwell_host_wait_for_idle,
            .membar_sys = uvm_hal_maxwell_host_membar_sys,
            // No MEMBAR GPU until Pascal, just do a MEMBAR SYS.
@@ -235,8 +256,8 @@ static uvm_hal_class_ops_t host_table[] =
        .id = AMPERE_CHANNEL_GPFIFO_A,
        .parent_id = TURING_CHANNEL_GPFIFO_A,
        .u.host_ops = {
-            .method_validate = uvm_hal_ampere_host_method_validate,
-            .sw_method_validate = uvm_hal_ampere_host_sw_method_validate,
+            .method_is_valid = uvm_hal_ampere_host_method_is_valid,
+            .sw_method_is_valid = uvm_hal_ampere_host_sw_method_is_valid,
            .clear_faulted_channel_sw_method = uvm_hal_ampere_host_clear_faulted_channel_sw_method,
            .clear_faulted_channel_register = uvm_hal_ampere_host_clear_faulted_channel_register,
            .tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all,
@@ -248,8 +269,8 @@ static uvm_hal_class_ops_t host_table[] =
        .id = HOPPER_CHANNEL_GPFIFO_A,
        .parent_id = AMPERE_CHANNEL_GPFIFO_A,
        .u.host_ops = {
-            .method_validate = uvm_hal_method_validate_stub,
-            .sw_method_validate = uvm_hal_method_validate_stub,
+            .method_is_valid = uvm_hal_method_is_valid_stub,
+            .sw_method_is_valid = uvm_hal_method_is_valid_stub,
            .semaphore_acquire = uvm_hal_hopper_host_semaphore_acquire,
            .semaphore_release = uvm_hal_hopper_host_semaphore_release,
            .semaphore_timestamp = uvm_hal_hopper_host_semaphore_timestamp,
@@ -352,11 +373,12 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
            .read_get = uvm_hal_maxwell_fault_buffer_read_get_unsupported,
            .write_get = uvm_hal_maxwell_fault_buffer_write_get_unsupported,
            .get_ve_id = uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported,
-            .parse_entry = uvm_hal_maxwell_fault_buffer_parse_entry_unsupported,
+            .parse_replayable_entry = uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported,
            .entry_is_valid = uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported,
            .entry_clear_valid = uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported,
            .entry_size = uvm_hal_maxwell_fault_buffer_entry_size_unsupported,
            .parse_non_replayable_entry = uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported,
+            .get_fault_type = uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported,
        }
    },
    {
@@ -374,10 +396,11 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
            .read_put = uvm_hal_pascal_fault_buffer_read_put,
            .read_get = uvm_hal_pascal_fault_buffer_read_get,
            .write_get = uvm_hal_pascal_fault_buffer_write_get,
-            .parse_entry = uvm_hal_pascal_fault_buffer_parse_entry,
+            .parse_replayable_entry = uvm_hal_pascal_fault_buffer_parse_replayable_entry,
            .entry_is_valid = uvm_hal_pascal_fault_buffer_entry_is_valid,
            .entry_clear_valid = uvm_hal_pascal_fault_buffer_entry_clear_valid,
            .entry_size = uvm_hal_pascal_fault_buffer_entry_size,
+            .get_fault_type = uvm_hal_pascal_fault_buffer_get_fault_type,
        }
    },
    {
@@ -388,8 +411,9 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
            .read_get = uvm_hal_volta_fault_buffer_read_get,
            .write_get = uvm_hal_volta_fault_buffer_write_get,
            .get_ve_id = uvm_hal_volta_fault_buffer_get_ve_id,
-            .parse_entry = uvm_hal_volta_fault_buffer_parse_entry,
+            .parse_replayable_entry = uvm_hal_volta_fault_buffer_parse_replayable_entry,
            .parse_non_replayable_entry = uvm_hal_volta_fault_buffer_parse_non_replayable_entry,
+            .get_fault_type = uvm_hal_volta_fault_buffer_get_fault_type,
        }
    },
    {
@@ -481,6 +505,59 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
    },
 };

+static uvm_hal_class_ops_t sec2_table[] = // SEC2 ops; looked up by GPU architecture id in uvm_hal_init_gpu()
+{
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000, // only entry without a parent_id
+        .u.sec2_ops = {
+            .init = uvm_hal_maxwell_sec2_init_noop,
+            .decrypt = uvm_hal_maxwell_sec2_decrypt_unsupported,
+            .semaphore_release = uvm_hal_maxwell_sec2_semaphore_release_unsupported,
+            .semaphore_timestamp = uvm_hal_maxwell_sec2_semaphore_timestamp_unsupported,
+        }
+    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
+        .u.sec2_ops = {} // no ops set here; presumably filled from the parent_id entry - TODO confirm
+    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
+        .u.sec2_ops = {}
+    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
+        .u.sec2_ops = {}
+    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
+        .u.sec2_ops = {}
+    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
+        .u.sec2_ops = {}
+    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
+        .u.sec2_ops = {}
+    },
+    {
+        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100, // sets all four ops that GM000 sets
+        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
+        .u.sec2_ops = {
+            .init = uvm_hal_hopper_sec2_init,
+            .semaphore_release = uvm_hal_hopper_sec2_semaphore_release,
+            .semaphore_timestamp = uvm_hal_hopper_sec2_semaphore_timestamp_unsupported,
+            .decrypt = uvm_hal_hopper_sec2_decrypt,
+        }
+    },
+};
+
 static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
 {
    NvLength i;
@@ -584,6 +661,15 @@ NV_STATUS uvm_hal_init_table(void)
        return status;
    }

+    status = ops_init_from_parent(sec2_table,
+                                  ARRAY_SIZE(sec2_table),
+                                  SEC2_OP_COUNT,
+                                  offsetof(uvm_hal_class_ops_t, u.sec2_ops));
+    if (status != NV_OK) {
+        UVM_ERR_PRINT("ops_init_from_parent(sec2_table) failed: %s\n", nvstatusToString(status));
+        return status;
+    }
+
    return NV_OK;
 }

@@ -634,17 +720,34 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->access_counter_buffer_hal = &class_ops->u.access_counter_buffer_ops;

+    class_ops = ops_find_by_id(sec2_table, ARRAY_SIZE(sec2_table), gpu_info->gpuArch);
+    if (class_ops == NULL) {
+        UVM_ERR_PRINT("SEC2 HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
+        return NV_ERR_INVALID_CLASS;
+    }
+
+    parent_gpu->sec2_hal = &class_ops->u.sec2_ops;
+
    return NV_OK;
 }

+static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
+{
+    // Access counters are unavailable in two configurations:
+    //  - vGPU, where they are currently not supported.
+    //    TODO: Bug 200692962: Add support for access counters in vGPU
+    //  - Confidential Computing (CC), where they are not supported.
+    const bool is_virtualized = parent_gpu->virt_mode != UVM_VIRT_MODE_NONE;
+
+    if (is_virtualized || uvm_conf_computing_mode_enabled_parent(parent_gpu))
+        parent_gpu->access_counters_supported = false;
+}
+
 void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
 {
    parent_gpu->arch_hal->init_properties(parent_gpu);

-    // Override the HAL when in non-passthrough virtualization
-    // TODO: Bug 200692962: [UVM] Add support for access counters in UVM on SR-IOV configurations
-    if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
-        parent_gpu->access_counters_supported = false;
+    hal_override_properties(parent_gpu);
 }

 void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
@@ -663,6 +766,44 @@ void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
    uvm_hal_membar(gpu, push, membar);
 }

+bool uvm_hal_membar_before_semaphore(uvm_push_t *push)
+{
+    // Map the membar requested on the push (and reset by this call) onto a flush decision.
+    switch (uvm_push_get_and_reset_membar_flag(push)) {
+        case UVM_MEMBAR_NONE:
+            // No MEMBAR requested, don't use a flush.
+            return false;
+
+        case UVM_MEMBAR_GPU:
+            // The engine flush is not capable of a MEMBAR GPU, so do it on the
+            // HOST and skip the flush.
+            uvm_hal_wfi_membar(push, UVM_MEMBAR_GPU);
+            return false;
+
+        default:
+            // Everything else gets a MEMBAR SYS, which the flush on the
+            // semaphore operation provides.
+            return true;
+    }
+}
+
+uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem)
+{
+    // If the mapped memory was remote, or if a coherence protocol can cache
+    // this GPU's memory, other processors can access the memory without
+    // always going through the local GPU L2, so a MEMBAR_SYS is required. The
+    // uvm_downgrade_force_membar_sys module parameter also forces MEMBAR_SYS.
+    if (!is_local_vidmem || uvm_gpu_is_coherent(gpu->parent) || uvm_downgrade_force_membar_sys)
+        return UVM_MEMBAR_SYS;
+
+    // Otherwise the memory is local and no coherence protocol is in use, so
+    // all accesses to it, including those from other processors like the CPU
+    // or peer GPUs, must come through this GPU's L2. In all current
+    // architectures, MEMBAR_GPU is sufficient to resolve ordering at the L2
+    // level.
+    return UVM_MEMBAR_GPU;
+}
+
 const char *uvm_aperture_string(uvm_aperture_t aperture)
 {
    BUILD_BUG_ON(UVM_APERTURE_MAX != 12);
@@ -823,12 +964,12 @@ void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_e
    UVM_DBG_PRINT("    tag             %x\n", entry->tag);
 }

-bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
+bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
 {
    return true;
 }

-bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
+bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
 {
    return true;
 }
@@ -837,7 +978,7 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
 {
 }

-bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
+bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t num_elements, size_t element_size)
 {
    return true;
 }
--- a/kernel-open/nvidia-uvm/uvm_hal.h
+++ b/kernel-open/nvidia-uvm/uvm_hal.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -34,20 +34,22 @@

 // A dummy method validation that always returns true; it can be used to skip
 // CE/Host/SW method validations for a given architecture
-bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
+bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

 typedef void (*uvm_hal_init_t)(uvm_push_t *push);
 void uvm_hal_maxwell_ce_init(uvm_push_t *push);
 void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
 void uvm_hal_pascal_host_init(uvm_push_t *push);
+void uvm_hal_maxwell_sec2_init_noop(uvm_push_t *push);
+void uvm_hal_hopper_sec2_init(uvm_push_t *push);

 // Host method validation
-typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
-bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
+typedef bool (*uvm_hal_host_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
+bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

 // SW method validation
-typedef bool (*uvm_hal_host_sw_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
-bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
+typedef bool (*uvm_hal_host_sw_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
+bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

 // Wait for idle
 typedef void (*uvm_hal_wait_for_idle_t)(uvm_push_t *push);
@@ -207,8 +209,11 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
 typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
+void uvm_hal_maxwell_sec2_semaphore_release_unsupported(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
+void uvm_hal_volta_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
+void uvm_hal_hopper_sec2_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

@@ -220,21 +225,37 @@ void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
 typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
 void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
 void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
+void uvm_hal_volta_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
 void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);

 void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
 void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
 void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);

+void uvm_hal_maxwell_sec2_semaphore_timestamp_unsupported(uvm_push_t *push, NvU64 gpu_va);
+void uvm_hal_hopper_sec2_semaphore_timestamp_unsupported(uvm_push_t *push, NvU64 gpu_va);
+
 typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

-typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
-void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
-void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
-void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
+typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry,
+                                                NvU64 pushbuffer_va,
+                                                NvU32 pushbuffer_length,
+                                                uvm_gpfifo_sync_t sync_flag);
+void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry,
+                                           NvU64 pushbuffer_va,
+                                           NvU32 pushbuffer_length,
+                                           uvm_gpfifo_sync_t sync_flag);
+void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry,
+                                          NvU64 pushbuffer_va,
+                                          NvU32 pushbuffer_length,
+                                          uvm_gpfifo_sync_t sync_flag);
+void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry,
+                                          NvU64 pushbuffer_va,
+                                          NvU32 pushbuffer_length,
+                                          uvm_gpfifo_sync_t sync_flag);

 typedef void (*uvm_hal_host_set_gpfifo_noop_t)(NvU64 *fifo_entry);
 void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry);
@@ -271,17 +292,22 @@ typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
 NvU32 uvm_hal_maxwell_ce_plc_mode(void);
 NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);

+typedef NvU32 (*uvm_hal_ce_memcopy_type_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
+NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
+NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
+
 // CE method validation
-typedef bool (*uvm_hal_ce_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
-bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
+typedef bool (*uvm_hal_ce_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
+bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

 // Memcopy validation.
 // The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
-// execution. Use uvm_hal_ce_memcopy_validate_stub to skip the validation for
+// execution. Use uvm_hal_ce_memcopy_is_valid_stub to skip the validation for
 // a given architecture.
-typedef bool (*uvm_hal_ce_memcopy_validate)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
-bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
-bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
+typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
+bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
+bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
+bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);

 // Patching of the memcopy source; if not needed for a given architecture use
 // the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation
@@ -296,6 +322,7 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
 // UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
 typedef void (*uvm_hal_memcopy_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
 void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
+void uvm_hal_volta_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);

 // Simple wrapper for uvm_hal_memcopy_t with both addresses being virtual
 typedef void (*uvm_hal_memcopy_v_to_v_t)(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
@@ -303,11 +330,21 @@ void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, s

 // Memset validation.
 // The validation happens at the start of the memset (uvm_hal_memset_*_t)
-// execution. Use uvm_hal_ce_memset_validate_stub to skip the validation for
+// execution. Use uvm_hal_ce_memset_is_valid_stub to skip the validation for
 // a given architecture.
-typedef bool (*uvm_hal_ce_memset_validate)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
-bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
-bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
+typedef bool (*uvm_hal_ce_memset_is_valid)(uvm_push_t *push,
+                                           uvm_gpu_address_t dst,
+                                           size_t num_elements,
+                                           size_t element_size);
+bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t num_elements, size_t element_size);
+bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
+                                            uvm_gpu_address_t dst,
+                                            size_t num_elements,
+                                            size_t element_size);
+bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
+                                       uvm_gpu_address_t dst,
+                                       size_t num_elements,
+                                       size_t element_size);

 // Memset size bytes at dst to a given N-byte input value.
 //
@@ -329,10 +366,62 @@ void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32
 void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
 void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);

+void uvm_hal_volta_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
+void uvm_hal_volta_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
+void uvm_hal_volta_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
+
 void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
 void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
 void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);

+// Encrypts the contents of the source buffer into the destination buffer, up to
+// the given size. The authentication tag of the encrypted contents is written
+// to auth_tag, so it can be verified later on by a decrypt operation.
+//
+// The addressing modes of the destination and authentication tag addresses
+// should match. If the addressing mode is physical, then the address apertures
+// should also match.
+typedef void (*uvm_hal_ce_encrypt_t)(uvm_push_t *push,
+                                     uvm_gpu_address_t dst,
+                                     uvm_gpu_address_t src,
+                                     NvU32 size,
+                                     uvm_gpu_address_t auth_tag);
+
+// Decrypts the contents of the source buffer into the destination buffer, up to
+// the given size. The method also verifies the integrity of the encrypted
+// buffer by calculating its authentication tag, and comparing it with the one
+// provided as argument.
+//
+// The addressing modes of the source and authentication tag addresses should
+// match. If the addressing mode is physical, then the address apertures should
+// also match.
+typedef void (*uvm_hal_ce_decrypt_t)(uvm_push_t *push,
+                                     uvm_gpu_address_t dst,
+                                     uvm_gpu_address_t src,
+                                     NvU32 size,
+                                     uvm_gpu_address_t auth_tag);
+
+void uvm_hal_maxwell_ce_encrypt_unsupported(uvm_push_t *push,
+                                            uvm_gpu_address_t dst,
+                                            uvm_gpu_address_t src,
+                                            NvU32 size,
+                                            uvm_gpu_address_t auth_tag);
+void uvm_hal_maxwell_ce_decrypt_unsupported(uvm_push_t *push,
+                                            uvm_gpu_address_t dst,
+                                            uvm_gpu_address_t src,
+                                            NvU32 size,
+                                            uvm_gpu_address_t auth_tag);
+void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
+                               uvm_gpu_address_t dst,
+                               uvm_gpu_address_t src,
+                               NvU32 size,
+                               uvm_gpu_address_t auth_tag);
+void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
+                               uvm_gpu_address_t dst,
+                               uvm_gpu_address_t src,
+                               NvU32 size,
+                               uvm_gpu_address_t auth_tag);
+
 // Increments the semaphore by 1, or resets to 0 if the incremented value would
 // exceed the payload.
 //
@@ -342,6 +431,7 @@ void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 v
 typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
+void uvm_hal_volta_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
 void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

 // Initialize GPU architecture dependent properties
@@ -395,15 +485,29 @@ typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
 typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
 typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);

-// Parse the entry on the given buffer index. This also clears the valid bit of
-// the entry in the buffer.
-typedef void (*uvm_hal_fault_buffer_parse_entry_t)(uvm_parent_gpu_t *gpu,
-                                                   NvU32 index,
-                                                   uvm_fault_buffer_entry_t *buffer_entry);
+// Parse the replayable entry at the given buffer index. This also clears the
+// valid bit of the entry in the buffer.
+typedef NV_STATUS (*uvm_hal_fault_buffer_parse_replayable_entry_t)(uvm_parent_gpu_t *gpu,
+                                                                   NvU32 index,
+                                                                   uvm_fault_buffer_entry_t *buffer_entry);
+
+NV_STATUS uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
+                                                                          NvU32 index,
+                                                                          uvm_fault_buffer_entry_t *buffer_entry);
+
+NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
+                                                             NvU32 index,
+                                                             uvm_fault_buffer_entry_t *buffer_entry);
+
+NV_STATUS uvm_hal_volta_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
+                                                            NvU32 index,
+                                                            uvm_fault_buffer_entry_t *buffer_entry);
+
 typedef bool (*uvm_hal_fault_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 typedef void (*uvm_hal_fault_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 typedef NvU32 (*uvm_hal_fault_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
 typedef void (*uvm_hal_fault_buffer_replay_t)(uvm_push_t *push, uvm_fault_replay_type_t type);
+typedef uvm_fault_type_t (*uvm_hal_fault_buffer_get_fault_type_t)(const NvU32 *fault_entry);
 typedef void (*uvm_hal_fault_cancel_global_t)(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
 typedef void (*uvm_hal_fault_cancel_targeted_t)(uvm_push_t *push,
                                                uvm_gpu_phys_address_t instance_ptr,
@@ -417,25 +521,24 @@ NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent
 NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
-void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
-                                                          NvU32 index,
-                                                          uvm_fault_buffer_entry_t *buffer_entry);
+uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry);
+
 void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_pascal_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
 NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
 NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-void uvm_hal_pascal_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
-                                             NvU32 index,
-                                             uvm_fault_buffer_entry_t *buffer_entry);
+
+uvm_fault_type_t uvm_hal_pascal_fault_buffer_get_fault_type(const NvU32 *fault_entry);
+
 NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
 NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
 NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
-void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
-                                            NvU32 index,
-                                            uvm_fault_buffer_entry_t *buffer_entry);
+
+uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry);
+
 void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
 NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
@@ -576,11 +679,33 @@ void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
 void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);

+// The source and destination addresses must be 16-byte aligned. Note that the
+// best performance is achieved with 256-byte alignment. The decrypt size must
+// be larger than 0, and a multiple of 4 bytes.
+//
+// The authentication tag address must also be 16-byte aligned.
+// The authentication tag buffer size is UVM_CONF_COMPUTING_AUTH_TAG_SIZE bytes
+// defined in uvm_conf_computing.h.
+//
+// Decrypts the src buffer into the dst buffer of the given size.
+// The method also verifies integrity of the src buffer by calculating its
+// authentication tag and comparing it with the provided one.
+//
+// Note: SEC2 does not support encryption.
+typedef void (*uvm_hal_sec2_decrypt_t)(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, NvU32 size, NvU64 auth_tag_va);
+
+void uvm_hal_maxwell_sec2_decrypt_unsupported(uvm_push_t *push,
+                                              NvU64 dst_va,
+                                              NvU64 src_va,
+                                              NvU32 size,
+                                              NvU64 auth_tag_va);
+void uvm_hal_hopper_sec2_decrypt(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, NvU32 size, NvU64 auth_tag_va);
+
 struct uvm_host_hal_struct
 {
    uvm_hal_init_t init;
-    uvm_hal_host_method_validate method_validate;
-    uvm_hal_host_sw_method_validate sw_method_validate;
+    uvm_hal_host_method_is_valid method_is_valid;
+    uvm_hal_host_sw_method_is_valid sw_method_is_valid;
    uvm_hal_wait_for_idle_t wait_for_idle;
    uvm_hal_membar_sys_t membar_sys;
    uvm_hal_membar_gpu_t membar_gpu;
@@ -612,23 +737,26 @@ struct uvm_host_hal_struct
 struct uvm_ce_hal_struct
 {
    uvm_hal_init_t init;
-    uvm_hal_ce_method_validate method_validate;
+    uvm_hal_ce_method_is_valid method_is_valid;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
    uvm_hal_ce_offset_out_t offset_out;
    uvm_hal_ce_offset_in_out_t offset_in_out;
    uvm_hal_ce_phys_mode_t phys_mode;
    uvm_hal_ce_plc_mode_t plc_mode;
-    uvm_hal_ce_memcopy_validate memcopy_validate;
+    uvm_hal_ce_memcopy_type_t memcopy_copy_type;
+    uvm_hal_ce_memcopy_is_valid memcopy_is_valid;
    uvm_hal_ce_memcopy_patch_src memcopy_patch_src;
    uvm_hal_memcopy_t memcopy;
    uvm_hal_memcopy_v_to_v_t memcopy_v_to_v;
-    uvm_hal_ce_memset_validate memset_validate;
+    uvm_hal_ce_memset_is_valid memset_is_valid;
    uvm_hal_memset_1_t memset_1;
    uvm_hal_memset_4_t memset_4;
    uvm_hal_memset_8_t memset_8;
    uvm_hal_memset_v_4_t memset_v_4;
    uvm_hal_semaphore_reduction_inc_t semaphore_reduction_inc;
+    uvm_hal_ce_encrypt_t encrypt;
+    uvm_hal_ce_decrypt_t decrypt;
 };

 struct uvm_arch_hal_struct
@@ -650,11 +778,12 @@ struct uvm_fault_buffer_hal_struct
    uvm_hal_fault_buffer_read_get_t read_get;
    uvm_hal_fault_buffer_write_get_t write_get;
    uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
-    uvm_hal_fault_buffer_parse_entry_t parse_entry;
+    uvm_hal_fault_buffer_parse_replayable_entry_t parse_replayable_entry;
    uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
    uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;
    uvm_hal_fault_buffer_entry_size_t entry_size;
    uvm_hal_fault_buffer_parse_non_replayable_entry_t parse_non_replayable_entry;
+    uvm_hal_fault_buffer_get_fault_type_t get_fault_type;
 };

 struct uvm_access_counter_buffer_hal_struct
@@ -668,6 +797,14 @@ struct uvm_access_counter_buffer_hal_struct
    uvm_hal_access_counter_buffer_entry_size_t entry_size;
 };

+struct uvm_sec2_hal_struct
+{
+    uvm_hal_init_t init;
+    uvm_hal_sec2_decrypt_t decrypt;
+    uvm_hal_semaphore_release_t semaphore_release;
+    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
+};
+
 typedef struct
 {
    // id is either a hardware class or GPU architecture
@@ -690,6 +827,8 @@ typedef struct
        // access_counter_buffer_ops: id is an architecture
        uvm_access_counter_buffer_hal_t access_counter_buffer_ops;

+        // sec2_ops: id is an architecture
+        uvm_sec2_hal_t sec2_ops;
    } u;
 } uvm_hal_class_ops_t;

@@ -726,4 +865,20 @@ static void uvm_hal_wfi_membar(uvm_push_t *push, uvm_membar_t membar)
 // appropriate Host membar(s) after a TLB invalidate.
 void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar);

+// Internal helper used by architectures/engines that don't support a FLUSH
+// operation with a FLUSH_TYPE on the semaphore release method, e.g., pre-Volta
+// CE. It inspects and clears the MEMBAR push flags, issues a Host WFI +
+// membar.gpu for MEMBAR_GPU or returns true to tell the caller to use the
+// engine's FLUSH for MEMBAR_SYS.
+bool uvm_hal_membar_before_semaphore(uvm_push_t *push);
+
+// Determine the appropriate membar to use on TLB invalidates for GPU PTE
+// permissions downgrades.
+//
+// gpu is the GPU on which the TLB invalidate is happening.
+//
+// is_local_vidmem indicates whether all mappings being invalidated pointed to
+// the local GPU's memory.
+uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem);
+
 #endif // __UVM_HAL_H__
--- a/kernel-open/nvidia-uvm/uvm_hal_types.h
+++ b/kernel-open/nvidia-uvm/uvm_hal_types.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2019 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -111,6 +111,11 @@ typedef struct

    // Whether the address is virtual
    bool is_virtual;
+
+    // Whether the address resides in a non-protected memory region when the
+    // Confidential Computing feature is enabled. Default is protected.
+    // Ignored if the feature is disabled and should not be used.
+    bool is_unprotected;
 } uvm_gpu_address_t;

 // Create a virtual GPU address
@@ -123,6 +128,13 @@ static uvm_gpu_address_t uvm_gpu_address_virtual(NvU64 va)
    return address;
 }

+static uvm_gpu_address_t uvm_gpu_address_virtual_unprotected(NvU64 va)
+{
+    uvm_gpu_address_t address = uvm_gpu_address_virtual(va);
+    address.is_unprotected = true;
+    return address;
+}
+
 // Create a physical GPU address
 static uvm_gpu_address_t uvm_gpu_address_physical(uvm_aperture_t aperture, NvU64 pa)
 {
@@ -258,8 +270,8 @@ typedef enum
    UVM_FAULT_CANCEL_VA_MODE_COUNT,
 } uvm_fault_cancel_va_mode_t;

-// Types of faults that can show up in the fault buffer. Non-UVM related faults are grouped in FATAL category
-// since we don't care about the specific type
+// Types of faults that can show up in the fault buffer. Non-UVM related faults
+// are grouped in FATAL category since we don't care about the specific type.
 typedef enum
 {
    UVM_FAULT_TYPE_INVALID_PDE = 0,
@@ -272,7 +284,8 @@ typedef enum
    // READ to WRITE-ONLY (ATS)
    UVM_FAULT_TYPE_READ,

-    // The next values are considered fatal and are not handled by the UVM driver
+    // The next values are considered fatal and are not handled by the UVM
+    // driver
    UVM_FAULT_TYPE_FATAL,

    // Values required for tools
@@ -311,10 +324,24 @@ typedef enum
    UVM_MMU_ENGINE_TYPE_COUNT,
 } uvm_mmu_engine_type_t;

+typedef enum
+{
+    // Allow entry to be fetched before the previous entry finishes ESCHED
+    // execution.
+    UVM_GPFIFO_SYNC_PROCEED = 0,
+
+    // Fetch of this entry has to wait until the previous entry has finished
+    // executing by ESCHED.
+    // For a complete engine sync the previous entry needs to include
+    // a WAIT_FOR_IDLE command or other engine synchronization.
+    UVM_GPFIFO_SYNC_WAIT,
+} uvm_gpfifo_sync_t;
+
 const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type);

-// HW unit that triggered the fault. We include the fields required for fault cancelling. Including more information
-// might be useful for performance heuristics in the future
+// HW unit that triggered the fault. We include the fields required for fault
+// cancelling. Including more information might be useful for performance
+// heuristics in the future.
 typedef struct
 {
    uvm_fault_client_type_t                client_type  : order_base_2(UVM_FAULT_CLIENT_TYPE_COUNT) + 1;
@@ -429,7 +456,8 @@ typedef enum
    // Completes when all fault replays are in-flight
    UVM_FAULT_REPLAY_TYPE_START = 0,

-    // Completes when all faulting accesses have been correctly translated or faulted again
+    // Completes when all faulting accesses have been correctly translated or
+    // faulted again
    UVM_FAULT_REPLAY_TYPE_START_ACK_ALL,

    UVM_FAULT_REPLAY_TYPE_MAX
@@ -467,18 +495,18 @@ struct uvm_access_counter_buffer_entry_struct
    {
        struct
        {
-            // Instance pointer of one of the channels in the TSG that triggered the
-            // notification
+            // Instance pointer of one of the channels in the TSG that triggered
+            // the notification.
            uvm_gpu_phys_address_t instance_ptr;

            uvm_mmu_engine_type_t mmu_engine_type;

            NvU32 mmu_engine_id;

-            // Identifier of the subcontext that performed the memory accesses that
-            // triggered the notification. This value, combined with the instance_ptr,
-            // is needed to obtain the GPU VA space of the process that triggered the
-            // notification.
+            // Identifier of the subcontext that performed the memory accesses
+            // that triggered the notification. This value, combined with the
+            // instance_ptr, is needed to obtain the GPU VA space of the process
+            // that triggered the notification.
            NvU32 ve_id;

            // VA space for the address that triggered the notification
@@ -524,8 +552,8 @@ static uvm_prot_t uvm_fault_access_type_to_prot(uvm_fault_access_type_t access_t
            return UVM_PROT_READ_WRITE;

        default:
-            // Prefetch faults, if not ignored, are handled like read faults and require
-            // a mapping with, at least, READ_ONLY access permission
+            // Prefetch faults, if not ignored, are handled like read faults and
+            // require a mapping with, at least, READ_ONLY access permission.
            return UVM_PROT_READ_ONLY;
    }
 }
--- a/kernel-open/nvidia-uvm/uvm_hmm.c
+++ b/kernel-open/nvidia-uvm/uvm_hmm.c
--- a/kernel-open/nvidia-uvm/uvm_hmm.h
+++ b/kernel-open/nvidia-uvm/uvm_hmm.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2022 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -37,19 +37,10 @@ typedef struct
    // This stores pointers to uvm_va_block_t for HMM blocks.
    uvm_range_tree_t blocks;
    uvm_mutex_t blocks_lock;
-
-    // TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
-    // This flag is set true by default for each va_space so most processes
-    // don't see partially implemented UVM-HMM behavior but can be enabled by
-    // test code for a given va_space so the test process can do some interim
-    // testing. It needs to be a separate flag instead of modifying
-    // uvm_disable_hmm or va_space->flags since those are user inputs and are
-    // visible/checked by test code.
-    // Remove this when UVM-HMM is fully integrated into chips_a.
-    bool disable;
 } uvm_hmm_va_space_t;

 #if UVM_IS_CONFIG_HMM()
+
    // Tells whether HMM is enabled for the given va_space.
    // If it is not enabled, all of the functions below are no-ops.
    bool uvm_hmm_is_enabled(uvm_va_space_t *va_space);
@@ -58,21 +49,27 @@ typedef struct
    bool uvm_hmm_is_enabled_system_wide(void);

    // Initialize HMM for the given the va_space.
-    // Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
-    // and the va_space lock must be held in write mode.
-    NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
-
-    // Initialize HMM for the given the va_space for testing.
-    // Bug 1750144: UVM: Add HMM (Heterogeneous Memory Management) support to
-    // the UVM driver. Remove this when enough HMM functionality is implemented.
-    // Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
-    // and the va_space lock must be held in write mode.
-    NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space);
+    void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);

    // Destroy any HMM state for the given the va_space.
    // Locking: va_space lock must be held in write mode.
    void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space);

+    // Unmap all page tables in this VA space which map memory owned by this
+    // GPU. Any memory still resident on this GPU will be evicted to system
+    // memory. Note that 'mm' can be NULL (e.g., when closing the UVM file)
+    // in which case any GPU memory is simply freed.
+    // Locking: if mm is not NULL, the caller must hold mm->mmap_lock in at
+    // least read mode and the va_space lock must be held in write mode.
+    void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_struct *mm);
+
+    // Destroy the VA space's mappings on the GPU, if it has any.
+    // Locking: if mm is not NULL, the caller must hold mm->mmap_lock in at
+    // least read mode and the va_space lock must be held in write mode.
+    void uvm_hmm_remove_gpu_va_space(uvm_va_space_t *va_space,
+                                     uvm_gpu_va_space_t *gpu_va_space,
+                                     struct mm_struct *mm);
+
    // Find an existing HMM va_block.
    // This function can be called without having retained and locked the mm,
    // but in that case, the only allowed operations on the returned block are
@@ -91,32 +88,51 @@ typedef struct
    // address 'addr' or the VMA does not have at least PROT_READ permission.
    // The caller is also responsible for checking that there is no UVM
    // va_range covering the given address before calling this function.
-    // If va_block_context is not NULL, the VMA is cached in
-    // va_block_context->hmm.vma.
+    // The VMA is returned in vma_out if it's not NULL.
    // Locking: This function must be called with mm retained and locked for
    // at least read and the va_space lock at least for read.
    NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
                                           NvU64 addr,
-                                           uvm_va_block_context_t *va_block_context,
+                                           struct vm_area_struct **vma_out,
                                           uvm_va_block_t **va_block_ptr);

-    // Find the VMA for the given address and set va_block_context->hmm.vma.
-    // Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
-    // is no VMA associated with the address 'addr' or the VMA does not have at
-    // least PROT_READ permission.
+    // Find the VMA for the given address and return it in vma_out. Return
+    // NV_ERR_INVALID_ADDRESS if mm is NULL or there is no VMA associated with
+    // the address 'addr' or the VMA does not have at least PROT_READ
+    // permission.
    // Locking: This function must be called with mm retained and locked for
    // at least read or mm equal to NULL.
-    NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);
+    NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma_out, NvU64 addr);

-    // If va_block is a HMM va_block, check that va_block_context->hmm.vma is
-    // not NULL and covers the given region. This always returns true and is
-    // intended to only be used with UVM_ASSERT().
+    // If va_block is a HMM va_block, check that vma is not NULL and covers the
+    // given region. This always returns true and is intended to only be used
+    // with UVM_ASSERT().
    // Locking: This function must be called with the va_block lock held and if
-    // va_block is a HMM block, va_block_context->mm must be retained and
-    // locked for at least read.
-    bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
-                                               uvm_va_block_context_t *va_block_context,
-                                               uvm_va_block_region_t region);
+    // va_block is a HMM block, va_space->va_space_mm.mm->mmap_lock must be
+    // retained and locked for at least read.
+    bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
+                                            struct vm_area_struct *vma,
+                                            uvm_va_block_region_t region);
+
+    // Initialize the HMM portion of the service_context.
+    // This should be called one time before any retry loops calling
+    // uvm_va_block_service_locked().
+    void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context);
+
+    // Begin a migration critical section. When calling into the kernel it is
+    // sometimes necessary to drop the va_block lock. This function returns
+    // NV_OK when no other thread has started a migration critical section.
+    // Otherwise, it returns NV_ERR_BUSY_RETRY and threads should then retry
+    // this function to begin a critical section.
+    // Locking: va_block lock must not be held.
+    NV_STATUS uvm_hmm_migrate_begin(uvm_va_block_t *va_block);
+
+    // Same as uvm_hmm_migrate_begin() but waits if required before beginning a
+    // critical section.
+    void uvm_hmm_migrate_begin_wait(uvm_va_block_t *va_block);
+
+    // Finish a migration critical section.
+    void uvm_hmm_migrate_finish(uvm_va_block_t *va_block);

    // Find or create a HMM va_block and mark it so the next va_block split
    // will fail for testing purposes.
@@ -168,7 +184,8 @@ typedef struct
    NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
                                             uvm_processor_id_t preferred_location,
                                             NvU64 base,
-                                             NvU64 last_address);
+                                             NvU64 last_address,
+                                             uvm_tracker_t *out_tracker);

    // Set the accessed by policy for the given range. This also tries to
    // map the range. Note that 'last_address' is inclusive.
@@ -178,7 +195,17 @@ typedef struct
                                      uvm_processor_id_t processor_id,
                                      bool set_bit,
                                      NvU64 base,
-                                      NvU64 last_address);
+                                      NvU64 last_address,
+                                      uvm_tracker_t *out_tracker);
+
+    // Deferred work item to reestablish accessed by mappings after eviction. On
+    // GPUs with access counters enabled, the evicted GPU will also get remote
+    // mappings.
+    // Locking: the va_space->va_space_mm.mm mmap_lock must be locked
+    // and the va_space lock must be held in at least read mode.
+    void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
+                                             uvm_va_block_t *va_block,
+                                             uvm_va_block_context_t *block_context);

    // Set the read duplication policy for the given range.
    // Note that 'last_address' is inclusive.
@@ -195,31 +222,29 @@ typedef struct
        return NV_OK;
    }

-    // This function assigns va_block_context->policy to the policy covering
-    // the given address 'addr' and assigns the ending address '*endp' to the
-    // minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
-    // ending address of the policy range. Note that va_block_context->hmm.vma
-    // is expected to be initialized before calling this function.
-    // Locking: This function must be called with
-    // va_block_context->hmm.vma->vm_mm retained and locked for least read and
-    // the va_block lock held.
-    void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
-                                 uvm_va_block_context_t *va_block_context,
-                                 unsigned long addr,
-                                 NvU64 *endp);
+    // This function returns the policy covering the given address 'addr' and
+    // assigns the ending address '*endp' to the minimum of va_block->end,
+    // vma->vm_end - 1, and the ending address of the policy range. Locking:
+    // This function must be called with vma->vm_mm retained and locked for at
+    // least read and the va_block and va_space lock held.
+    const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
+                                                   struct vm_area_struct *vma,
+                                                   unsigned long addr,
+                                                   NvU64 *endp);

-    // This function finds the VMA for the page index 'page_index' and assigns
-    // it to va_block_context->vma, sets va_block_context->policy to the policy
-    // covering the given address, and sets the ending page range '*outerp'
-    // to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
-    // ending address of the policy range, and va_block->end.
-    // Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
-    // Locking: This function must be called with
-    // va_block_context->hmm.vma->vm_mm retained and locked for least read and
-    // the va_block lock held.
+    // This function finds the VMA for the page index 'page_index' and returns
+    // it in *vma ('vma' must not be NULL). Returns the policy covering the
+    // given address in *policy, and sets the ending page range '*outerp' to the
+    // minimum of *outerp, vma->vm_end - 1, the ending address of the policy
+    // range, and va_block->end.
+    // Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise sets *vma
+    // and returns NV_OK.
+    // Locking: This function must be called with mm retained and locked for at
+    // least read and the va_block and va_space lock held.
    NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
-                                                uvm_va_block_context_t *va_block_context,
+                                                struct vm_area_struct **vma,
                                                uvm_page_index_t page_index,
+                                                const uvm_va_policy_t **policy,
                                                uvm_page_index_t *outerp);

    // Clear thrashing policy information from all HMM va_blocks.
@@ -228,27 +253,126 @@ typedef struct

    // Return the expanded region around 'address' limited to the intersection
    // of va_block start/end, vma start/end, and policy start/end.
-    // va_block_context must not be NULL, va_block_context->hmm.vma must be
-    // valid (this is usually set by uvm_hmm_va_block_find_create()), and
-    // va_block_context->policy must be valid.
-    // Locking: the caller must hold mm->mmap_lock in at least read mode, the
-    // va_space lock must be held in at least read mode, and the va_block lock
-    // held.
+    // Locking: the caller must hold va_space->va_space_mm.mm->mmap_lock in at
+    // least read mode, the va_space lock must be held in at least read mode,
+    // and the va_block lock held.
    uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
-                                                      uvm_va_block_context_t *va_block_context,
+                                                      struct vm_area_struct *vma,
+                                                      const uvm_va_policy_t *policy,
                                                      NvU64 address);

    // Return the logical protection allowed of a HMM va_block for the page at
-    // the given address.
-    // va_block_context must not be NULL and va_block_context->hmm.vma must be
-    // valid (this is usually set by uvm_hmm_va_block_find_create()).
-    // Locking: the caller must hold va_block_context->mm mmap_lock in at least
-    // read mode.
+    // the given address within the vma, which must be valid (the vma is
+    // usually obtained from uvm_hmm_va_block_find_create()).
+    // Locking: the caller must hold va_space->va_space_mm.mm mmap_lock in at
+    // least read mode.
    uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
-                                            uvm_va_block_context_t *va_block_context,
+                                            struct vm_area_struct *vma,
                                            NvU64 addr);

-    NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp);
+    // This is called to service a GPU fault.
+    // Locking: the va_space->va_space_mm.mm mmap_lock must be locked,
+    // the va_space read lock must be held, and the va_block lock held.
+    NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
+                                              uvm_processor_id_t new_residency,
+                                              uvm_va_block_t *va_block,
+                                              uvm_va_block_retry_t *va_block_retry,
+                                              uvm_service_block_context_t *service_context);
+
+    // This is called to migrate a region within a HMM va_block.
+    // va_block_context must not be NULL and va_block_context->hmm.vma
+    // must be valid.
+    // Locking: the va_space->va_space_mm.mm must be retained, mmap_lock must be
+    // locked, and the va_block lock held.
+    NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
+                                              uvm_va_block_retry_t *va_block_retry,
+                                              uvm_va_block_context_t *va_block_context,
+                                              uvm_processor_id_t dest_id,
+                                              uvm_va_block_region_t region,
+                                              uvm_make_resident_cause_t cause);
+
+    // This is called to migrate an address range of HMM allocations via
+    // UvmMigrate().
+    //
+    // va_block_context must not be NULL. The caller is not required to set
+    // va_block_context->hmm.vma.
+    //
+    // Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
+    // the va_space read lock must be held.
+    NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
+                                     uvm_va_block_context_t *va_block_context,
+                                     NvU64 base,
+                                     NvU64 length,
+                                     uvm_processor_id_t dest_id,
+                                     uvm_migrate_mode_t mode,
+                                     uvm_tracker_t *out_tracker);
+
+    // Evicts all va_blocks in the va_space to the CPU. Unlike the
+    // other va_block eviction functions this is based on virtual
+    // address and therefore takes mmap_lock for read.
+    void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space);
+
+    // This sets the va_block_context->hmm.src_pfns[] to the ZONE_DEVICE private
+    // PFN for the GPU chunk memory.
+    NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
+                                                uvm_va_block_context_t *va_block_context,
+                                                uvm_gpu_chunk_t *gpu_chunk,
+                                                uvm_va_block_region_t chunk_region);
+
+    // Migrate pages to system memory for the given page mask.
+    // Note that the mmap lock is not held and there is no MM retained.
+    // This must be called after uvm_hmm_va_block_evict_chunk_prep() has
+    // initialized va_block_context->hmm.src_pfns[] for the source GPU physical
+    // PFNs being migrated. Note that the input mask 'pages_to_evict' can be
+    // modified. If any of the evicted pages has the accessed by policy set,
+    // then record that by setting out_accessed_by_set.
+    // Locking: the va_block lock must be locked.
+    NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
+                                            uvm_va_block_context_t *va_block_context,
+                                            const uvm_page_mask_t *pages_to_evict,
+                                            uvm_va_block_region_t region,
+                                            bool *out_accessed_by_set);
+
+    // Migrate pages from the given GPU to system memory for the given page
+    // mask and region. va_block_context must not be NULL.
+    // Note that the mmap lock is not held and there is no MM retained.
+    // Locking: the va_block lock must be locked.
+    NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
+                                                    uvm_gpu_t *gpu,
+                                                    uvm_va_block_context_t *va_block_context,
+                                                    const uvm_page_mask_t *pages_to_evict,
+                                                    uvm_va_block_region_t region);
+
+    // Migrate a GPU device-private page to system memory. This is
+    // called to remove CPU page table references to device private
+    // struct pages for the given GPU after all other references in
+    // va_blocks have been released and the GPU is in the process of
+    // being removed/torn down. Note that there is no mm, VMA,
+    // va_block or any user channel activity on this GPU.
+    NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn);
+
+    // This returns what would be the intersection of va_block start/end and
+    // VMA start/end-1 for the given 'lookup_address' if
+    // uvm_hmm_va_block_find_create() was called.
+    // Locking: the caller must hold mm->mmap_lock in at least read mode and
+    // the va_space lock must be held in at least read mode.
+    NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
+                                            struct mm_struct *mm,
+                                            NvU64 lookup_address,
+                                            NvU64 *startp,
+                                            NvU64 *endp,
+                                            UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params);
+
+    // This updates the HMM va_block CPU residency information for a single
+    // page at 'lookup_address' by calling hmm_range_fault(). If 'populate' is
+    // true, the CPU page will be faulted in read/write or read-only
+    // (depending on the permission of the underlying VMA at lookup_address).
+    // Locking: the caller must hold mm->mmap_lock in at least read mode and
+    // the va_space lock must be held in at least read mode.
+    NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block,
+                                                     struct mm_struct *mm,
+                                                     NvU64 lookup_address,
+                                                     bool populate);

    NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
                                              struct file *filp);
@@ -280,20 +404,24 @@ typedef struct
        return false;
    }

-    static NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
+    static void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
    {
-        return NV_OK;
-    }
-
-    static NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
-    {
-        return NV_WARN_NOTHING_TO_DO;
    }

    static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
    {
    }

+    static void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_struct *mm)
+    {
+    }
+
+    static void uvm_hmm_remove_gpu_va_space(uvm_va_space_t *va_space,
+                                            uvm_gpu_va_space_t *gpu_va_space,
+                                            struct mm_struct *mm)
+    {
+    }
+
    static NV_STATUS uvm_hmm_va_block_find(uvm_va_space_t *va_space,
                                           NvU64 addr,
                                           uvm_va_block_t **va_block_ptr)
@@ -303,24 +431,41 @@ typedef struct

    static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
                                                  NvU64 addr,
-                                                  uvm_va_block_context_t *va_block_context,
+                                                  struct vm_area_struct **vma,
                                                  uvm_va_block_t **va_block_ptr)
    {
        return NV_ERR_INVALID_ADDRESS;
    }

-    static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
+    static NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma, NvU64 addr)
    {
        return NV_OK;
    }

-    static bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
-                                                      uvm_va_block_context_t *va_block_context,
-                                                      uvm_va_block_region_t region)
+    static bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
+                                                   struct vm_area_struct *vma,
+                                                   uvm_va_block_region_t region)
    {
        return true;
    }

+    static void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context)
+    {
+    }
+
+    static NV_STATUS uvm_hmm_migrate_begin(uvm_va_block_t *va_block)
+    {
+        return NV_OK;
+    }
+
+    static void uvm_hmm_migrate_begin_wait(uvm_va_block_t *va_block)
+    {
+    }
+
+    static void uvm_hmm_migrate_finish(uvm_va_block_t *va_block)
+    {
+    }
+
    static NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
    {
        return NV_ERR_INVALID_ADDRESS;
@@ -349,7 +494,8 @@ typedef struct
    static NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
                                                    uvm_processor_id_t preferred_location,
                                                    NvU64 base,
-                                                    NvU64 last_address)
+                                                    NvU64 last_address,
+                                                    uvm_tracker_t *out_tracker)
    {
        return NV_ERR_INVALID_ADDRESS;
    }
@@ -358,11 +504,18 @@ typedef struct
                                             uvm_processor_id_t processor_id,
                                             bool set_bit,
                                             NvU64 base,
-                                             NvU64 last_address)
+                                             NvU64 last_address,
+                                             uvm_tracker_t *out_tracker)
    {
        return NV_ERR_INVALID_ADDRESS;
    }

+    static void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
+                                                    uvm_va_block_t *va_block,
+                                                    uvm_va_block_context_t *block_context)
+    {
+    }
+
    static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
                                                  uvm_read_duplication_policy_t new_policy,
                                                  NvU64 base,
@@ -371,16 +524,19 @@ typedef struct
        return NV_ERR_INVALID_ADDRESS;
    }

-    static void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
-                                        uvm_va_block_context_t *va_block_context,
-                                        unsigned long addr,
-                                        NvU64 *endp)
+    static const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
+                                                          struct vm_area_struct *vma,
+                                                          unsigned long addr,
+                                                          NvU64 *endp)
    {
+        UVM_ASSERT(0);
+        return NULL;
    }

    static NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
-                                                       uvm_va_block_context_t *va_block_context,
+                                                       struct vm_area_struct **vma,
                                                       uvm_page_index_t page_index,
+                                                       const uvm_va_policy_t **policy,
                                                       uvm_page_index_t *outerp)
    {
        return NV_OK;
@@ -392,22 +548,101 @@ typedef struct
    }

    static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
-                                                             uvm_va_block_context_t *va_block_context,
+                                                             struct vm_area_struct *vma,
+                                                             const uvm_va_policy_t *policy,
                                                             NvU64 address)
    {
        return (uvm_va_block_region_t){};
    }

    static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
-                                                   uvm_va_block_context_t *va_block_context,
+                                                   struct vm_area_struct *vma,
                                                   NvU64 addr)
    {
        return UVM_PROT_NONE;
    }

-    static NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp)
+    static NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
+                                                     uvm_processor_id_t new_residency,
+                                                     uvm_va_block_t *va_block,
+                                                     uvm_va_block_retry_t *va_block_retry,
+                                                     uvm_service_block_context_t *service_context)
    {
-        return NV_WARN_NOTHING_TO_DO;
+        return NV_ERR_INVALID_ADDRESS;
+    }
+
+    static NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
+                                                     uvm_va_block_retry_t *va_block_retry,
+                                                     uvm_va_block_context_t *va_block_context,
+                                                     uvm_processor_id_t dest_id,
+                                                     uvm_va_block_region_t region,
+                                                     uvm_make_resident_cause_t cause)
+    {
+        return NV_ERR_INVALID_ADDRESS;
+    }
+
+    static NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
+                                            uvm_va_block_context_t *va_block_context,
+                                            NvU64 base,
+                                            NvU64 length,
+                                            uvm_processor_id_t dest_id,
+                                            uvm_migrate_mode_t mode,
+                                            uvm_tracker_t *out_tracker)
+    {
+        return NV_ERR_INVALID_ADDRESS;
+    }
+
+    static void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space)
+    {
+    }
+
+    static NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
+                                                       uvm_va_block_context_t *va_block_context,
+                                                       uvm_gpu_chunk_t *gpu_chunk,
+                                                       uvm_va_block_region_t chunk_region)
+    {
+        return NV_OK;
+    }
+
+    static NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
+                                                   uvm_va_block_context_t *va_block_context,
+                                                   const uvm_page_mask_t *pages_to_evict,
+                                                   uvm_va_block_region_t region,
+                                                   bool *out_accessed_by_set)
+    {
+        return NV_OK;
+    }
+
+    static NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
+                                                           uvm_gpu_t *gpu,
+                                                           uvm_va_block_context_t *va_block_context,
+                                                           const uvm_page_mask_t *pages_to_evict,
+                                                           uvm_va_block_region_t region)
+    {
+        return NV_OK;
+    }
+
+    static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
+    {
+        return NV_OK;
+    }
+
+    static NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
+                                                   struct mm_struct *mm,
+                                                   NvU64 lookup_address,
+                                                   NvU64 *startp,
+                                                   NvU64 *endp,
+                                                   UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params)
+    {
+        return NV_ERR_INVALID_ADDRESS;
+    }
+
+    static NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block,
+                                                            struct mm_struct *mm,
+                                                            NvU64 lookup_address,
+                                                            bool populate)
+    {
+        return NV_ERR_INVALID_ADDRESS;
    }

    static NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
--- a/kernel-open/nvidia-uvm/uvm_hmm_sanity_test.c
+++ b/kernel-open/nvidia-uvm/uvm_hmm_sanity_test.c
@@ -1,90 +0,0 @@
-/*******************************************************************************
-    Copyright (c) 2021-2022 NVIDIA Corporation
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to
-    deal in the Software without restriction, including without limitation the
-    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-    sell copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be
-        included in all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-*******************************************************************************/
-
-#include "uvm_common.h"
-#include "uvm_linux.h"
-#include "uvm_test.h"
-#include "uvm_va_space.h"
-#include "uvm_va_range.h"
-#include "uvm_hmm.h"
-
-NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *filp)
-{
-    uvm_va_space_t *va_space = uvm_va_space_get(filp);
-    struct mm_struct *mm;
-    uvm_va_block_t *hmm_block = NULL;
-    NV_STATUS status;
-
-    mm = uvm_va_space_mm_or_current_retain(va_space);
-    if (!mm)
-        return NV_WARN_NOTHING_TO_DO;
-
-    uvm_down_write_mmap_lock(mm);
-    uvm_va_space_down_write(va_space);
-
-    // TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
-    // By default, HMM is enabled system wide but disabled per va_space.
-    // This will initialize the va_space for HMM.
-    status = uvm_hmm_va_space_initialize_test(va_space);
-    if (status != NV_OK)
-        goto out;
-
-    uvm_va_space_up_write(va_space);
-    uvm_up_write_mmap_lock(mm);
-
-    uvm_down_read_mmap_lock(mm);
-    uvm_va_space_down_read(va_space);
-
-    // Try to create an HMM va_block to virtual address zero (NULL).
-    // It should fail. There should be no VMA but a va_block for range
-    // [0x0 0x1fffff] is possible.
-    status = uvm_hmm_va_block_find_create(va_space, 0UL, NULL, &hmm_block);
-    TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
-
-    // Try to create an HMM va_block which overlaps a managed block.
-    // It should fail.
-    status = uvm_hmm_va_block_find_create(va_space, params->uvm_address, NULL, &hmm_block);
-    TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
-
-    // Try to create an HMM va_block; it should succeed.
-    status = uvm_hmm_va_block_find_create(va_space, params->hmm_address, NULL, &hmm_block);
-    TEST_CHECK_GOTO(status == NV_OK, done);
-
-    // Try to find an existing HMM va_block; it should succeed.
-    status = uvm_hmm_va_block_find(va_space, params->hmm_address, &hmm_block);
-    TEST_CHECK_GOTO(status == NV_OK, done);
-
-done:
-    uvm_va_space_up_read(va_space);
-    uvm_up_read_mmap_lock(mm);
-    uvm_va_space_mm_or_current_release(va_space, mm);
-
-    return status;
-
-out:
-    uvm_va_space_up_write(va_space);
-    uvm_up_write_mmap_lock(mm);
-    uvm_va_space_mm_or_current_release(va_space, mm);
-
-    return status;
-}
--- a/kernel-open/nvidia-uvm/uvm_hopper.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper.c
@@ -49,12 +49,23 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    // A single top level PDE on Hopper covers 64 PB and that's the minimum
    // size that can be used.
    parent_gpu->rm_va_base = 0;
-    parent_gpu->rm_va_size = 64ull * 1024 * 1024 * 1024 * 1024 * 1024;
+    parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;

-    parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384ull * 1024 * 1024 * 1024 * 1024;
+    parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

-    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
+    // See uvm_mmu.h for mapping placement
+    parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (8 * UVM_SIZE_1TB);
+
+    // Physical CE writes to vidmem are non-coherent with respect to the CPU on
+    // GH180.
+    parent_gpu->ce_phys_vidmem_write_supported = !uvm_gpu_is_coherent(parent_gpu);
+
+    // TODO: Bug 4174553: [HGX-SkinnyJoe][GH180] channel errors discussion/debug
+    //                    portion for the uvm tests became nonresponsive after
+    //                    some time and then failed even after reboot
+    parent_gpu->peer_copy_mode = uvm_gpu_is_coherent(parent_gpu) ?
+                                                           UVM_GPU_PEER_COPY_MODE_VIRTUAL : g_uvm_global.peer_copy_mode;

    // All GR context buffers may be mapped to 57b wide VAs. All "compute" units
    // accessing GR context buffers support the 57-bit VA range.
--- a/kernel-open/nvidia-uvm/uvm_hopper_ce.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper_ce.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2020 NVIDIA Corporation
+    Copyright (c) 2020-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -23,25 +23,10 @@

 #include "uvm_hal.h"
 #include "uvm_push.h"
+#include "uvm_mem.h"
+#include "uvm_conf_computing.h"
 #include "clc8b5.h"

-static void hopper_membar_after_transfer(uvm_push_t *push)
-{
-    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
-
-    if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
-        return;
-
-    // TODO: [UVM-Volta] Remove Host WFI + Membar WAR for CE flush-only bug
-    // http://nvbugs/1734761
-    gpu->parent->host_hal->wait_for_idle(push);
-
-    if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
-        gpu->parent->host_hal->membar_gpu(push);
-    else
-        gpu->parent->host_hal->membar_sys(push);
-}
-
 static NvU32 ce_aperture(uvm_aperture_t aperture)
 {
    BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
@@ -78,45 +63,32 @@ void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 of
                     OFFSET_OUT_LOWER, HWVALUE(C8B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
 }

-// Perform an appropriate membar before a semaphore operation. Returns whether
-// the semaphore operation should include a flush.
-static bool hopper_membar_before_semaphore(uvm_push_t *push)
+// Return the flush type and the flush enablement.
+static NvU32 hopper_get_flush_value(uvm_push_t *push)
 {
-    uvm_gpu_t *gpu;
+    NvU32 flush_value;
+    uvm_membar_t membar = uvm_push_get_and_reset_membar_flag(push);

-    if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
+    if (membar == UVM_MEMBAR_NONE) {
        // No MEMBAR requested, don't use a flush.
-        return false;
+        flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
+    }
+    else {
+        flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
+
+        if (membar == UVM_MEMBAR_GPU)
+            flush_value |= HWCONST(C8B5, LAUNCH_DMA, FLUSH_TYPE, GL);
+        else
+            flush_value |= HWCONST(C8B5, LAUNCH_DMA, FLUSH_TYPE, SYS);
    }

-    if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
-        // By default do a MEMBAR SYS and for that we can just use flush on the
-        // semaphore operation.
-        return true;
-    }
-
-    // TODO: Bug 1734761: Remove the HOST WFI+membar WAR, i.e, perform the CE
-    // flush when MEMBAR GPU is requested.
-    gpu = uvm_push_get_gpu(push);
-    gpu->parent->host_hal->wait_for_idle(push);
-    gpu->parent->host_hal->membar_gpu(push);
-
-    return false;
+    return flush_value;
 }

 void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
 {
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
-    NvU32 flush_value;
    NvU32 launch_dma_plc_mode;
-    bool use_flush;
-
-    use_flush = hopper_membar_before_semaphore(push);
-
-    if (use_flush)
-        flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
-    else
-        flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);

    NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
                     SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
@@ -124,25 +96,17 @@ void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p

    launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

-    NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
+    NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
       HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
-       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
+       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_PAYLOAD_SIZE, ONE_WORD) |
+       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_SEMAPHORE_NO_TIMESTAMP) |
       launch_dma_plc_mode);
 }

 void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
 {
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
-    NvU32 flush_value;
    NvU32 launch_dma_plc_mode;
-    bool use_flush;
-
-    use_flush = hopper_membar_before_semaphore(push);
-
-    if (use_flush)
-        flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
-    else
-        flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);

    NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
                     SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
@@ -150,9 +114,10 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N

    launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

-    NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
+    NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
       HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
-       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
+       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_PAYLOAD_SIZE, ONE_WORD) |
+       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_SEMAPHORE_NO_TIMESTAMP) |
       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION, INC) |
       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_SIGN, UNSIGNED) |
       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_ENABLE, TRUE) |
@@ -162,16 +127,7 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
 void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
 {
    uvm_gpu_t *gpu;
-    NvU32 flush_value;
    NvU32 launch_dma_plc_mode;
-    bool use_flush;
-
-    use_flush = hopper_membar_before_semaphore(push);
-
-    if (use_flush)
-        flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
-    else
-        flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);

    NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
                     SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
@@ -180,9 +136,10 @@ void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
    gpu = uvm_push_get_gpu(push);
    launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

-    NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
+    NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
       HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
-       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_FOUR_WORD_SEMAPHORE) |
+       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_PAYLOAD_SIZE, ONE_WORD) |
+       HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_SEMAPHORE_WITH_TIMESTAMP) |
       launch_dma_plc_mode);
 }

@@ -195,12 +152,46 @@ static NvU32 hopper_memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t ds
    return HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
 }

-static bool hopper_scrub_enable(uvm_gpu_address_t dst, size_t size)
+static bool va_is_flat_vidmem(uvm_gpu_t *gpu, NvU64 va)
 {
-    return !dst.is_virtual &&
-           dst.aperture == UVM_APERTURE_VID &&
-           IS_ALIGNED(dst.address, UVM_PAGE_SIZE_4K) &&
-           IS_ALIGNED(size, UVM_PAGE_SIZE_4K);
+    return (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu)) &&
+           va >= gpu->parent->flat_vidmem_va_base &&
+           va < gpu->parent->flat_vidmem_va_base + UVM_GPU_MAX_PHYS_MEM;
+}
+
+// Return whether a memset of size bytes at *dst should use the fast scrubber.
+// If so, *dst may be rewritten to the physical address the scrubber needs.
+static bool hopper_scrub_enable(uvm_gpu_t *gpu, uvm_gpu_address_t *dst, size_t size)
+{
+    if (!IS_ALIGNED(dst->address, UVM_PAGE_SIZE_4K) || !IS_ALIGNED(size, UVM_PAGE_SIZE_4K))
+        return false; // only 4K-aligned address and size qualify
+
+    // When CE physical writes are disallowed, higher layers will convert
+    // physical memsets to virtual using the flat mapping. Those layers are
+    // unaware of the fast scrubber, which is safe to use specifically when CE
+    // physical access is disallowed. Detect such memsets within the flat vidmem
+    // region and convert them back to physical, since the fast scrubber only
+    // works with physical addressing.
+    if (dst->is_virtual && !gpu->parent->ce_phys_vidmem_write_supported && va_is_flat_vidmem(gpu, dst->address)) {
+        *dst = uvm_gpu_address_physical(UVM_APERTURE_VID, dst->address - gpu->parent->flat_vidmem_va_base);
+        return true;
+    }
+
+    return !dst->is_virtual && dst->aperture == UVM_APERTURE_VID; // otherwise: physical vidmem only, *dst untouched
+}
+
+static NvU32 hopper_memset_copy_type(uvm_push_t *push, uvm_gpu_address_t dst)
+{ // Pick the LAUNCH_DMA COPY_TYPE field value for a memset to dst.
+    if (uvm_conf_computing_mode_enabled(uvm_push_get_gpu(push)) && dst.is_unprotected)
+        return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT); // conf computing on, dst unprotected
+    return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT); // every other memset
+}
+
+NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
+{ // Pick the LAUNCH_DMA COPY_TYPE field value for a copy from src to dst.
+    if (uvm_conf_computing_mode_enabled(uvm_push_get_gpu(push)) && dst.is_unprotected && src.is_unprotected)
+        return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT); // conf computing on, both ends unprotected
+    return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT); // every other copy
 }

 static void hopper_memset_common(uvm_push_t *push,
@@ -218,8 +209,11 @@ static void hopper_memset_common(uvm_push_t *push,
    NvU32 launch_dma_plc_mode;
    NvU32 launch_dma_remap_enable;
    NvU32 launch_dma_scrub_enable;
+    NvU32 flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
+    NvU32 copy_type_value = hopper_memset_copy_type(push, dst);
+    bool is_scrub = hopper_scrub_enable(gpu, &dst, num_elements * memset_element_size);

-    UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_validate(push, dst, memset_element_size),
+    UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, num_elements, memset_element_size),
                   "Memset validation failed in channel %s, GPU %s",
                   push->channel->name,
                   uvm_gpu_name(gpu));
@@ -232,7 +226,7 @@ static void hopper_memset_common(uvm_push_t *push,
    else
        pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);

-    if (memset_element_size == 8 && hopper_scrub_enable(dst, num_elements * memset_element_size)) {
+    if (memset_element_size == 8 && is_scrub) {
        launch_dma_remap_enable = HWCONST(C8B5, LAUNCH_DMA, REMAP_ENABLE, FALSE);
        launch_dma_scrub_enable = HWCONST(C8B5, LAUNCH_DMA, MEMORY_SCRUB_ENABLE, TRUE);

@@ -252,6 +246,10 @@ static void hopper_memset_common(uvm_push_t *push,
    do {
        NvU32 memset_this_time = (NvU32)min(num_elements, max_single_memset);

+        // In the last operation, a flush/membar may be issued after the memset.
+        if (num_elements == memset_this_time)
+            flush_value = hopper_get_flush_value(push);
+
        gpu->parent->ce_hal->offset_out(push, dst.address);

        NV_PUSH_1U(C8B5, LINE_LENGTH_IN, memset_this_time);
@@ -260,19 +258,18 @@ static void hopper_memset_common(uvm_push_t *push,
           HWCONST(C8B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
           HWCONST(C8B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
           HWCONST(C8B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
-           HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
+           flush_value |
           launch_dma_remap_enable |
           launch_dma_scrub_enable |
           launch_dma_dst_type |
           launch_dma_plc_mode |
+           copy_type_value |
           pipelined_value);

        dst.address += memset_this_time * memset_element_size;
        num_elements -= memset_this_time;
-        pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
+        pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
    } while (num_elements > 0);
-
-    hopper_membar_after_transfer(push);
 }

 void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size)
@@ -294,7 +291,7 @@ void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 v

 void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size)
 {
-    if (hopper_scrub_enable(dst, size)) {
+    if (hopper_scrub_enable(uvm_push_get_gpu(push), &dst, size)) {
        NvU64 value64 = value;

        value64 |= value64 << 8;
@@ -318,7 +315,7 @@ void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 v
 {
    UVM_ASSERT_MSG(size % 4 == 0, "size: %zd\n", size);

-    if (hopper_scrub_enable(dst, size)) {
+    if (hopper_scrub_enable(uvm_push_get_gpu(push), &dst, size)) {
        NvU64 value64 = value;

        value64 |= value64 << 32;
@@ -337,3 +334,234 @@ void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 v

    hopper_memset_common(push, dst, size, 4);
 }
+
+bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
+                                       uvm_gpu_address_t dst,
+                                       size_t num_elements,
+                                       size_t element_size)
+{
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+
+    // In HCC, a memset that uses physical addressing for the destination must
+    // write to (protected) vidmem. A memset that uses virtual addressing with
+    // backing storage other than vidmem is only legal if the destination is
+    // unprotected sysmem and the copy type is NONPROT2NONPROT, but this
+    // validation does not detect violations of that rule.
+    if (uvm_conf_computing_mode_is_hcc(gpu) && !dst.is_virtual && dst.aperture != UVM_APERTURE_VID)
+        return false;
+
+    if (!gpu->parent->ce_phys_vidmem_write_supported) {
+        size_t size = num_elements * element_size;
+        uvm_gpu_address_t temp = dst; // scratch copy: hopper_scrub_enable() may rewrite the address it is given
+
+        // Physical vidmem writes are disallowed, unless using the scrubber
+        if (!dst.is_virtual && dst.aperture == UVM_APERTURE_VID && !hopper_scrub_enable(gpu, &temp, size)) {
+            UVM_ERR_PRINT("Destination address of vidmem memset must be virtual, not physical: {%s, 0x%llx} size %zu\n",
+                          uvm_gpu_address_aperture_string(dst),
+                          dst.address,
+                          size);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
+{
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+
+    if (uvm_conf_computing_mode_is_hcc(gpu)) {
+        // In HCC, if a memcopy uses physical addressing for either the
+        // destination or the source, then the corresponding aperture must be
+        // vidmem. If virtual addressing is used and the backing storage is
+        // sysmem, the access is only legal if the copy type is NONPROT2NONPROT
+        // (in other words, both the copy source and destination are
+        // unprotected sysmem), but the validation does not detect it.
+        if (!src.is_virtual && (src.aperture != UVM_APERTURE_VID))
+            return false;
+
+        if (!dst.is_virtual && (dst.aperture != UVM_APERTURE_VID))
+            return false;
+
+        if (dst.is_unprotected != src.is_unprotected) // mixed protected/unprotected copies are rejected
+            return false;
+    }
+
+    if (!gpu->parent->ce_phys_vidmem_write_supported && !dst.is_virtual && dst.aperture == UVM_APERTURE_VID) {
+        UVM_ERR_PRINT("Destination address of vidmem memcopy must be virtual, not physical: {%s, 0x%llx}\n",
+                      uvm_gpu_address_aperture_string(dst),
+                      dst.address);
+        return false;
+    }
+
+    return true;
+}
+
+// Specialized version of uvm_hal_volta_ce_memcopy used for encryption and
+// decryption. Pre-Hopper functionality, such as validation or address patching,
+// has been removed. Pushes a single LAUNCH_DMA with COPY_TYPE SECURE.
+static void encrypt_or_decrypt(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, NvU32 size)
+{
+    NvU32 pipelined_value;
+    NvU32 launch_dma_src_dst_type;
+    NvU32 launch_dma_plc_mode;
+    NvU32 flush_value;
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+
+    // HW allows unaligned operations only if the entire buffer is in one 32B
+    // sector. Operations on buffers larger than 32B have to be aligned.
+    if (size > UVM_CONF_COMPUTING_BUF_ALIGNMENT) {
+        UVM_ASSERT(IS_ALIGNED(src.address, UVM_CONF_COMPUTING_BUF_ALIGNMENT));
+        UVM_ASSERT(IS_ALIGNED(dst.address, UVM_CONF_COMPUTING_BUF_ALIGNMENT));
+    }
+    else {
+        // The alignment is a byte count, not a shift: divide to get the sector.
+        UVM_ASSERT((dst.address / UVM_CONF_COMPUTING_BUF_ALIGNMENT) ==
+                   ((dst.address + size - 1) / UVM_CONF_COMPUTING_BUF_ALIGNMENT));
+        UVM_ASSERT((src.address / UVM_CONF_COMPUTING_BUF_ALIGNMENT) ==
+                   ((src.address + size - 1) / UVM_CONF_COMPUTING_BUF_ALIGNMENT));
+    }
+
+    launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
+    launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
+
+    if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
+        pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
+    else
+        pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
+
+    flush_value = hopper_get_flush_value(push);
+
+    gpu->parent->ce_hal->offset_in_out(push, src.address, dst.address);
+
+    NV_PUSH_1U(C8B5, LINE_LENGTH_IN, size);
+    NV_PUSH_1U(C8B5, LAUNCH_DMA, HWCONST(C8B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
+                                 HWCONST(C8B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
+                                 HWCONST(C8B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
+                                 HWCONST(C8B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) |
+                                 HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, SECURE) |
+                                 flush_value |
+                                 launch_dma_src_dst_type |
+                                 launch_dma_plc_mode |
+                                 pipelined_value);
+}
+
+// Return the address the CE should write the used IV to, in the same
+// addressing mode (virtual or physical) as dst.
+//
+// The GPU CE encrypt operation requires clients to pass a valid IV output
+// address, but UVM has no use for it: UVM should instead rely on the CSL
+// nvUvmInterfaceCslLogDeviceEncryption API to independently track the
+// expected IV. To satisfy the HW requirement, the same unprotected sysmem
+// dummy buffer, allocated at GPU initialization time, is passed to all
+// GPU-side encryptions.
+static NvU64 encrypt_iv_address(uvm_push_t *push, uvm_gpu_address_t dst)
+{
+    NvU64 iv_address;
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+
+    // Match addressing mode of destination and IV
+    if (dst.is_virtual) {
+        iv_address = uvm_rm_mem_get_gpu_va(gpu->conf_computing.iv_rm_mem, gpu, false).address;
+    }
+    else {
+        iv_address = uvm_mem_gpu_physical(gpu->conf_computing.iv_mem,
+                                          gpu,
+                                          0,
+                                          gpu->conf_computing.iv_mem->size).address;
+    }
+
+    UVM_ASSERT(IS_ALIGNED(iv_address, UVM_CONF_COMPUTING_IV_ALIGNMENT));
+
+    return iv_address;
+}
+
+// TODO: Bug 3842953: adapt CE encrypt/decrypt for p2p encrypted transfers
+void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
+                               uvm_gpu_address_t dst,
+                               uvm_gpu_address_t src,
+                               NvU32 size,
+                               uvm_gpu_address_t auth_tag)
+{
+    // Set ENCRYPT mode and the auth tag/IV output addresses, then push the secure copy from src to dst.
+    NvU32 auth_tag_address_hi32, auth_tag_address_lo32;
+    NvU64 iv_address;
+    NvU32 iv_address_hi32, iv_address_lo32;
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+
+    UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
+    UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
+
+    if (!src.is_virtual)
+        UVM_ASSERT(src.aperture == UVM_APERTURE_VID);
+
+    // The addressing mode (and aperture, if applicable) of the destination
+    // pointer determines the addressing mode and aperture used by the
+    // encryption to reference the other two addresses written by it:
+    // authentication tag, and IV. If the client passes a sysmem physical
+    // address as destination, then the authentication tag must also be a sysmem
+    // physical address.
+    UVM_ASSERT(dst.is_virtual == auth_tag.is_virtual);
+
+    if (!dst.is_virtual) {
+        UVM_ASSERT(dst.aperture == UVM_APERTURE_SYS);
+        UVM_ASSERT(auth_tag.aperture == UVM_APERTURE_SYS);
+    }
+
+    NV_PUSH_1U(C8B5, SET_SECURE_COPY_MODE, HWCONST(C8B5, SET_SECURE_COPY_MODE, MODE, ENCRYPT));
+
+    auth_tag_address_hi32 = HWVALUE(C8B5, SET_ENCRYPT_AUTH_TAG_ADDR_UPPER, UPPER, NvU64_HI32(auth_tag.address));
+    auth_tag_address_lo32 = HWVALUE(C8B5, SET_ENCRYPT_AUTH_TAG_ADDR_LOWER, LOWER, NvU64_LO32(auth_tag.address));
+
+    iv_address = encrypt_iv_address(push, dst);
+
+    iv_address_hi32 = HWVALUE(C8B5, SET_ENCRYPT_IV_ADDR_UPPER, UPPER, NvU64_HI32(iv_address));
+    iv_address_lo32 = HWVALUE(C8B5, SET_ENCRYPT_IV_ADDR_LOWER, LOWER, NvU64_LO32(iv_address));
+
+    NV_PUSH_4U(C8B5, SET_ENCRYPT_AUTH_TAG_ADDR_UPPER, auth_tag_address_hi32,
+                     SET_ENCRYPT_AUTH_TAG_ADDR_LOWER, auth_tag_address_lo32,
+                     SET_ENCRYPT_IV_ADDR_UPPER, iv_address_hi32,
+                     SET_ENCRYPT_IV_ADDR_LOWER, iv_address_lo32);
+
+    encrypt_or_decrypt(push, dst, src, size);
+}
+
+// TODO: Bug 3842953: adapt CE encrypt/decrypt for p2p encrypted transfers
+void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
+                               uvm_gpu_address_t dst,
+                               uvm_gpu_address_t src,
+                               NvU32 size,
+                               uvm_gpu_address_t auth_tag)
+{
+    // Set DECRYPT mode and the auth tag compare address, then push the secure copy from src to dst.
+    NvU32 auth_tag_address_hi32, auth_tag_address_lo32;
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+
+    UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
+    UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
+
+    // The addressing mode (and aperture, if applicable) of the source and
+    // authentication pointers should match. But unlike in the encryption case,
+    // clients are not forced to pass a valid IV address.
+    UVM_ASSERT(src.is_virtual == auth_tag.is_virtual);
+
+    if (!src.is_virtual) {
+        UVM_ASSERT(src.aperture == UVM_APERTURE_SYS);
+        UVM_ASSERT(auth_tag.aperture == UVM_APERTURE_SYS);
+    }
+
+    if (!dst.is_virtual)
+        UVM_ASSERT(dst.aperture == UVM_APERTURE_VID);
+
+    NV_PUSH_1U(C8B5, SET_SECURE_COPY_MODE, HWCONST(C8B5, SET_SECURE_COPY_MODE, MODE, DECRYPT));
+
+    auth_tag_address_hi32 = HWVALUE(C8B5, SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER, UPPER, NvU64_HI32(auth_tag.address));
+    auth_tag_address_lo32 = HWVALUE(C8B5, SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER, LOWER, NvU64_LO32(auth_tag.address));
+
+    NV_PUSH_2U(C8B5, SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER, auth_tag_address_hi32,
+                     SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER, auth_tag_address_lo32);
+
+    encrypt_or_decrypt(push, dst, src, size);
+}
+
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Bernhard Stoeckner	a8e01be6b2	535.104.05	2023-08-22 15:09:37 +02:00
Bernhard Stoeckner	12c0739352	535.98	2023-08-08 18:28:38 +02:00
Bernhard Stoeckner	29f830f1bb	535.86.10	2023-07-31 18:17:14 +02:00
Bernhard Stoeckner	337e28efda	535.86.05	2023-07-18 16:00:22 +02:00
Bernhard Stoeckner	22a077c4fe	issue template: be clearer about issues with prop driver	2023-07-10 15:58:02 +02:00
Andy Ritger	26458140be	535.54.03	2023-06-14 12:37:59 -07:00
Andy Ritger	eb5c7665a1	535.43.02	2023-05-30 10:11:36 -07:00
Andy Ritger	6dd092ddb7	530.41.03	2023-03-23 11:00:12 -07:00
Andy Ritger	4397463e73	530.30.02	2023-02-28 11:12:44 -08:00