550.54.14

Bernhard Stoeckner
2024-02-23 16:37:56 +01:00
parent 91676d6628
commit 476bd34534
186 changed files with 42509 additions and 37629 deletions

View File

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.07\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.54.14\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@@ -621,6 +621,14 @@ typedef enum
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
/*
* For console setup by EFI GOP, the base address is BAR1.
* For console setup by VBIOS, the base address is BAR2 + 16MB.
*/
#define NV_IS_CONSOLE_MAPPED(nv, addr) \
(((addr) == (nv)->bars[NV_GPU_BAR_INDEX_FB].cpu_address) || \
((addr) == ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000)))
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
((nv)->iommus.iso_iommu_present)
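For illustration only (not part of this commit; the helper name is hypothetical), a driver-side check built on the new macro might look like this:

/*
 * Illustrative sketch: returns true when a CPU physical address is one of the
 * two console bases described above (BAR1 for an EFI GOP console, BAR2 + 16MB
 * for a VBIOS console).
 */
static NvBool example_addr_is_console(nv_state_t *nv, NvU64 addr)
{
    return NV_IS_CONSOLE_MAPPED(nv, addr);
}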
@@ -878,6 +886,8 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
NvU32 NV_API_CALL nv_get_os_type(void);
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
struct dma_buf;
typedef struct nv_dma_buf nv_dma_buf_t;
struct drm_gem_object;

View File

@@ -956,12 +956,20 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
- This function should not be called when interrupts are disabled.
Arguments:
device[IN] - Device handle associated with the gpu
pFaultInfo[IN] - information provided by RM for fault handling.
used for obtaining the device handle without locks.
bCopyAndFlush[IN] - Instructs RM to perform the flush in the Copy+Flush mode.
In this mode, RM will perform a copy of the packets from
the HW buffer to UVM's SW buffer as part of performing
the flush. This mode gives UVM the opportunity to observe
the packets contained within the HW buffer at the time
of issuing the call.
Error codes:
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush);
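A minimal sketch of a call with the new signature, assuming pFaultInfo was initialized earlier with nvUvmInterfaceInitFaultInfo(); the wrapper function itself is hypothetical:

// Illustrative only: request a flush in Copy+Flush mode so the packets in the
// HW buffer are copied into UVM's SW buffer as part of the flush.
static NV_STATUS example_copy_and_flush(UvmGpuFaultInfo *pFaultInfo)
{
    return nvUvmInterfaceFlushReplayableFaultBuffer(pFaultInfo, NV_TRUE);
}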
/*******************************************************************************
nvUvmInterfaceTogglePrefetchFaults
@@ -982,7 +990,8 @@ NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
Error codes:
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable);
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
NvBool bEnable);
/*******************************************************************************
nvUvmInterfaceInitAccessCntrInfo

View File

@@ -700,8 +700,10 @@ typedef struct UvmGpuInfo_tag
// local EGM properties
// NV_TRUE if EGM is enabled
NvBool egmEnabled;
// Peer ID to reach local EGM when EGM is enabled
NvU8 egmPeerId;
// EGM base address to offset in the GMMU PTE entry for EGM mappings
NvU64 egmBaseAddr;
} UvmGpuInfo;
@@ -712,9 +714,10 @@ typedef struct UvmGpuFbInfo_tag
// RM regions that are not registered with PMA either.
NvU64 maxAllocatableAddress;
NvU32 heapSize; // RAM in KB available for user allocations
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
NvBool bZeroFb; // Zero FB mode enabled.
NvU32 heapSize; // RAM in KB available for user allocations
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
NvBool bZeroFb; // Zero FB mode enabled.
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
} UvmGpuFbInfo;
typedef struct UvmGpuEccInfo_tag

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -160,10 +160,9 @@ NvBool NV_API_CALL os_is_vgx_hyper (void);
NV_STATUS NV_API_CALL os_inject_vgx_msi (NvU16, NvU64, NvU32);
NvBool NV_API_CALL os_is_grid_supported (void);
NvU32 NV_API_CALL os_get_grid_csp_support (void);
void NV_API_CALL os_get_screen_info (NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64, NvU64);
void NV_API_CALL os_bug_check (NvU32, const char *);
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **, void**);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
@@ -198,6 +197,8 @@ nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry (nv_cap_t *, const char *,
void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
void NV_API_CALL os_nv_cap_close_fd (int);
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
NvS32 NV_API_CALL os_imex_channel_count (void);
enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
@@ -219,6 +220,7 @@ extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
extern NvBool os_imex_channel_is_supported;
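A hedged sketch of how these additions might be consumed; the semantics are inferred from the names only, and the helper is hypothetical:

// Illustrative only: report how many IMEX channels the OS layer exposes,
// treating the feature as absent when the capability flag is false.
static NvS32 example_imex_channel_count(void)
{
    if (!os_imex_channel_is_supported)
        return 0;
    return os_imex_channel_count();
}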
/*
* ---------------------------------------------------------------------------

View File

@@ -75,7 +75,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_toggle_prefetch_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);

View File

@@ -96,5 +96,6 @@ NV_HEADER_PRESENCE_TESTS = \
soc/tegra/bpmp.h \
linux/sync_file.h \
linux/cc_platform.h \
asm/cpufeature.h
asm/cpufeature.h \
linux/mpi.h

View File

@@ -58,7 +58,7 @@
#ifndef _UVM_H_
#define _UVM_H_
#define UVM_API_LATEST_REVISION 9
#define UVM_API_LATEST_REVISION 11
#if !defined(UVM_API_REVISION)
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
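For illustration, a client opting in to the new entry points pins the API revision before including this header; UVM_API_LATEST_REVISION now expands to 11:

// Illustrative only: either pin an explicit number...
#define UVM_API_REVISION 11
// ...or track the latest revision, as the #error message above suggests:
// #define UVM_API_REVISION UVM_API_LATEST_REVISION
#include "uvm.h"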
@@ -297,7 +297,9 @@ NV_STATUS UvmIsPageableMemoryAccessSupported(NvBool *pageableMemAccess);
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU for which pageable memory access support is queried.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition for which
// pageable memory access support is queried.
//
// pageableMemAccess: (OUTPUT)
// Returns true (non-zero) if the GPU represented by gpuUuid supports
@@ -327,6 +329,12 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
// usage. Calling UvmRegisterGpu multiple times on the same GPU from the same
// process results in an error.
//
// After successfully registering a GPU partition, all subsequent API calls
// which take a NvProcessorUuid argument (including UvmGpuMappingAttributes),
// must use the GI partition UUID which can be obtained with
// NvRmControl(NVC637_CTRL_CMD_GET_UUID). Otherwise, if the GPU is not SMC
// capable or SMC enabled, the physical GPU UUID must be used.
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the physical GPU to register.
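The rule above can be condensed into a small helper; this is an illustrative sketch, not code from the commit, and the function name is hypothetical:

// Returns the UUID that post-registration UVM calls must receive: the GI
// partition UUID (obtained via NvRmControl(NVC637_CTRL_CMD_GET_UUID)) when the
// GPU is SMC capable and enabled, otherwise the physical GPU UUID.
static const NvProcessorUuid *example_uuid_for_uvm_calls(NvBool smcEnabled,
                                                         const NvProcessorUuid *physicalUuid,
                                                         const NvProcessorUuid *giUuid)
{
    return smcEnabled ? giUuid : physicalUuid;
}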
@@ -431,7 +439,8 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to unregister.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to unregister.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@@ -489,7 +498,8 @@ NV_STATUS UvmUnregisterGpu(const NvProcessorUuid *gpuUuid);
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to register.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to register.
//
// platformParams: (INPUT)
// On Linux: RM ctrl fd, hClient and hVaSpace.
@@ -560,7 +570,9 @@ NV_STATUS UvmRegisterGpuVaSpace(const NvProcessorUuid *gpuUuid,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU whose VA space should be unregistered.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition whose VA space
// should be unregistered.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@@ -590,7 +602,7 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
//
// The two GPUs must be connected via PCIe. An error is returned if the GPUs are
// not connected or are connected over an interconnect different than PCIe
// (NVLink, for example).
// (NVLink or SMC partitions, for example).
//
// If both GPUs have GPU VA spaces registered for them, the two GPU VA spaces
// must support the same set of page sizes for GPU mappings.
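A minimal usage sketch, assuming both UUIDs were registered earlier and follow the physical-vs-GI-UUID convention described here; the wrapper is hypothetical and error handling is trimmed:

static NV_STATUS example_toggle_peer_access(const NvProcessorUuid *gpuUuidA,
                                            const NvProcessorUuid *gpuUuidB)
{
    // Illustrative only: enable, then immediately tear down, PCIe peer access.
    NV_STATUS status = UvmEnablePeerAccess(gpuUuidA, gpuUuidB);

    if (status != NV_OK)
        return status;

    return UvmDisablePeerAccess(gpuUuidA, gpuUuidB);
}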
@@ -603,10 +615,12 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
//
// Arguments:
// gpuUuidA: (INPUT)
// UUID of GPU A.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition A.
//
// gpuUuidB: (INPUT)
// UUID of GPU B.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition B.
//
// Error codes:
// NV_ERR_NO_MEMORY:
@@ -652,10 +666,12 @@ NV_STATUS UvmEnablePeerAccess(const NvProcessorUuid *gpuUuidA,
//
// Arguments:
// gpuUuidA: (INPUT)
// UUID of GPU A.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition A.
//
// gpuUuidB: (INPUT)
// UUID of GPU B.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition B.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@@ -700,7 +716,9 @@ NV_STATUS UvmDisablePeerAccess(const NvProcessorUuid *gpuUuidA,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU that the channel is associated with.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition that the channel is
// associated with.
//
// platformParams: (INPUT)
// On Linux: RM ctrl fd, hClient and hChannel.
@@ -1139,11 +1157,14 @@ NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
// Length, in bytes, of the range.
//
// preferredLocationUuid: (INPUT)
// UUID of the preferred location for this VA range.
// UUID of the CPU, UUID of the physical GPU if the GPU is not SMC
// capable or SMC enabled, or the GPU instance UUID of the partition of
// the preferred location for this VA range.
//
// accessedByUuids: (INPUT)
// UUIDs of all processors that should have persistent mappings to this
// VA range.
// UUID of the CPU, UUID of the physical GPUs if the GPUs are not SMC
// capable or SMC enabled, or the GPU instance UUID of the partitions
// that should have persistent mappings to this VA range.
//
// accessedByCount: (INPUT)
// Number of elements in the accessedByUuids array.
@@ -1421,7 +1442,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
// Length, in bytes, of the range.
//
// destinationUuid: (INPUT)
// UUID of the destination processor to migrate pages to.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
// migrate pages to.
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
@@ -1499,7 +1522,9 @@ NV_STATUS UvmMigrate(void *base,
// Length, in bytes, of the range.
//
// destinationUuid: (INPUT)
// UUID of the destination processor to migrate pages to.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
// migrate pages to.
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
@@ -1576,7 +1601,9 @@ NV_STATUS UvmMigrateAsync(void *base,
// Id of the range group whose associated VA ranges have to be migrated.
//
// destinationUuid: (INPUT)
// UUID of the destination processor to migrate pages to.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
// migrate pages to.
//
// Error codes:
// NV_ERR_OBJECT_NOT_FOUND:
@@ -1938,7 +1965,9 @@ NV_STATUS UvmMapExternalAllocation(void *base,
//
//
// gpuUuid: (INPUT)
// UUID of the GPU to map the sparse region on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to map the sparse
// region on.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@@ -1995,7 +2024,9 @@ NV_STATUS UvmMapExternalSparse(void *base,
// The length of the virtual address range.
//
// gpuUuid: (INPUT)
// UUID of the GPU to unmap the VA range from.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to unmap the VA
// range from.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@@ -2062,7 +2093,9 @@ NV_STATUS UvmUnmapExternalAllocation(void *base,
// supported by the GPU.
//
// gpuUuid: (INPUT)
// UUID of the GPU to map the dynamic parallelism region on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to map the
// dynamic parallelism region on.
//
// Errors:
// NV_ERR_UVM_ADDRESS_IN_USE:
@@ -2293,7 +2326,9 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// Length, in bytes, of the range.
//
// preferredLocationUuid: (INPUT)
// UUID of the preferred location.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID
// preferred location.
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
@@ -2469,8 +2504,9 @@ NV_STATUS UvmUnsetPreferredLocation(void *base,
// Length, in bytes, of the range.
//
// accessedByUuid: (INPUT)
// UUID of the processor that should have pages in the the VA range
// mapped when possible.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID
// that should have pages in the VA range mapped when possible.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@@ -2538,8 +2574,10 @@ NV_STATUS UvmSetAccessedBy(void *base,
// Length, in bytes, of the range.
//
// accessedByUuid: (INPUT)
// UUID of the processor from which any policies set by
// UvmSetAccessedBy should be revoked for the given VA range.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID
// from which any policies set by UvmSetAccessedBy should be revoked
// for the given VA range.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@@ -2597,7 +2635,9 @@ NV_STATUS UvmUnsetAccessedBy(void *base,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to enable software-assisted system-wide atomics on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to enable
// software-assisted system-wide atomics on.
//
// Error codes:
// NV_ERR_NO_MEMORY:
@@ -2633,7 +2673,9 @@ NV_STATUS UvmEnableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to disable software-assisted system-wide atomics on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to disable
// software-assisted system-wide atomics on.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@@ -2862,7 +2904,9 @@ NV_STATUS UvmDebugCountersEnable(UvmDebugSession session,
// Name of the counter in that scope.
//
// gpu: (INPUT)
// Gpuid of the scoped GPU. This parameter is ignored in AllGpu scopes.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition of the scoped GPU.
// This parameter is ignored in AllGpu scopes.
//
// pCounterHandle: (OUTPUT)
// Handle to the counter address.
@@ -2916,7 +2960,7 @@ NV_STATUS UvmDebugGetCounterVal(UvmDebugSession session,
// UvmEventQueueCreate
//
// This call creates an event queue of the given size.
// No events are added in the queue till they are enabled by the user.
// No events are added in the queue until they are enabled by the user.
// Event queue data is visible to the user even after the target process dies
// if the session is active and queue is not freed.
//
@@ -2967,7 +3011,7 @@ NV_STATUS UvmEventQueueCreate(UvmDebugSession sessionHandle,
// UvmEventQueueDestroy
//
// This call frees all internal resources associated with the queue, including
// upinning of the memory associated with that queue. Freeing user buffer is
// unpinning of the memory associated with that queue. Freeing user buffer is
// responsibility of a caller. Event queue might be also destroyed as a side
// effect of destroying a session associated with this queue.
//
@@ -3151,9 +3195,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
// UvmEventGetGpuUuidTable
//
// Each migration event entry contains the gpu index to/from where data is
// migrated. This index maps to a corresponding gpu UUID in the gpuUuidTable.
// Using indices saves on the size of each event entry. This API provides the
// gpuIndex to gpuUuid relation to the user.
// migrated. This index maps to a corresponding physical gpu UUID in the
// gpuUuidTable. Using indices saves on the size of each event entry. This API
// provides the gpuIndex to gpuUuid relation to the user.
//
// This API does not access the queue state maintained in the user
// library and so the user doesn't need to acquire a lock to protect the
@@ -3161,9 +3205,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
//
// Arguments:
// gpuUuidTable: (OUTPUT)
// The return value is an array of UUIDs. The array index is the
// corresponding gpuIndex. There can be at max 32 gpus associated with
// UVM, so array size is 32.
// The return value is an array of physical GPU UUIDs. The array index
// is the corresponding gpuIndex. There can be at max 32 GPUs
// associated with UVM, so array size is 32.
//
// validCount: (OUTPUT)
// The system doesn't normally contain 32 GPUs. This field gives the
@@ -3222,7 +3266,7 @@ NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
//------------------------------------------------------------------------------
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
UvmEventEntry *pBuffer,
UvmEventEntry_V1 *pBuffer,
NvU64 *nEntries);
//------------------------------------------------------------------------------
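An illustrative fetch against the V1 entry type now named explicitly in the prototype; nEntries is assumed to carry the buffer capacity on input and the number of fetched entries on output, and the wrapper is hypothetical:

static NV_STATUS example_fetch_events(UvmDebugSession session,
                                      UvmEventQueueHandle queue)
{
    UvmEventEntry_V1 entries[64];
    NvU64 count = 64;   // capacity in, entries actually fetched out (assumed)

    return UvmEventFetch(session, queue, entries, &count);
}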
@@ -3418,10 +3462,15 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
// 4. Destroy event Queue using UvmToolsDestroyEventQueue
//
#if UVM_API_REV_IS_AT_MOST(10)
// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
// sizeof(UvmToolsEventControlData_V2).
NvLength UvmToolsGetEventControlSize(void);
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
// sizeof(UvmEventEntry_V2).
NvLength UvmToolsGetEventEntrySize(void);
#endif
NvLength UvmToolsGetNumberOfCounters(void);
@@ -3436,6 +3485,12 @@ NvLength UvmToolsGetNumberOfCounters(void);
// session: (INPUT)
// Handle to the tools session.
//
// version: (INPUT)
// Requested version for events or counters.
// See UvmEventEntry_V1 and UvmEventEntry_V2.
// UvmToolsEventControlData_V2::version records the entry version that
// will be generated.
//
// event_buffer: (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold at least event_buffer_size events. Gets pinned until queue is
@@ -3447,10 +3502,9 @@ NvLength UvmToolsGetNumberOfCounters(void);
//
// event_control (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold UvmToolsEventControlData (although single page-size allocation
// should be more than enough). One could call
// UvmToolsGetEventControlSize() function to find out current size of
// UvmToolsEventControlData. Gets pinned until queue is destroyed.
// hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1 or
// UvmToolsEventControlData_V2 (although single page-size allocation
// should be more than enough). Gets pinned until queue is destroyed.
//
// queue: (OUTPUT)
// Handle to the created queue.
@@ -3460,22 +3514,32 @@ NvLength UvmToolsGetNumberOfCounters(void);
// Session handle does not refer to a valid session
//
// NV_ERR_INVALID_ARGUMENT:
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
// One of the parameters: event_buffer, event_buffer_size, event_control
// is not valid
//
// NV_ERR_INSUFFICIENT_RESOURCES:
// There could be multiple reasons for this error. One would be that it's
// not possible to allocate a queue of requested size. Another would be
// that either event_buffer or event_control memory couldn't be pinned
// (e.g. because of OS limitation of pinnable memory). Also it could not
// have been possible to create UvmToolsEventQueueDescriptor.
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate a queue of requested size. Another
// would be either event_buffer or event_control memory couldn't be
// pinned (e.g. because of OS limitation of pinnable memory). Also it
// could not have been possible to create UvmToolsEventQueueDescriptor.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(10)
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
#else
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
UvmToolsEventQueueVersion version,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
#endif
UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);
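Sketch of creating a queue through the revision-11 entry point declared above; buffer allocation and the exact UvmToolsEventQueueVersion enumerator are outside this hunk, so they are only assumed here:

static NV_STATUS example_create_queue(UvmToolsSessionHandle session,
                                      UvmToolsEventQueueVersion version,
                                      void *event_buffer,      // page-aligned, caller-allocated
                                      NvLength event_buffer_size,
                                      void *event_control,     // page-aligned, caller-allocated
                                      UvmToolsEventQueueHandle *queue)
{
    // Illustrative only: both buffers stay pinned until the queue is destroyed.
    return UvmToolsCreateEventQueue(session, version, event_buffer,
                                    event_buffer_size, event_control, queue);
}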
@@ -3512,7 +3576,7 @@ NV_STATUS UvmToolsSetNotificationThreshold(UvmToolsEventQueueHandle queue,
//------------------------------------------------------------------------------
// UvmToolsDestroyEventQueue
//
// Destroys all internal resources associated with the queue. It unpinns the
// Destroys all internal resources associated with the queue. It unpins the
// buffers provided in UvmToolsCreateEventQueue. Event Queue is also auto
// destroyed when corresponding session gets destroyed.
//
@@ -3534,7 +3598,7 @@ NV_STATUS UvmToolsDestroyEventQueue(UvmToolsEventQueueHandle queue);
// UvmEventQueueEnableEvents
//
// This call enables a particular event type in the event queue. All events are
// disabled by default. Any event type is considered listed if and only if it's
// disabled by default. Any event type is considered listed if and only if its
// corresponding value is equal to 1 (in other words, bit is set). Disabled
// events listed in eventTypeFlags are going to be enabled. Enabled events and
// events not listed in eventTypeFlags are not affected by this call.
@@ -3567,7 +3631,7 @@ NV_STATUS UvmToolsEventQueueEnableEvents(UvmToolsEventQueueHandle queue,
// UvmToolsEventQueueDisableEvents
//
// This call disables a particular event type in the event queue. Any event type
// is considered listed if and only if it's corresponding value is equal to 1
// is considered listed if and only if its corresponding value is equal to 1
// (in other words, bit is set). Enabled events listed in eventTypeFlags are
// going to be disabled. Disabled events and events not listed in eventTypeFlags
// are not affected by this call.
@@ -3605,7 +3669,7 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
//
// Counters position follows the layout of the memory that UVM driver decides to
// use. To obtain particular counter value, user should perform consecutive
// atomic reads at a a given buffer + offset address.
// atomic reads at a given buffer + offset address.
//
// It is not defined what is the initial value of a counter. User should rely on
// a difference between each snapshot.
@@ -3628,9 +3692,9 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
// Provided session is not valid
//
// NV_ERR_INSUFFICIENT_RESOURCES
// There could be multiple reasons for this error. One would be that it's
// not possible to allocate counters structure. Another would be that
// either event_buffer or event_control memory couldn't be pinned
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate counters structure. Another would be
// that either event_buffer or event_control memory couldn't be pinned
// (e.g. because of OS limitation of pinnable memory)
//
//------------------------------------------------------------------------------
@@ -3641,12 +3705,12 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
//------------------------------------------------------------------------------
// UvmToolsCreateProcessorCounters
//
// Creates the counters structure for tracking per-process counters.
// Creates the counters structure for tracking per-processor counters.
// These counters are disabled by default.
//
// Counters position follows the layout of the memory that UVM driver decides to
// use. To obtain particular counter value, user should perform consecutive
// atomic reads at a a given buffer + offset address.
// atomic reads at a given buffer + offset address.
//
// It is not defined what is the initial value of a counter. User should rely on
// a difference between each snapshot.
@@ -3662,7 +3726,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
// counters are destroyed.
//
// processorUuid: (INPUT)
// UUID of the resource, for which counters will provide statistic data.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID of
// the resource, for which counters will provide statistic data.
//
// counters: (OUTPUT)
// Handle to the created counters.
@@ -3672,9 +3738,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
// session handle does not refer to a valid tools session
//
// NV_ERR_INSUFFICIENT_RESOURCES
// There could be multiple reasons for this error. One would be that it's
// not possible to allocate counters structure. Another would be that
// either event_buffer or event_control memory couldn't be pinned
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate counters structure. Another would be
// that either event_buffer or event_control memory couldn't be pinned
// (e.g. because of OS limitation of pinnable memory)
//
// NV_ERR_INVALID_ARGUMENT
@@ -3690,7 +3756,7 @@ NV_STATUS UvmToolsCreateProcessorCounters(UvmToolsSessionHandle session,
// UvmToolsDestroyCounters
//
// Destroys all internal resources associated with this counters structure.
// It unpinns the buffer provided in UvmToolsCreate*Counters. Counters structure
// It unpins the buffer provided in UvmToolsCreate*Counters. Counters structure
// also gets destroyed when corresponding session is destroyed.
//
// Arguments:
@@ -3711,7 +3777,7 @@ NV_STATUS UvmToolsDestroyCounters(UvmToolsCountersHandle counters);
// UvmToolsEnableCounters
//
// This call enables certain counter types in the counters structure. Any
// counter type is considered listed if and only if it's corresponding value is
// counter type is considered listed if and only if its corresponding value is
// equal to 1 (in other words, bit is set). Disabled counter types listed in
// counterTypeFlags are going to be enabled. Already enabled counter types and
// counter types not listed in counterTypeFlags are not affected by this call.
@@ -3745,7 +3811,7 @@ NV_STATUS UvmToolsEnableCounters(UvmToolsCountersHandle counters,
// UvmToolsDisableCounters
//
// This call disables certain counter types in the counters structure. Any
// counter type is considered listed if and only if it's corresponding value is
// counter type is considered listed if and only if its corresponding value is
// equal to 1 (in other words, bit is set). Enabled counter types listed in
// counterTypeFlags are going to be disabled. Already disabled counter types and
// counter types not listed in counterTypeFlags are not affected by this call.
@@ -3890,32 +3956,72 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
// UvmToolsGetProcessorUuidTable
//
// Populate a table with the UUIDs of all the currently registered processors
// in the target process. When a GPU is registered, it is added to the table.
// When a GPU is unregistered, it is removed. As long as a GPU remains registered,
// its index in the table does not change. New registrations obtain the first
// unused index.
// in the target process. When a GPU is registered, it is added to the table.
// When a GPU is unregistered, it is removed. As long as a GPU remains
// registered, its index in the table does not change.
// Note that the index in the table corresponds to the processor ID reported
// in UvmEventEntry event records and that the table is not contiguously packed
// with non-zero UUIDs even with no GPU unregistrations.
//
// Arguments:
// session: (INPUT)
// Handle to the tools session.
//
// version: (INPUT)
// Requested version for the UUID table returned. The version must
// match the requested version of the event queue created with
// UvmToolsCreateEventQueue().
// See UvmEventEntry_V1 and UvmEventEntry_V2.
//
// table: (OUTPUT)
// Array of processor UUIDs, including the CPU's UUID which is always
// at index zero. The srcIndex and dstIndex fields of the
// UvmEventMigrationInfo struct index this array. Unused indices will
// have a UUID of zero.
// have a UUID of zero. Version UvmEventEntry_V1 only uses GPU UUIDs
// for the UUID of the physical GPU and only supports a single SMC
// partition registered per process. Version UvmEventEntry_V2 supports
// multiple SMC partitions registered per process and uses physical GPU
// UUIDs if the GPU is not SMC capable or SMC enabled and GPU instance
// UUIDs for SMC partitions.
// The table pointer can be NULL in which case, the size of the table
// needed to hold all the UUIDs is returned in 'count'.
//
// table_size: (INPUT)
// The size of the table in number of array elements. This can be
// zero if the table pointer is NULL.
//
// count: (OUTPUT)
// Set by UVM to the number of UUIDs written, including any gaps in
// the table due to unregistered GPUs.
// On output, it is set by UVM to the number of UUIDs needed to hold
// all the UUIDs, including any gaps in the table due to unregistered
// GPUs.
//
// Error codes:
// NV_ERR_INVALID_ADDRESS:
// writing to table failed.
// writing to table failed or the count pointer was invalid.
//
// NV_ERR_INVALID_ARGUMENT:
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
// The count pointer is NULL.
// See UvmToolsEventQueueVersion.
//
// NV_WARN_MISMATCHED_TARGET:
// The kernel returned a table suitable for UvmEventEntry_V1 events.
// (i.e., the kernel is older and doesn't support UvmEventEntry_V2).
//
// NV_ERR_NO_MEMORY:
// Internal memory allocation failed.
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(10)
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
NvProcessorUuid *table,
NvLength *count);
#else
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
UvmToolsEventQueueVersion version,
NvProcessorUuid *table,
NvLength table_size,
NvLength *count);
#endif
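An illustrative two-pass query built on the documented NULL-table behavior; allocating with calloc()/free() and checking the status of the sizing call are assumptions, and the helper is hypothetical:

// Illustrative only: size the table first (table == NULL), then fetch it.
// Requires <stdlib.h>; index 0 of the result is always the CPU UUID.
static NV_STATUS example_get_uuid_table(UvmToolsSessionHandle session,
                                        UvmToolsEventQueueVersion version,
                                        NvProcessorUuid **out_table,
                                        NvLength *out_count)
{
    NvLength needed = 0;
    NvProcessorUuid *table;
    NV_STATUS status = UvmToolsGetProcessorUuidTable(session, version, NULL, 0, &needed);

    if (status != NV_OK)
        return status;

    table = calloc(needed, sizeof(*table));
    if (!table)
        return NV_ERR_NO_MEMORY;

    status = UvmToolsGetProcessorUuidTable(session, version, table, needed, &needed);
    if (status != NV_OK) {
        free(table);
        return status;
    }

    *out_table = table;
    *out_count = needed;
    return NV_OK;
}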
//------------------------------------------------------------------------------
// UvmToolsFlushEvents

View File

@@ -34,16 +34,6 @@
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_ATS_PREFETCH_SUPPORTED() 1
#else
#define UVM_ATS_PREFETCH_SUPPORTED() 0
#endif
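The guard is not dropped outright: the same condition presumably reappears under the name UVM_HMM_RANGE_FAULT_SUPPORTED(), which uvm_ats_faults.c switches to below. A sketch of what that relocated definition is assumed to look like (its actual home is outside this diff):

#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
#else
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
#endif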
typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is

View File

@@ -30,7 +30,7 @@
#include <linux/mempolicy.h>
#include <linux/mmu_notifier.h>
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
#include <linux/hmm.h>
#endif
@@ -246,7 +246,7 @@ static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma,
return uvm_ats_region_from_start_end(start, end);
}
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
{
@@ -284,12 +284,12 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
int ret;
NvU64 start;
NvU64 end;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
struct hmm_range range;
uvm_page_index_t page_index;
uvm_va_block_region_t vma_region;
@@ -370,6 +370,8 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
mmu_interval_notifier_remove(range.notifier);
#else
uvm_page_mask_zero(residency_mask);
#endif
return status;
@@ -403,21 +405,24 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
NV_STATUS status;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
// Residency mask needs to be computed even if prefetching is disabled since
// the residency information is also needed by access counters servicing in
// uvm_ats_service_access_counters()
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
return status;
if (uvm_page_mask_empty(accessed_mask))
return status;
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
// Prefetch the entire region if none of the pages are resident on any node
// and if preferred_location is the faulting GPU.
if (ats_context->prefetch_state.has_preferred_location &&
@@ -637,8 +642,18 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
ats_batch_select_residency(gpu_va_space, vma, ats_context);
// Ignoring the return value of ats_compute_prefetch is ok since prefetching
// is just an optimization and servicing access counter migrations is still
// worthwhile even without any prefetching added. So, let servicing continue
// instead of returning early even if the prefetch computation fails.
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
// Remove pages which are already resident at the intended destination from
// the accessed_mask.
uvm_page_mask_andnot(&ats_context->accessed_mask,
&ats_context->accessed_mask,
&ats_context->prefetch_state.residency_mask);
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
NV_STATUS status;
NvU64 start = base + (subregion.first * PAGE_SIZE);

View File

@@ -318,10 +318,11 @@ int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessor
unsigned i;
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
memcpy(buffer, "UVM-GPU-", 8);
if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
return *buffer = 0;
memcpy(buffer, "UVM-GPU-", 8);
for (i = 0; i < 16; i++) {
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);

View File

@@ -151,22 +151,6 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
return NV_OK;
}
static void fix_memory_info_uuid(uvm_va_space_t *va_space, UvmGpuMemoryInfo *mem_info)
{
uvm_gpu_t *gpu;
// TODO: Bug 4351121: RM will return the GI UUID, but
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
// Match on GI UUID until the UVM user level API has been updated to use
// the GI UUID.
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
mem_info->uuid = gpu->parent->uuid;
break;
}
}
}
static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
{
NV_STATUS status = NV_OK;
@@ -197,11 +181,6 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
if (status != NV_OK)
return status;
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
// physical GPU UUID until the UVM user level has been updated to use
// the GI UUID.
fix_memory_info_uuid(va_space, &memory_info);
TEST_CHECK_GOTO(uvm_uuid_eq(&memory_info.uuid, &params->gpu_uuid), done);
TEST_CHECK_GOTO((memory_info.size == params->size), done);
@@ -309,11 +288,6 @@ static NV_STATUS test_get_rm_ptes_multi_gpu(uvm_va_space_t *va_space, UVM_TEST_G
if (status != NV_OK)
return status;
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
// physical GPU UUID until the UVM user level has been updated to use
// the GI UUID.
fix_memory_info_uuid(va_space, &memory_info);
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
memset(pte_buffer, 0, sizeof(pte_buffer));

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -314,7 +314,7 @@ static NV_STATUS uvm_suspend(void)
// interrupts in the bottom half in the future, the bottom half flush
// below will no longer be able to guarantee that all outstanding
// notifications have been handled.
uvm_gpu_access_counters_set_ignore(gpu, true);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, true);
uvm_parent_gpu_set_isr_suspended(gpu->parent, true);
@@ -373,13 +373,13 @@ static NV_STATUS uvm_resume(void)
// Bring the fault buffer software state back in sync with the
// hardware state.
uvm_gpu_fault_buffer_resume(gpu->parent);
uvm_parent_gpu_fault_buffer_resume(gpu->parent);
uvm_parent_gpu_set_isr_suspended(gpu->parent, false);
// Reenable access counter interrupt processing unless notifications
// have been set to be suppressed.
uvm_gpu_access_counters_set_ignore(gpu, false);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, false);
}
uvm_up_write(&g_uvm_global.pm.lock);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -59,6 +59,7 @@ MODULE_PARM_DESC(uvm_peer_copy, "Choose the addressing mode for peer copying, op
static void remove_gpu(uvm_gpu_t *gpu);
static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu);
static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu);
static void destroy_nvlink_peers(uvm_gpu_t *gpu);
@@ -241,6 +242,8 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
gpu->mem_info.max_allocatable_address = fb_info.maxAllocatableAddress;
}
gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
return NV_OK;
}
@@ -843,11 +846,11 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
if (!uvm_procfs_is_enabled())
return NV_OK;
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), uvm_gpu_uuid(gpu));
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->parent->uuid);
gpu_base_dir_entry = uvm_procfs_get_gpu_base_dir();
// Create UVM-GPU-${UUID}/${sub_processor_index} directory
// Create UVM-GPU-${physical-UUID}/${sub_processor_index} directory
snprintf(gpu_dir_name, sizeof(gpu_dir_name), "%u", uvm_id_sub_processor_index(gpu->id));
gpu->procfs.dir = NV_CREATE_PROC_DIR(gpu_dir_name, gpu->parent->procfs.dir);
@@ -855,7 +858,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
return NV_ERR_OPERATING_SYSTEM;
// Create symlink from ${gpu_id} to
// gpus/UVM-GPU-${UUID}/${sub_processor_index}
// UVM-GPU-${physical-UUID}/${sub_processor_index}
snprintf(symlink_name, sizeof(symlink_name), "%u", uvm_id_value(gpu->id));
snprintf(gpu_dir_name,
sizeof(gpu_dir_name),
@@ -867,6 +870,16 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
if (gpu->procfs.dir_symlink == NULL)
return NV_ERR_OPERATING_SYSTEM;
if (gpu->parent->smc.enabled) {
// Create symlink from UVM-GPU-${GI-UUID} to
// UVM-GPU-${physical-UUID}/${sub_processor_index}
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->uuid);
gpu->procfs.gpu_instance_uuid_symlink = proc_symlink(uuid_text_buffer, gpu_base_dir_entry, gpu_dir_name);
if (gpu->procfs.gpu_instance_uuid_symlink == NULL)
return NV_ERR_OPERATING_SYSTEM;
}
// GPU peer files are debug only
if (!uvm_procfs_is_debug_enabled())
return NV_OK;
@@ -882,6 +895,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
{
proc_remove(gpu->procfs.dir_peers);
proc_remove(gpu->procfs.gpu_instance_uuid_symlink);
proc_remove(gpu->procfs.dir_symlink);
proc_remove(gpu->procfs.dir);
}
@@ -1038,6 +1052,7 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
NvU32 num_entries;
NvU64 va_size;
NvU64 va_per_entry;
uvm_mmu_page_table_alloc_t *tree_alloc;
status = uvm_page_tree_init(gpu,
NULL,
@@ -1059,20 +1074,30 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
// Make sure that RM's part of the VA is aligned to the VA covered by a
// single top level PDE.
UVM_ASSERT_MSG(gpu->parent->rm_va_base % va_per_entry == 0,
"va_base 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_base, va_per_entry);
"va_base 0x%llx va_per_entry 0x%llx\n",
gpu->parent->rm_va_base,
va_per_entry);
UVM_ASSERT_MSG(gpu->parent->rm_va_size % va_per_entry == 0,
"va_size 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_size, va_per_entry);
"va_size 0x%llx va_per_entry 0x%llx\n",
gpu->parent->rm_va_size,
va_per_entry);
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));
tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.address, num_entries,
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.aperture == UVM_APERTURE_VID,
gpu_get_internal_pasid(gpu)));
tree_alloc->addr.address,
num_entries,
tree_alloc->addr.aperture == UVM_APERTURE_VID,
gpu_get_internal_pasid(gpu)));
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
return status;
}
gpu->rm_address_space_moved_to_page_tree = true;
return NV_OK;
@@ -1212,6 +1237,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
{
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
size_t len;
NV_STATUS status;
if (gpu->parent->smc.enabled) {
@@ -1229,6 +1256,20 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
uvm_uuid_copy(&gpu->uuid, &gpu_info->uuid);
gpu->smc.swizz_id = gpu_info->smcSwizzId;
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->parent->uuid);
snprintf(gpu->name,
sizeof(gpu->name),
"ID %u: %s",
uvm_id_value(gpu->id),
uuid_buffer + 4);
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->uuid);
len = strlen(gpu->name);
snprintf(gpu->name + len,
sizeof(gpu->name) - len,
" UVM-GI-%s",
uuid_buffer + 8);
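For illustration only (UUID values made up), the two snprintf() calls above produce a name such as:

// ID 3: GPU-12345678-1234-1234-1234-123456789abc UVM-GI-87654321-4321-4321-4321-cba987654321
// i.e. the physical GPU UUID (offset +4 strips "UVM-"), followed by the GPU
// instance UUID (offset +8 strips "UVM-GPU-") under the "UVM-GI-" prefix.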
// Initialize the per-GPU procfs dirs as early as possible so that other
// parts of the driver can add files in them as part of their per-GPU init.
status = init_procfs_dirs(gpu);
@@ -1338,7 +1379,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
uvm_parent_gpu_t *parent_gpu,
uvm_gpu_t **gpu_out)
{
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
NV_STATUS status;
bool alloc_parent = (parent_gpu == NULL);
uvm_gpu_t *gpu = NULL;
@@ -1364,13 +1404,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
if (alloc_parent)
fill_parent_gpu_info(parent_gpu, gpu_info);
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
snprintf(gpu->name,
sizeof(gpu->name),
"ID %u: %s",
uvm_id_value(gpu->id),
uuid_buffer);
// After this point all error clean up should be handled by remove_gpu()
if (!gpu_supports_uvm(parent_gpu)) {
@@ -1432,13 +1465,25 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
if (alloc_parent) {
if (gpu->parent->smc.enabled) {
status = discover_smc_peers(gpu);
if (status != NV_OK) {
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
atomic64_set(&gpu->retained_count, 0);
goto error;
}
}
else if (alloc_parent) {
status = discover_nvlink_peers(gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
// Nobody can have retained the GPU yet, since we still hold the global
// lock.
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
atomic64_set(&gpu->retained_count, 0);
goto error;
@@ -1686,7 +1731,7 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
nv_kthread_q_stop(&parent_gpu->lazy_free_q);
for (sub_processor_index = 0; sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
uvm_kvfree(parent_gpu);
@@ -1915,32 +1960,25 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
return uvm_parent_gpu_get_by_uuid_locked(gpu_uuid);
}
static uvm_gpu_t *gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid)
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
{
uvm_gpu_id_t gpu_id;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id(gpu_id) {
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);
if (gpu) {
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid)) {
UVM_ASSERT(!gpu->parent->smc.enabled);
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
return gpu;
}
}
}
return NULL;
}
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
{
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
return gpu_get_by_uuid_locked(gpu_uuid);
}
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
static uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
{
uvm_gpu_t *gpu;
@@ -1998,7 +2036,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based
// if this specific partition has been seen previously. The UUID-based
// look-up above may have succeeded for a different partition with the
// same parent GPU.
if (gpu_info->smcEnabled) {
@@ -2287,7 +2325,7 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
return NV_ERR_OPERATING_SYSTEM;
// Create a symlink from UVM GPU UUID (UVM-GPU-...) to the UVM GPU ID gpuB
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), uvm_gpu_uuid(remote));
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), &remote->uuid);
peer_caps->procfs.peer_symlink_file[local_idx] = proc_symlink(symlink_name,
local->procfs.dir_peers,
gpu_dir_name);
@@ -2297,6 +2335,24 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
return NV_OK;
}
static NV_STATUS init_procfs_peer_files(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
NV_STATUS status;
if (!uvm_procfs_is_debug_enabled())
return NV_OK;
status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
if (status != NV_OK)
return status;
status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
if (status != NV_OK)
return status;
return NV_OK;
}
static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
uvm_gpu_t *gpu1,
const UvmGpuP2PCapsParams *p2p_caps_params,
@@ -2377,16 +2433,41 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
}
if (!uvm_procfs_is_debug_enabled())
return NV_OK;
return init_procfs_peer_files(gpu0, gpu1);
}
status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
if (status != NV_OK)
return status;
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu)
{
NvU32 sub_processor_index;
uvm_gpu_t *other_gpu;
NV_STATUS status;
status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
if (status != NV_OK)
return status;
UVM_ASSERT(gpu);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(gpu->parent->smc.enabled);
for_each_sub_processor_index(sub_processor_index) {
uvm_gpu_peer_t *peer_caps;
other_gpu = gpu->parent->gpus[sub_processor_index];
if (!other_gpu || other_gpu == gpu)
continue;
peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
if (peer_caps->ref_count == 1)
continue;
UVM_ASSERT(peer_caps->ref_count == 0);
memset(peer_caps, 0, sizeof(*peer_caps));
peer_caps->ref_count = 1;
status = init_procfs_peer_files(gpu, other_gpu);
if (status != NV_OK) {
peer_caps->ref_count = 0;
return status;
}
}
return NV_OK;
}
@@ -2489,9 +2570,7 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
UVM_ASSERT(gpu);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (gpu->parent->smc.enabled)
return NV_OK;
UVM_ASSERT(!gpu->parent->smc.enabled);
for_each_gpu(other_gpu) {
UvmGpuP2PCapsParams p2p_caps_params;
@@ -2592,10 +2671,6 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
UVM_ASSERT(gpu0);
UVM_ASSERT(gpu1);
// P2P is not supported under SMC partitioning
UVM_ASSERT(!gpu0->parent->smc.enabled);
UVM_ASSERT(!gpu1->parent->smc.enabled);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);
@@ -2638,9 +2713,9 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
// IDs queried from the peer table above which are about to be removed from
// the global table.
if (gpu0->parent->access_counters_supported)
uvm_gpu_access_counter_buffer_flush(gpu0);
uvm_parent_gpu_access_counter_buffer_flush(gpu0->parent);
if (gpu1->parent->access_counters_supported)
uvm_gpu_access_counter_buffer_flush(gpu1);
uvm_parent_gpu_access_counter_buffer_flush(gpu1->parent);
memset(peer_caps, 0, sizeof(*peer_caps));
}
@@ -2668,12 +2743,17 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
static uvm_aperture_t uvm_gpu_peer_caps_aperture(uvm_gpu_peer_t *peer_caps, uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu)
{
size_t peer_index;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
// Indirect peers are accessed as sysmem addresses
if (peer_caps->is_indirect_peer)
return UVM_APERTURE_SYS;
// MIG instances in the same physical GPU have vidmem addresses
if (local_gpu->parent == remote_gpu->parent)
return UVM_APERTURE_VID;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
if (uvm_id_value(local_gpu->id) < uvm_id_value(remote_gpu->id))
peer_index = 0;
else
@@ -3285,12 +3365,19 @@ NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *fil
.user_client = params->hClient,
.user_object = params->hSmcPartRef,
};
NvProcessorUuid gpu_instance_uuid;
NV_STATUS status;
return uvm_va_space_register_gpu(va_space,
&params->gpu_uuid,
&user_rm_va_space,
&params->numaEnabled,
&params->numaNodeId);
status = uvm_va_space_register_gpu(va_space,
&params->gpu_uuid,
&user_rm_va_space,
&params->numaEnabled,
&params->numaNodeId,
&gpu_instance_uuid);
if (status == NV_OK)
uvm_uuid_copy(&params->gpu_uuid, &gpu_instance_uuid);
return status;
}
NV_STATUS uvm_api_unregister_gpu(UVM_UNREGISTER_GPU_PARAMS *params, struct file *filp)
@@ -3363,10 +3450,10 @@ NV_STATUS uvm_test_set_prefetch_filtering(UVM_TEST_SET_PREFETCH_FILTERING_PARAMS
switch (params->filtering_mode) {
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_ALL:
uvm_gpu_disable_prefetch_faults(gpu->parent);
uvm_parent_gpu_disable_prefetch_faults(gpu->parent);
break;
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_NONE:
uvm_gpu_enable_prefetch_faults(gpu->parent);
uvm_parent_gpu_enable_prefetch_faults(gpu->parent);
break;
default:
status = NV_ERR_INVALID_ARGUMENT;

View File

@@ -618,9 +618,10 @@ struct uvm_gpu_struct
// The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
NvProcessorUuid uuid;
// Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
// Nice printable name in the format:
// ID: 999: GPU-<parent_uuid> UVM-GI-<gi_uuid>.
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
char name[9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
// Refcount of the gpu, i.e. how many times it has been retained. This is
// roughly a count of how many times it has been registered with a VA space,
@@ -656,6 +657,10 @@ struct uvm_gpu_struct
// can allocate through PMM (PMA).
NvU64 max_allocatable_address;
// Max supported vidmem page size may be smaller than the max GMMU page
// size, because of the vMMU supported page sizes.
NvU64 max_vidmem_page_size;
struct
{
// True if the platform supports HW coherence and the GPU's memory
@@ -844,6 +849,9 @@ struct uvm_gpu_struct
struct proc_dir_entry *dir_symlink;
// The GPU instance UUID symlink if SMC is enabled.
struct proc_dir_entry *gpu_instance_uuid_symlink;
struct proc_dir_entry *info_file;
struct proc_dir_entry *dir_peers;
@@ -1210,11 +1218,6 @@ static const char *uvm_gpu_name(uvm_gpu_t *gpu)
return gpu->name;
}
static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
{
return &gpu->parent->uuid;
}
static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
{
if (gpu->parent->smc.enabled)
@@ -1234,6 +1237,9 @@ struct uvm_gpu_peer_struct
// - The global lock is held.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be SMC peers and were both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be NVLINK peers and were both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
@@ -1319,17 +1325,17 @@ static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struc
// Note that there is a uvm_gpu_get() function defined in uvm_global.h to break
// a circular dep between global and gpu modules.
// Get a uvm_gpu_t by UUID. This returns NULL if the GPU is not present. This
// is the general purpose call that should be used normally.
// That is, unless a uvm_gpu_t for a specific SMC partition needs to be
// retrieved, in which case uvm_gpu_get_by_parent_and_swizz_id() must be used
// instead.
// Get a uvm_gpu_t by UUID (physical GPU UUID if SMC is not enabled, otherwise
// GPU instance UUID).
// This returns NULL if the GPU is not present.
// This is the general purpose call that should be used normally.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// Get a uvm_parent_gpu_t by UUID. Like uvm_gpu_get_by_uuid(), this function
// returns NULL if the GPU has not been registered.
// Get a uvm_parent_gpu_t by UUID (physical GPU UUID).
// Like uvm_gpu_get_by_uuid(), this function returns NULL if the GPU has not
// been registered.
//
// LOCKING: requires the global lock to be held
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
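
A brief sketch of the lookup rules documented above, with hypothetical user_uuid/phys_uuid variables; both helpers must be called with the global lock held:

uvm_mutex_lock(&g_uvm_global.global_lock);

// Resolves a GI UUID when SMC is enabled, otherwise a physical GPU UUID.
uvm_gpu_t *gpu = uvm_gpu_get_by_uuid(&user_uuid);

// Always resolves a physical GPU UUID.
uvm_parent_gpu_t *parent = uvm_parent_gpu_get_by_uuid(&phys_uuid);

uvm_mutex_unlock(&g_uvm_global.global_lock);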
@@ -1340,13 +1346,6 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
// Get the uvm_gpu_t for a partition by parent and swizzId. This returns NULL if
// the partition hasn't been registered. This call needs to be used instead of
// uvm_gpu_get_by_uuid() when a specific partition is targeted.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id);
// Retain a gpu by uuid
// Returns the retained uvm_gpu_t in gpu_out on success
//

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -33,7 +33,7 @@
#include "uvm_va_space_mm.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_perf_module.h"
#include "uvm_ats_ibm.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
@@ -99,7 +99,8 @@ MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
"Number of remote accesses on a region required to trigger a notification."
"Valid values: [1, 65535]");
static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode);
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
uvm_gpu_buffer_flush_mode_t flush_mode);
static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};
@@ -126,7 +127,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
// Whether access counter migrations are enabled or not. The policy is as
// follows:
// - MIMC migrations are disabled by default on all systems except P9.
// - MIMC migrations are disabled by default on all non-ATS systems.
// - MOMC migrations are disabled by default on all systems
// - Users can override this policy by specifying on/off
static bool is_migration_enabled(uvm_access_counter_type_t type)
@@ -149,7 +150,7 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return false;
if (UVM_ATS_IBM_SUPPORTED())
if (UVM_ATS_SUPPORTED())
return g_uvm_global.ats.supported;
return false;
@@ -281,7 +282,7 @@ get_config_for_type(const uvm_access_counter_buffer_info_t *access_counters, uvm
&(access_counters)->current_config.momc;
}
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->access_counters_supported);
@@ -340,7 +341,7 @@ static void init_access_counter_types_config(const UvmGpuAccessCntrConfig *confi
UVM_ASSERT(counter_type_config->sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
}
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
@@ -444,12 +445,12 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
fail:
uvm_gpu_deinit_access_counters(parent_gpu);
uvm_parent_gpu_deinit_access_counters(parent_gpu);
return status;
}
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
{
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
@@ -475,7 +476,7 @@ void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
batch_context->phys.translations = NULL;
}
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
{
if (!parent_gpu->access_counters_supported)
return false;
@@ -518,7 +519,7 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntr
// taken control of the notify buffer since the GPU was initialized. Then
// flush old notifications. This will update the cached_put pointer.
access_counters->cached_get = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferGet);
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
access_counter_buffer_flush_locked(gpu->parent, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
access_counters->current_config.threshold = config->threshold;
@@ -537,20 +538,20 @@ error:
// If ownership is yielded as part of reconfiguration, the access counters
// handling refcount may not be 0
static void access_counters_yield_ownership(uvm_gpu_t *gpu)
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
// Wait for any pending clear operation before releasing ownership
status = uvm_tracker_wait(&access_counters->clear_tracker);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(gpu->parent->rm_device,
status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(parent_gpu->rm_device,
&access_counters->rm_info));
UVM_ASSERT(status == NV_OK);
}
@@ -579,14 +580,14 @@ static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu, UvmGpuAccessCntrConf
// Decrement the refcount of access counter enablement. If this is the last
// reference, disable the HW feature.
static void gpu_access_counters_disable(uvm_gpu_t *gpu)
static void parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(gpu->parent->isr.access_counters.handling_ref_count > 0);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count > 0);
if (--gpu->parent->isr.access_counters.handling_ref_count == 0)
access_counters_yield_ownership(gpu);
if (--parent_gpu->isr.access_counters.handling_ref_count == 0)
access_counters_yield_ownership(parent_gpu);
}
// Invoked during registration of the GPU in the VA space
@@ -598,7 +599,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
if (uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = NV_ERR_INVALID_DEVICE;
}
else {
@@ -616,7 +617,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
// modified to protect from concurrent enablement of access counters in
// another GPU
if (status == NV_OK)
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
}
// If this is the first reference taken on access counters, dropping the
@@ -626,22 +627,24 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
return status;
}
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu,
uvm_va_space_t *va_space)
{
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(parent_gpu->access_counters_supported);
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
if (uvm_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id)) {
gpu_access_counters_disable(gpu);
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
parent_gpu->id)) {
parent_gpu_access_counters_disable(parent_gpu);
// If this VA space reconfigured access counters, clear the
// ownership to allow other processes to invoke the reconfiguration
if (gpu->parent->access_counter_buffer_info.reconfiguration_owner == va_space)
gpu->parent->access_counter_buffer_info.reconfiguration_owner = NULL;
if (parent_gpu->access_counter_buffer_info.reconfiguration_owner == va_space)
parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
}
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
@@ -660,15 +663,16 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
}
static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode)
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
uvm_gpu_buffer_flush_mode_t flush_mode)
{
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
// Read PUT pointer from the GPU if requested
UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
@@ -680,28 +684,28 @@ static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_fl
while (get != put) {
// Wait until valid bit is set
UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin);
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
gpu->parent->access_counter_buffer_hal->entry_clear_valid(gpu->parent, get);
parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
++get;
if (get == access_counters->max_notifications)
get = 0;
}
write_get(gpu->parent, get);
write_get(parent_gpu, get);
}
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu)
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(parent_gpu->access_counters_supported);
// Disables access counter interrupts and notification servicing
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
if (gpu->parent->isr.access_counters.handling_ref_count > 0)
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
if (parent_gpu->isr.access_counters.handling_ref_count > 0)
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
@@ -1027,7 +1031,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
if (!iter.migratable)
continue;
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, address, processor);
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, service_context->block_context, address, processor);
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
// If the page is throttling, ignore the access counter
// notification
@@ -1212,7 +1216,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;
service_context->block_context->mm = mm;
uvm_va_block_context_init(service_context->block_context, mm);
if (uvm_va_block_is_hmm(va_block))
uvm_hmm_migrate_begin_wait(va_block);
@@ -1221,7 +1226,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
va_block,
&va_block_retry,
@@ -1506,8 +1512,6 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;
uvm_va_block_context_init(service_context->block_context, mm);
return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
@@ -1519,6 +1523,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_mask_t *accessed_pages,
const uvm_access_counter_buffer_entry_t *current_entry)
{
@@ -1546,7 +1551,7 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
page_index = uvm_va_block_cpu_page_index(va_block, addr);
resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, gpu->id);
// resident_id might be invalid or might already be the same as the GPU
// which received the notification if the memory was already migrated before
@@ -1602,6 +1607,7 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
UVM_ASSERT(va_block);
UVM_ASSERT(index < batch_context->virt.num_notifications);
@@ -1610,16 +1616,24 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
uvm_page_mask_zero(accessed_pages);
uvm_va_block_context_init(service_context->block_context, mm);
uvm_mutex_lock(&va_block->lock);
for (i = index; i < batch_context->virt.num_notifications; i++) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
NvU64 address = current_entry->address.address;
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
else
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end)) {
expand_notification_block(gpu_va_space,
va_block,
batch_context->block_service_context.block_context,
accessed_pages,
current_entry);
}
else {
break;
}
}
*out_index = i;
@@ -1698,6 +1712,9 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
// At least one notification should have been processed.
UVM_ASSERT(index < *out_index);
// TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
// location is set
// If no pages were actually migrated, don't clear the access counters.
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
@@ -1985,7 +2002,7 @@ NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_E
if (!gpu)
return NV_ERR_INVALID_DEVICE;
params->enabled = uvm_gpu_access_counters_required(gpu->parent);
params->enabled = uvm_parent_gpu_access_counters_required(gpu->parent);
uvm_gpu_release(gpu);
@@ -2050,11 +2067,11 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
goto exit_isr_unlock;
}
if (!uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = gpu_access_counters_enable(gpu, &config);
if (status == NV_OK)
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
else
goto exit_isr_unlock;
}
@@ -2066,7 +2083,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
// enabled in at least gpu. This inconsistent state is not visible to other
// threads or VA spaces because of the ISR lock, and it is immediately
// rectified by retaking ownership.
access_counters_yield_ownership(gpu);
access_counters_yield_ownership(gpu->parent);
status = access_counters_take_ownership(gpu, &config);
// Retaking ownership failed, so RM owns the interrupt.
@@ -2080,8 +2097,8 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
"Access counters interrupt still owned by RM, other VA spaces may experience failures");
}
uvm_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id);
gpu_access_counters_disable(gpu);
uvm_parent_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
parent_gpu_access_counters_disable(gpu->parent);
goto exit_isr_unlock;
}
@@ -2167,42 +2184,42 @@ exit_release_gpu:
return status;
}
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore)
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore)
{
bool change_intr_state = false;
if (!gpu->parent->access_counters_supported)
if (!parent_gpu->access_counters_supported)
return;
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
if (do_ignore) {
if (gpu->parent->access_counter_buffer_info.notifications_ignored_count++ == 0)
if (parent_gpu->access_counter_buffer_info.notifications_ignored_count++ == 0)
change_intr_state = true;
}
else {
UVM_ASSERT(gpu->parent->access_counter_buffer_info.notifications_ignored_count >= 1);
if (--gpu->parent->access_counter_buffer_info.notifications_ignored_count == 0)
UVM_ASSERT(parent_gpu->access_counter_buffer_info.notifications_ignored_count >= 1);
if (--parent_gpu->access_counter_buffer_info.notifications_ignored_count == 0)
change_intr_state = true;
}
if (change_intr_state) {
// We need to avoid an interrupt storm while ignoring notifications. We
// just disable the interrupt.
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
if (do_ignore)
uvm_parent_gpu_access_counters_intr_disable(gpu->parent);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
else
uvm_parent_gpu_access_counters_intr_enable(gpu->parent);
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
if (!do_ignore)
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
}
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
@@ -2216,7 +2233,7 @@ NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTER
return NV_ERR_INVALID_DEVICE;
if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_set_ignore(gpu, params->ignore);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, params->ignore);
else
status = NV_ERR_NOT_SUPPORTED;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,13 +27,13 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu);
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
// Ignore or unignore access counters notifications. Ignoring means that the
// bottom half is a no-op which just leaves notifications in the HW buffer
@@ -46,7 +46,7 @@ void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore);
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
@@ -63,7 +63,7 @@ void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
// Check whether access counters should be enabled when the given GPU is
// registered on any VA space.
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
// Functions used to enable/disable access counters on a GPU in the given VA
// space.
@@ -72,12 +72,12 @@ bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_gpu_access_counters_disable
// uvm_parent_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -100,7 +100,7 @@ static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) != 0)
return 0;
if (!uvm_gpu_replayable_faults_pending(parent_gpu)) {
if (!uvm_parent_gpu_replayable_faults_pending(parent_gpu)) {
up(&parent_gpu->isr.replayable_faults.service_lock.sem);
return 0;
}
@@ -137,7 +137,7 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
// interrupts will be triggered by the gpu and faults may stay
// unserviced. Therefore, if there is a fault in the queue, we schedule
// a bottom half unconditionally.
if (!uvm_gpu_non_replayable_faults_pending(parent_gpu))
if (!uvm_parent_gpu_non_replayable_faults_pending(parent_gpu))
return 0;
nv_kref_get(&parent_gpu->gpu_kref);
@@ -167,7 +167,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
return 0;
if (!uvm_gpu_access_counters_pending(parent_gpu)) {
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
return 0;
}
@@ -295,7 +295,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
uvm_va_block_context_t *block_context;
if (parent_gpu->replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init(parent_gpu);
status = uvm_parent_gpu_fault_buffer_init(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU fault buffer: %s, GPU: %s\n",
nvstatusToString(status),
@@ -361,7 +361,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
status = uvm_gpu_init_access_counters(parent_gpu);
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
@@ -423,7 +423,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
// bottom half never take the global lock, since we're holding it here.
//
// Note that it's safe to call nv_kthread_q_stop() even if
// nv_kthread_q_init() failed in uvm_gpu_init_isr().
// nv_kthread_q_init() failed in uvm_parent_gpu_init_isr().
nv_kthread_q_stop(&parent_gpu->isr.bottom_half_q);
nv_kthread_q_stop(&parent_gpu->isr.kill_channel_q);
}
@@ -438,8 +438,8 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
// replayable_faults.disable_intr_ref_count since they must retain the
// GPU across uvm_parent_gpu_replayable_faults_isr_lock/
// uvm_parent_gpu_replayable_faults_isr_unlock. This means the
// uvm_gpu_replayable_faults_disable_intr above could only have raced
// with bottom halves.
// uvm_parent_gpu_replayable_faults_disable_intr above could only have
// raced with bottom halves.
//
// If we cleared replayable_faults.handling before the bottom half got
// to its uvm_parent_gpu_replayable_faults_isr_unlock, when it
@@ -455,13 +455,13 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
uvm_parent_gpu_name(parent_gpu),
parent_gpu->isr.replayable_faults.disable_intr_ref_count);
uvm_gpu_fault_buffer_deinit(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);
}
if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_gpu_deinit_access_counters(parent_gpu);
uvm_parent_gpu_deinit_access_counters(parent_gpu);
block_context =
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -116,8 +116,8 @@
// There is no error handling in this function. The caller is in charge of
// calling uvm_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
@@ -145,7 +145,7 @@ NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *pare
return NV_OK;
}
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
@@ -163,7 +163,7 @@ void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_
non_replayable_faults->fault_cache = NULL;
}
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvBool has_pending_faults;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -26,12 +26,12 @@
#include <nvstatus.h>
#include "uvm_forward_decl.h"
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu);
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_NON_REPLAYABLE_FAULTS_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,6 +44,24 @@
// provides some background for understanding replayable faults, non-replayable
// faults, and how UVM services each fault type.
// The HW fault buffer flush mode instructs RM on how to flush the hardware
// replayable fault buffer; it is only used in Confidential Computing.
//
// Unless HW_FAULT_BUFFER_FLUSH_MODE_MOVE is functionally required (because UVM
// needs to inspect the faults currently present in the HW fault buffer) it is
// recommended to use HW_FAULT_BUFFER_FLUSH_MODE_DISCARD for performance
// reasons.
typedef enum
{
// Flush the HW fault buffer, discarding all the resulting faults. UVM never
// gets to see these faults.
HW_FAULT_BUFFER_FLUSH_MODE_DISCARD,
// Flush the HW fault buffer, and move all the resulting faults to the SW
// fault ("shadow") buffer.
HW_FAULT_BUFFER_FLUSH_MODE_MOVE,
} hw_fault_buffer_flush_mode_t;
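
A minimal sketch of the intended selection policy, using a hypothetical pick_hw_flush_mode() helper; the real call sites further below pass the mode directly to hw_fault_buffer_flush_locked():

static hw_fault_buffer_flush_mode_t pick_hw_flush_mode(bool need_to_inspect_faults)
{
    // MOVE only when UVM must observe the faults that were sitting in the HW
    // buffer (e.g. the fault cancellation path); DISCARD otherwise, for
    // performance.
    return need_to_inspect_faults ? HW_FAULT_BUFFER_FLUSH_MODE_MOVE :
                                    HW_FAULT_BUFFER_FLUSH_MODE_DISCARD;
}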
#define UVM_PERF_REENABLE_PREFETCH_FAULTS_LAPSE_MSEC_DEFAULT 1000
// Lapse of time in milliseconds after which prefetch faults can be re-enabled.
@@ -226,7 +244,7 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
batch_context->utlbs = NULL;
}
NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
@@ -253,7 +271,7 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
goto fail;
if (parent_gpu->non_replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
status = uvm_parent_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
if (status != NV_OK)
goto fail;
}
@@ -261,28 +279,28 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
fail:
uvm_gpu_fault_buffer_deinit(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);
return status;
}
// Reinitialize state relevant to replayable fault handling after returning
// from a power management cycle.
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->replayable_faults_supported);
fault_buffer_reinit_replayable_faults(parent_gpu);
}
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (parent_gpu->non_replayable_faults_supported)
uvm_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);
fault_buffer_deinit_replayable_faults(parent_gpu);
@@ -297,7 +315,7 @@ void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
}
}
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
@@ -533,25 +551,26 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
parent_gpu->fault_buffer_hal->write_get(parent_gpu, get);
}
static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu)
// In Confidential Computing GSP-RM owns the HW replayable fault buffer.
// Flushing the fault buffer implies flushing both the HW buffer (using a RM
// API), and the SW buffer accessible by UVM ("shadow" buffer).
//
// The HW buffer needs to be flushed first. This is because, once that flush
// completes, any faults that were present in the HW buffer have been moved to
// the shadow buffer, or have been discarded by RM.
static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_fault_buffer_flush_mode_t flush_mode)
{
NV_STATUS status = NV_OK;
NV_STATUS status;
NvBool is_flush_mode_move;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
UVM_ASSERT((flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE) || (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_DISCARD));
// When Confidential Computing is enabled, GSP-RM owns the HW replayable
// fault buffer. Flushing the fault buffer implies flushing both the HW
// buffer (using a RM API), and the SW buffer accessible by UVM ("shadow"
// buffer).
//
// The HW buffer needs to be flushed first. This is because, once that
// flush completes, any faults that were present in the HW buffer when
// fault_buffer_flush_locked is called, are now either flushed from the HW
// buffer, or are present in the shadow buffer and are about to be discarded
// too.
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
// Flush the HW replayable buffer owned by GSP-RM.
status = nvUvmInterfaceFlushReplayableFaultBuffer(parent_gpu->rm_device);
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
UVM_ASSERT(status == NV_OK);
@@ -595,10 +614,9 @@ static NV_STATUS fault_buffer_flush_locked(uvm_gpu_t *gpu,
// Read PUT pointer from the GPU if requested
if (flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT || flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT) {
status = hw_fault_buffer_flush_locked(parent_gpu);
status = hw_fault_buffer_flush_locked(parent_gpu, HW_FAULT_BUFFER_FLUSH_MODE_DISCARD);
if (status != NV_OK)
return status;
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
}
@@ -1435,7 +1453,10 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_fault_access_type_to_prot(service_access_type)))
continue;
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, current_entry->fault_address, gpu->id);
thrashing_hint = uvm_perf_thrashing_get_hint(va_block,
block_context->block_context,
current_entry->fault_address,
gpu->id);
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
// Throttling is implemented by sleeping in the fault handler on
// the CPU and by continuing to process faults on other pages on
@@ -1981,7 +2002,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_servic
// in the HW buffer. When GSP owns the HW buffer, we also have to wait for
// GSP to copy all available faults from the HW buffer into the shadow
// buffer.
status = hw_fault_buffer_flush_locked(gpu->parent);
status = hw_fault_buffer_flush_locked(gpu->parent, HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
if (status != NV_OK)
goto done;
@@ -2738,14 +2759,14 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu, uvm_fau
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_gpu_disable_prefetch_faults(parent_gpu);
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
uvm_gpu_enable_prefetch_faults(parent_gpu);
uvm_parent_gpu_enable_prefetch_faults(parent_gpu);
}
}
@@ -2872,7 +2893,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
UVM_DBG_PRINT("Error servicing replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
}
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
@@ -2883,7 +2904,7 @@ void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
}
}
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
@@ -2940,7 +2961,7 @@ NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARA
do {
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
pending = uvm_gpu_replayable_faults_pending(gpu->parent);
pending = uvm_parent_gpu_replayable_faults_pending(gpu->parent);
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
if (!pending)

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -52,12 +52,12 @@ typedef enum
const char *uvm_perf_fault_replay_policy_string(uvm_perf_fault_replay_policy_t fault_replay);
NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
// Clear valid bit for all remaining unserviced faults in the buffer, set GET to
// PUT, and push a fault replay of type UVM_FAULT_REPLAY_TYPE_START. It does not
@@ -68,8 +68,8 @@ bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu);
// Enable/disable HW support for prefetch-initiated faults
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
// Service pending replayable faults on the given GPU. This function must be
// only called from the ISR bottom half

View File

@@ -1306,7 +1306,7 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
uvm_va_policy_node_t *node;
uvm_va_block_region_t region;
uvm_processor_mask_t map_processors;
uvm_processor_mask_t *map_processors = &block_context->hmm.map_processors_eviction;
uvm_processor_id_t id;
NV_STATUS tracker_status;
NV_STATUS status = NV_OK;
@@ -1333,9 +1333,9 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
// Exclude the processors that have already been mapped due to
// AccessedBy.
uvm_processor_mask_andnot(&map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
uvm_processor_mask_andnot(map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
for_each_gpu_id_in_mask(id, &map_processors) {
for_each_gpu_id_in_mask(id, map_processors) {
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
uvm_va_block_gpu_state_t *gpu_state;
@@ -1866,7 +1866,7 @@ static void lock_block_cpu_page(uvm_va_block_t *va_block,
unsigned long *dst_pfns,
uvm_page_mask_t *same_devmem_page_mask)
{
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(src_page), page_index);
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_any_chunk_for_page(va_block, page_index);
uvm_va_block_region_t chunk_region;
struct page *dst_page;
@@ -2708,7 +2708,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
// Since there is a CPU resident page, there shouldn't be one
// anywhere else. TODO: Bug 3660922: Need to handle read
// duplication at some point.
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
service_context->block_context,
page_index));
// migrate_vma_setup() was able to isolate and lock the page;
// therefore, it is CPU resident and not mapped.
@@ -2725,8 +2727,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
// used for GPU to GPU copies. It can't be an evicted page because
// migrate_vma_setup() would have found a source page.
if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
service_context->block_context,
page_index));
hmm_va_block_cpu_page_unpopulate(va_block, page_index, NULL);
}
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2019 NVidia Corporation
Copyright (c) 2013-2023 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -320,7 +320,7 @@ typedef struct
typedef struct
{
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS]; // IN
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // IN
NvU32 numGpus; // IN
NvU64 serverId NV_ALIGN_BYTES(8); // OUT
NV_STATUS rmStatus; // OUT
@@ -344,9 +344,9 @@ typedef struct
typedef struct
{
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS]; // OUT
NvU32 validCount; // OUT
NV_STATUS rmStatus; // OUT
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // OUT
NvU32 validCount; // OUT
NV_STATUS rmStatus; // OUT
} UVM_GET_GPU_UUID_TABLE_PARAMS;
#if defined(WIN32) || defined(WIN64)
@@ -494,7 +494,7 @@ typedef struct
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NvU64 offset NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NvS32 rmCtrlFd; // IN
NvU32 hClient; // IN
@@ -552,7 +552,7 @@ typedef struct
typedef struct
{
NvProcessorUuid gpu_uuid; // IN
NvProcessorUuid gpu_uuid; // IN/OUT
NvBool numaEnabled; // OUT
NvS32 numaNodeId; // OUT
NvS32 rmCtrlFd; // IN
@@ -835,7 +835,14 @@ typedef struct
//
// Initialize any tracker object such as a queue or counter
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters, UvmToolsCreateProcessorCounters
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters,
// UvmToolsCreateProcessorCounters.
// Note that the order of structure elements has the version as the last field.
// This is used to tell whether the kernel supports V2 events or not because
// the V1 UVM_TOOLS_INIT_EVENT_TRACKER ioctl would not read or update that
// field but V2 will. This is needed because it is possible to create an event
// queue before CUDA is initialized which means UvmSetDriverVersion() hasn't
// been called yet and the kernel version is unknown.
//
#define UVM_TOOLS_INIT_EVENT_TRACKER UVM_IOCTL_BASE(56)
typedef struct
@@ -847,6 +854,8 @@ typedef struct
NvU32 allProcessors; // IN
NvU32 uvmFd; // IN
NV_STATUS rmStatus; // OUT
NvU32 requestedVersion; // IN
NvU32 grantedVersion; // OUT
} UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS;
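
A hedged user-space sketch of the version negotiation described above; uvm_fd and the version value are placeholders, the other queue-setup fields required for a real call are omitted, and an older (V1) kernel simply leaves grantedVersion untouched:

UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS init = {0};
init.requestedVersion = 2;  // assumed identifier for V2 events
init.grantedVersion = 0;    // sentinel: a V1 kernel never writes this field
ioctl(uvm_fd, UVM_TOOLS_INIT_EVENT_TRACKER, &init);
if (init.grantedVersion == 0) {
    // Kernel predates the version fields: only UvmEventEntry_V1 is available.
}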
//
@@ -927,6 +936,12 @@ typedef struct
//
// UvmToolsGetProcessorUuidTable
// Note that tablePtr != 0 and count == 0 means that tablePtr is assumed to be
// an array of size UVM_MAX_PROCESSORS_V1 and that only UvmEventEntry_V1
// processor IDs (physical GPU UUIDs) will be reported.
// tablePtr == 0 and count == 0 can be used to query how many processors are
// present in order to dynamically allocate the correct size array since the
// total number of processors is returned in 'count'.
//
#define UVM_TOOLS_GET_PROCESSOR_UUID_TABLE UVM_IOCTL_BASE(64)
typedef struct
@@ -934,6 +949,7 @@ typedef struct
NvU64 tablePtr NV_ALIGN_BYTES(8); // IN
NvU32 count; // IN/OUT
NV_STATUS rmStatus; // OUT
NvU32 version; // OUT
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;
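
A hedged user-space sketch of the two-step query described in the comment above; uvm_fd is a placeholder for an open UVM file descriptor and error handling is omitted:

UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS tbl = {0};

// tablePtr == 0 and count == 0: ask how many processors are present.
ioctl(uvm_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &tbl);

// Allocate exactly tbl.count entries and repeat the call (count is now
// non-zero) to receive the full table and the reported version.
NvProcessorUuid *uuids = calloc(tbl.count, sizeof(*uuids));
tbl.tablePtr = (NvU64)(uintptr_t)uuids;
ioctl(uvm_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &tbl);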
@@ -979,7 +995,7 @@ typedef struct
{
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;

View File

@@ -114,6 +114,16 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define UVM_IS_CONFIG_HMM() 0
#endif
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
#else
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
#endif
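
An illustrative guard showing how the new macro is meant to be consumed (the function name is hypothetical): code depending on hmm_range_fault() compiles only when both prerequisites are met, with a stub otherwise:

#if UVM_HMM_RANGE_FAULT_SUPPORTED()
static bool uvm_ats_can_query_residency(void) { return true; }
#else
static bool uvm_ats_can_query_residency(void) { return false; }
#endif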
// Various issues prevent us from using mmu_notifiers in older kernels. These
// include:
// - ->release being called under RCU instead of SRCU: fixed by commit

View File

@@ -633,8 +633,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
uvm_gpu_t *mapping_gpu,
const UvmGpuMemoryInfo *mem_info)
{
uvm_gpu_t *owning_gpu = NULL;
uvm_gpu_t *gpu;
uvm_gpu_t *owning_gpu;
if (mem_info->egm)
UVM_ASSERT(mem_info->sysmem);
@@ -653,16 +652,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
// registered.
// This also checks whether the EGM owning GPU is registered.
// TODO: Bug 4351121: RM will return the GI UUID, but
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
// Match on GI UUID until the UVM user level API has been updated to use
// the GI UUID.
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
owning_gpu = gpu;
break;
}
}
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
if (!owning_gpu)
return NV_ERR_INVALID_DEVICE;
@@ -954,6 +944,12 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
goto error;
}
// Check for the maximum page size for the mapping of vidmem allocations,
// as the vMMU segment size may limit the range of page sizes.
if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
(mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;
mem_info.pageSize = mapping_page_size;
status = uvm_va_range_map_rm_allocation(va_range, mapping_gpu, &mem_info, map_rm_params, ext_gpu_map, out_tracker);
@@ -989,7 +985,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
if (uvm_api_range_invalid_4k(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
return NV_ERR_INVALID_ARGUMENT;
uvm_va_space_down_read_rm(va_space);

View File

@@ -86,7 +86,7 @@ static NV_STATUS block_migrate_map_mapped_pages(uvm_va_block_t *va_block,
// Only map those pages that are not already mapped on destination
for_each_va_block_unset_page_in_region_mask(page_index, pages_mapped_on_destination, region) {
prot = uvm_va_block_page_compute_highest_permission(va_block, dest_id, page_index);
prot = uvm_va_block_page_compute_highest_permission(va_block, va_block_context, dest_id, page_index);
if (prot == UVM_PROT_NONE)
continue;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -149,6 +149,26 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
return NV_OK;
}
// The aperture may filter the biggest page size:
// - UVM_APERTURE_VID biggest page size on vidmem mappings
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
static NvU32 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
{
UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
// There may be scenarios where the GMMU must use a subset of the supported
// page sizes, e.g., to comply with the vMMU supported page sizes due to
// segmentation sizes.
if (aperture == UVM_APERTURE_VID) {
UVM_ASSERT(tree->gpu->mem_info.max_vidmem_page_size <= NV_U32_MAX);
return (NvU32) tree->gpu->mem_info.max_vidmem_page_size;
}
else {
return 1 << __fls(tree->hal->page_sizes());
}
}
static NV_STATUS phys_mem_allocate_vidmem(uvm_page_tree_t *tree,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
@@ -856,7 +876,7 @@ static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
if (!page_tree_ats_init_required(tree))
return NV_OK;
page_size = uvm_mmu_biggest_page_size(tree);
page_size = mmu_biggest_page_size(tree, UVM_APERTURE_VID);
uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
@@ -1090,6 +1110,8 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
tree->gpu_va_space = gpu_va_space;
tree->big_page_size = big_page_size;
UVM_ASSERT(gpu->mem_info.max_vidmem_page_size & tree->hal->page_sizes());
page_tree_set_location(tree, location);
uvm_tracker_init(&tree->tracker);
@@ -2301,7 +2323,7 @@ NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)
UVM_ASSERT(!uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));
page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_VID);
size = UVM_ALIGN_UP(gpu->mem_info.max_allocatable_address + 1, page_size);
UVM_ASSERT(page_size);
@@ -2338,9 +2360,9 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
if (gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_VIRTUAL || peer->mem_info.size == 0)
return NV_OK;
page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
aperture = uvm_gpu_peer_aperture(gpu, peer);
page_size = mmu_biggest_page_size(&gpu->address_space_tree, aperture);
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
peer_mapping = uvm_gpu_get_peer_mapping(gpu, peer->id);
phys_offset = 0ULL;
@@ -2783,7 +2805,7 @@ static NV_STATUS create_dynamic_sysmem_mapping(uvm_gpu_t *gpu)
// sysmem mappings with 128K entries.
UVM_ASSERT(is_power_of_2(mapping_size));
UVM_ASSERT(mapping_size >= UVM_SIZE_1GB);
UVM_ASSERT(mapping_size >= uvm_mmu_biggest_page_size(&gpu->address_space_tree));
UVM_ASSERT(mapping_size >= mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS));
UVM_ASSERT(mapping_size <= flat_sysmem_va_size);
flat_sysmem_va_size = UVM_ALIGN_UP(flat_sysmem_va_size, mapping_size);
@@ -2828,7 +2850,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
if (sysmem_mapping->range_vec == NULL) {
uvm_gpu_address_t virtual_address = uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, curr_pa);
NvU64 phys_offset = curr_pa;
NvU32 page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
NvU32 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
uvm_pmm_alloc_flags_t pmm_flags;
// No eviction is requested when allocating the page tree storage,

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -612,6 +612,9 @@ static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
return uvm_mmu_page_tree_entries(tree, depth, page_size) * page_size;
}
// Page sizes supported by the GPU. Use uvm_mmu_biggest_page_size() to retrieve
// the largest page size supported in a given system, which considers the GMMU
// and vMMU page sizes and segment sizes.
static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)
{
UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%x\n", page_size);
@@ -642,11 +645,6 @@ static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_pa
return page_size;
}
static NvU32 uvm_mmu_biggest_page_size(uvm_page_tree_t *tree)
{
return 1 << __fls(tree->hal->page_sizes());
}
static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU32 page_size)
{
return tree->hal->entry_size(tree->hal->page_table_depth(page_size));

View File

@@ -1442,6 +1442,7 @@ static bool preferred_location_is_thrashing(uvm_processor_id_t preferred_locatio
static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thrashing_info_t *va_space_thrashing,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
page_thrashing_info_t *page_thrashing,
uvm_processor_id_t requester)
@@ -1460,7 +1461,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;
closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, requester);
closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, requester);
if (uvm_va_block_is_hmm(va_block)) {
// HMM pages always start out resident on the CPU but may not be
// recorded in the va_block state because hmm_range_fault() or
@@ -1601,6 +1602,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
// that case we keep the page pinned while applying the same algorithm as in
// Phase1.
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address,
uvm_processor_id_t requester)
{
@@ -1713,6 +1715,7 @@ uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
else {
hint = get_hint_for_migration_thrashing(va_space_thrashing,
va_block,
va_block_context,
page_index,
page_thrashing,
requester);

View File

@@ -74,7 +74,9 @@ typedef struct
} uvm_perf_thrashing_hint_t;
// Obtain a hint to prevent thrashing on the page with given address
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block, NvU64 address,
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address,
uvm_processor_id_t requester);
// Obtain a pointer to a mask with the processors that are thrashing on the

View File

@@ -1408,8 +1408,6 @@ uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(accessing_gpu, gpu);
uvm_gpu_identity_mapping_t *gpu_peer_mapping;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
if (peer_caps->is_indirect_peer ||
(accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_PHYSICAL)) {
// Indirect peers are accessed as sysmem addresses, so they don't need

View File

@@ -1082,6 +1082,7 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
{
uvm_va_range_t *va_range;
uvm_va_block_t *va_block = NULL;
uvm_va_block_context_t *va_block_context = NULL;
NvU32 num_blocks;
NvU32 index = 0;
uvm_gpu_phys_address_t phys_addr = {0};
@@ -1099,9 +1100,12 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
}
TEST_CHECK_RET(va_block);
va_block_context = uvm_va_block_context_alloc(NULL);
TEST_CHECK_RET(va_block_context);
uvm_mutex_lock(&va_block->lock);
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, 0, gpu->id), gpu->id);
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, va_block_context, 0, gpu->id), gpu->id);
if (is_resident) {
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
phys_addr.address = UVM_ALIGN_DOWN(phys_addr.address, UVM_VA_BLOCK_SIZE);
@@ -1109,6 +1113,8 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
uvm_mutex_unlock(&va_block->lock);
uvm_va_block_context_free(va_block_context);
TEST_CHECK_RET(is_resident);
// Perform the lookup for the whole root chunk

View File

@@ -25,6 +25,8 @@
#include "uvm_processors.h"
static struct kmem_cache *g_uvm_processor_mask_cache __read_mostly;
const uvm_processor_mask_t g_uvm_processor_mask_cpu = { .bitmap = { 1 << UVM_PARENT_ID_CPU_VALUE }};
const uvm_processor_mask_t g_uvm_processor_mask_empty = { };
NV_STATUS uvm_processor_mask_cache_init(void)
{

View File

@@ -522,6 +522,9 @@ UVM_PROCESSOR_MASK(uvm_processor_mask_t, \
uvm_processor_id_t, \
uvm_id_from_value)
extern const uvm_processor_mask_t g_uvm_processor_mask_cpu;
extern const uvm_processor_mask_t g_uvm_processor_mask_empty;
// Like uvm_processor_mask_subset() but ignores the CPU in the subset mask.
// Returns whether the GPUs in subset are a subset of the GPUs in mask.
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,
@@ -567,6 +570,10 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas
(uvm_id_value(i) < uvm_id_value(uvm_gpu_id_from_parent_gpu_id(id)) + UVM_PARENT_ID_MAX_SUB_PROCESSORS); \
i = uvm_gpu_id_next(i))
// Helper to iterate over all sub processor indexes.
#define for_each_sub_processor_index(i) \
for (i = 0; i < UVM_PARENT_ID_MAX_SUB_PROCESSORS; i++)
// Helper to iterate over all valid processor ids.
#define for_each_id(i) for (i = UVM_ID_CPU; UVM_ID_IS_VALID(i); i = uvm_id_next(i))
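Editorial sketch of the iterators above; the helper name is hypothetical and assumes this header is included:

    static NvU32 example_count_valid_ids(void)
    {
        uvm_processor_id_t id;
        NvU32 count = 0;

        // Visits the CPU id followed by every valid GPU id.
        for_each_id(id)
            count++;

        return count;
    }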

View File

@@ -41,15 +41,11 @@
static NV_STATUS uvm_test_get_gpu_ref_count(UVM_TEST_GET_GPU_REF_COUNT_PARAMS *params, struct file *filp)
{
NvU64 retained_count = 0;
uvm_parent_gpu_t *parent_gpu;
uvm_gpu_t *gpu = NULL;
uvm_mutex_lock(&g_uvm_global.global_lock);
parent_gpu = uvm_parent_gpu_get_by_uuid(&params->gpu_uuid);
if (parent_gpu)
gpu = uvm_gpu_get_by_parent_and_swizz_id(parent_gpu, params->swizz_id);
gpu = uvm_gpu_get_by_uuid(&params->gpu_uuid);
if (gpu != NULL)
retained_count = uvm_gpu_retained_count(gpu);

View File

@@ -40,7 +40,6 @@ typedef struct
{
// In params
NvProcessorUuid gpu_uuid;
NvU32 swizz_id;
// Out params
NvU64 ref_count NV_ALIGN_BYTES(8);
NV_STATUS rmStatus;
@@ -192,7 +191,7 @@ typedef struct
NvU32 read_duplication; // Out (UVM_TEST_READ_DUPLICATION_POLICY)
NvProcessorUuid preferred_location; // Out
NvS32 preferred_cpu_nid; // Out
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS]; // Out
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 accessed_by_count; // Out
NvU32 type; // Out (UVM_TEST_VA_RANGE_TYPE)
union
@@ -505,7 +504,12 @@ typedef struct
typedef struct
{
// In params
UvmEventEntry entry; // contains only NvUxx types
union
{
UvmEventEntry_V1 entry_v1; // contains only NvUxx types
UvmEventEntry_V2 entry_v2; // contains only NvUxx types
};
NvU32 version;
NvU32 count;
// Out param
@@ -620,7 +624,7 @@ typedef struct
// Array of processors which have a resident copy of the page containing
// lookup_address.
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS]; // Out
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 resident_on_count; // Out
// If the memory is resident on the CPU, the NUMA node on which the page
@@ -631,24 +635,24 @@ typedef struct
// system-page-sized portion of this allocation which contains
// lookup_address is guaranteed to be resident on the corresponding
// processor.
NvU32 resident_physical_size[UVM_MAX_PROCESSORS]; // Out
NvU32 resident_physical_size[UVM_MAX_PROCESSORS_V2]; // Out
// The physical address of the physical allocation backing lookup_address.
NvU64 resident_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
NvU64 resident_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
// Array of processors which have a virtual mapping covering lookup_address.
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS]; // Out
NvU32 mapping_type[UVM_MAX_PROCESSORS]; // Out
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 mapping_type[UVM_MAX_PROCESSORS_V2]; // Out
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
NvU32 mapped_on_count; // Out
// The size of the virtual mapping covering lookup_address on each
// mapped_on processor.
NvU32 page_size[UVM_MAX_PROCESSORS]; // Out
NvU32 page_size[UVM_MAX_PROCESSORS_V2]; // Out
// Array of processors which have physical memory populated that would back
// lookup_address if it was resident.
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS]; // Out
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 populated_on_count; // Out
NV_STATUS rmStatus; // Out

File diff suppressed because it is too large Load Diff

View File

@@ -52,8 +52,19 @@ typedef enum
typedef unsigned long long UvmStream;
#define UVM_MAX_GPUS NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS (UVM_MAX_GPUS + 1)
// The maximum number of GPUs changed when multiple MIG instances per
// uvm_parent_gpu_t were added. See UvmEventQueueCreate().
#define UVM_MAX_GPUS_V1 NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS_V1 (UVM_MAX_GPUS_V1 + 1)
#define UVM_MAX_GPUS_V2 (NV_MAX_DEVICES * NV_MAX_SUBDEVICES)
#define UVM_MAX_PROCESSORS_V2 (UVM_MAX_GPUS_V2 + 1)
// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
#define UVM_MAX_GPUS UVM_MAX_GPUS_V1
#define UVM_MAX_PROCESSORS UVM_MAX_PROCESSORS_V1
#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS_V2 + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))
#define UVM_INIT_FLAGS_DISABLE_HMM ((NvU64)0x1)
#define UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE ((NvU64)0x2)
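Editorial note on the sizing above, with the driver-wide limits stated as assumptions: if NV_MAX_DEVICES is 32 and NV_MAX_SUBDEVICES is 8, then UVM_MAX_GPUS_V2 = 32 * 8 = 256, UVM_MAX_PROCESSORS_V2 = 257, and UVM_PROCESSOR_MASK_SIZE = (257 + 63) / 64 = 5 NvU64 words per processor mask.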
@@ -152,6 +163,8 @@ typedef enum {
typedef struct
{
// UUID of the physical GPU if the GPU is not SMC capable or SMC enabled,
// or the GPU instance UUID of the partition.
NvProcessorUuid gpuUuid;
NvU32 gpuMappingType; // UvmGpuMappingType
NvU32 gpuCachingType; // UvmGpuCachingType
@@ -410,7 +423,29 @@ typedef struct
NvU32 pid; // process id causing the fault
NvU32 threadId; // thread id causing the fault
NvU64 pc; // address of the instruction causing the fault
} UvmEventCpuFaultInfo;
} UvmEventCpuFaultInfo_V1;
typedef struct
{
//
// eventType has to be 1st argument of this structure. Setting eventType to
// UvmEventTypeMemoryViolation helps to identify event data in a queue.
//
NvU8 eventType;
NvU8 accessType; // read/write violation (UvmEventMemoryAccessType)
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets.
//
NvU16 padding16Bits;
NvS32 nid; // NUMA node ID of faulting CPU
NvU64 address; // faulting address
NvU64 timeStamp; // cpu time when the fault occurred
NvU32 pid; // process id causing the fault
NvU32 threadId; // thread id causing the fault
NvU64 pc; // address of the instruction causing the fault
} UvmEventCpuFaultInfo_V2;
typedef enum
{
@@ -567,7 +602,49 @@ typedef struct
// on the gpu
NvU64 endTimeStampGpu; // time stamp when the migration finished
// on the gpu
} UvmEventMigrationInfo;
} UvmEventMigrationInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure. Setting eventType
// to UvmEventTypeMigration helps to identify event data in a queue.
//
NvU8 eventType;
//
// Cause that triggered the migration
//
NvU8 migrationCause;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU16 padding16Bits;
//
// Indices are used for the source and destination of migration instead of
// using gpu uuid/cpu id. This reduces the size of each event. The index to
// gpuUuid relation can be obtained from UvmToolsGetProcessorUuidTable.
// Currently we do not distinguish between CPUs so they all use index 0.
//
NvU16 srcIndex; // source CPU/GPU index
NvU16 dstIndex; // destination CPU/GPU index
NvS32 srcNid; // source CPU NUMA node ID
NvS32 dstNid; // destination CPU NUMA node ID
NvU64 address; // base virtual addr used for migration
NvU64 migratedBytes; // number of bytes migrated
NvU64 beginTimeStamp; // cpu time stamp when the memory transfer
// was queued on the gpu
NvU64 endTimeStamp; // cpu time stamp when the memory transfer
// finalization was communicated to the cpu
// For asynchronous operations this field
// will be zero
NvU64 rangeGroupId; // range group tied with this migration
NvU64 beginTimeStampGpu; // time stamp when the migration started
// on the gpu
NvU64 endTimeStampGpu; // time stamp when the migration finished
// on the gpu
} UvmEventMigrationInfo_V2;
typedef enum
{
@@ -633,7 +710,64 @@ typedef struct
//
NvU8 padding8Bits;
NvU16 padding16Bits;
} UvmEventGpuFaultInfo;
} UvmEventGpuFaultInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeGpuFault helps to identify event data in
// a queue.
//
NvU8 eventType;
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8Bits_1;
union
{
NvU16 gpcId; // If this is a replayable fault, this field contains
// the physical GPC index where the fault was
// triggered
NvU16 channelId; // If this is a non-replayable fault, this field
// contains the id of the channel that launched the
// operation that caused the fault.
//
// TODO: Bug 3283289: this field is ambiguous for
// Ampere+ GPUs, but it is never consumed by clients.
};
NvU16 clientId; // Id of the MMU client that triggered the fault. This
// is the value provided by HW and is architecture-
// specific. There are separate client ids for
// different client types (See dev_fault.h).
NvU64 address; // virtual address at which gpu faulted
NvU64 timeStamp; // time stamp when the cpu started processing the
// fault
NvU64 timeStampGpu; // gpu time stamp when the fault entry was written
// in the fault buffer
NvU32 batchId; // Per-GPU unique id to identify the faults serviced
// in batch before:
// - Issuing a replay for replayable faults
// - Re-scheduling the channel for non-replayable
// faults.
NvU8 clientType; // Volta+ GPUs can fault on clients other than GR.
// UvmEventFaultClientTypeGpc indicates replayable
// fault, while UvmEventFaultClientTypeHub indicates
// non-replayable fault.
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8Bits_2;
NvU16 gpuIndex; // GPU that experienced the fault
} UvmEventGpuFaultInfo_V2;
//------------------------------------------------------------------------------
// This info is provided when a gpu fault is replayed (for replayable faults)
@@ -666,7 +800,25 @@ typedef struct
// accesses is queued on the gpu
NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
// executing on the gpu
} UvmEventGpuFaultReplayInfo;
} UvmEventGpuFaultReplayInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeGpuFaultReplay helps to identify event
// data in a queue.
//
NvU8 eventType;
NvU8 clientType; // See clientType in UvmEventGpuFaultInfo
NvU16 gpuIndex; // GPU that experienced the fault
NvU32 batchId; // Per-GPU unique id to identify the faults that
// have been serviced in batch
NvU64 timeStamp; // cpu time when the replay of the faulting memory
// accesses is queued on the gpu
NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
// executing on the gpu
} UvmEventGpuFaultReplayInfo_V2;
//------------------------------------------------------------------------------
// This info is provided per fatal fault
@@ -689,7 +841,26 @@ typedef struct
NvU16 padding16bits;
NvU64 address; // virtual address at which the processor faulted
NvU64 timeStamp; // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo;
} UvmEventFatalFaultInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeFatalFault helps to identify event data
// in a queue.
//
NvU8 eventType;
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType. Only
// valid if processorIndex is a GPU
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
NvU8 reason; // reason why the fault is fatal, refer
// UvmEventFatalReason
NvU16 processorIndex; // processor that experienced the fault
NvU16 padding16bits;
NvU64 address; // virtual address at which the processor faulted
NvU64 timeStamp; // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo_V2;
typedef struct
{
@@ -718,7 +889,38 @@ typedef struct
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
} UvmEventReadDuplicateInfo;
} UvmEventReadDuplicateInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeReadDuplicate helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
// read-duplicated
NvU64 size; // size in bytes of the memory region that is
// read-duplicated
NvU64 timeStamp; // cpu time stamp when the memory region becomes
// read-duplicate. Since many processors can
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
// mask that specifies in which processors this
// memory region is read-duplicated. This is last
// so UVM_PROCESSOR_MASK_SIZE can grow.
} UvmEventReadDuplicateInfo_V2;
typedef struct
{
@@ -728,13 +930,13 @@ typedef struct
// identify event data in a queue.
//
NvU8 eventType;
NvU8 residentIndex; // index of the cpu/gpu that now contains the only
// valid copy of the memory region
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 residentIndex; // index of the cpu/gpu that now contains the only
// valid copy of the memory region
NvU16 padding16bits;
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
@@ -746,8 +948,34 @@ typedef struct
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
} UvmEventReadDuplicateInvalidateInfo;
} UvmEventReadDuplicateInvalidateInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeReadDuplicateInvalidate helps to
// identify event data in a queue.
//
NvU8 eventType;
NvU8 padding8bits;
NvU16 residentIndex;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
// read-duplicated
NvU64 size; // size of the memory region that is
// read-duplicated
NvU64 timeStamp; // cpu time stamp when the memory region is no
// longer read-duplicate. Since many processors can
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
} UvmEventReadDuplicateInvalidateInfo_V2;
typedef struct
{
@@ -770,7 +998,30 @@ typedef struct
// changed
NvU64 timeStamp; // cpu time stamp when the new page size is
// queued on the gpu
} UvmEventPageSizeChangeInfo;
} UvmEventPageSizeChangeInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypePageSizeChange helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 processorIndex; // cpu/gpu processor index for which the page size
// changed
NvU32 size; // new page size
NvU64 address; // virtual address of the page whose size has
// changed
NvU64 timeStamp; // cpu time stamp when the new page size is
// queued on the gpu
} UvmEventPageSizeChangeInfo_V2;
typedef struct
{
@@ -794,7 +1045,33 @@ typedef struct
// thrashing
NvU64 size; // size of the memory region that is thrashing
NvU64 timeStamp; // cpu time stamp when thrashing is detected
} UvmEventThrashingDetectedInfo;
} UvmEventThrashingDetectedInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeThrashingDetected helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
// thrashing
NvU64 size; // size of the memory region that is thrashing
NvU64 timeStamp; // cpu time stamp when thrashing is detected
NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
// mask that specifies which processors are
// fighting for this memory region. This is last
// so UVM_PROCESSOR_MASK_SIZE can grow.
} UvmEventThrashingDetectedInfo_V2;
typedef struct
{
@@ -815,7 +1092,28 @@ typedef struct
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu start time stamp for the throttling operation
} UvmEventThrottlingStartInfo;
} UvmEventThrottlingStartInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeThrottlingStart helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits[2];
NvU16 processorIndex; // index of the cpu/gpu that was throttled
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu start time stamp for the throttling operation
} UvmEventThrottlingStartInfo_V2;
typedef struct
{
@@ -836,7 +1134,28 @@ typedef struct
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu end time stamp for the throttling operation
} UvmEventThrottlingEndInfo;
} UvmEventThrottlingEndInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeThrottlingEnd helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits[2];
NvU16 processorIndex; // index of the cpu/gpu that was throttled
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu end time stamp for the throttling operation
} UvmEventThrottlingEndInfo_V2;
typedef enum
{
@@ -892,7 +1211,36 @@ typedef struct
NvU64 timeStampGpu; // time stamp when the new mapping is effective in
// the processor specified by srcIndex. If srcIndex
// is a cpu, this field will be zero.
} UvmEventMapRemoteInfo;
} UvmEventMapRemoteInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeMapRemote helps to identify event data
// in a queue.
//
NvU8 eventType;
NvU8 mapRemoteCause; // field to type UvmEventMapRemoteCause that tells
// the cause for the page to be mapped remotely
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU16 padding16bits;
NvU16 srcIndex; // index of the cpu/gpu being remapped
NvU16 dstIndex; // index of the cpu/gpu memory that contains the
// memory region data
NvU64 address; // virtual address of the memory region that is
// thrashing
NvU64 size; // size of the memory region that is thrashing
NvU64 timeStamp; // cpu time stamp when all the required operations
// have been pushed to the processor
NvU64 timeStampGpu; // time stamp when the new mapping is effective in
// the processor specified by srcIndex. If srcIndex
// is a cpu, this field will be zero.
} UvmEventMapRemoteInfo_V2;
typedef struct
{
@@ -918,7 +1266,33 @@ typedef struct
NvU64 addressIn; // virtual address that caused the eviction
NvU64 size; // size of the memory region that being evicted
NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu
} UvmEventEvictionInfo;
} UvmEventEvictionInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeEviction helps to identify event data
// in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU16 srcIndex; // index of the cpu/gpu from which data is being
// evicted
NvU16 dstIndex; // index of the cpu/gpu memory to which data is
// going to be stored
NvU64 addressOut; // virtual address of the memory region that is
// being evicted
NvU64 addressIn; // virtual address that caused the eviction
NvU64 size; // size of the memory region that being evicted
NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu
} UvmEventEvictionInfo_V2;
// TODO: Bug 1870362: [uvm] Provide virtual address and processor index in
// AccessCounter events
@@ -978,7 +1352,44 @@ typedef struct
NvU32 bank;
NvU64 address;
NvU64 instancePtr;
} UvmEventTestAccessCounterInfo;
} UvmEventTestAccessCounterInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeAccessCounter helps to identify event
// data in a queue.
//
NvU8 eventType;
// See uvm_access_counter_buffer_entry_t for details
NvU8 aperture;
NvU8 instancePtrAperture;
NvU8 isVirtual;
NvU8 isFromCpu;
NvU8 veId;
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 srcIndex; // index of the gpu that received the access counter
// notification
NvU16 padding16bits;
NvU32 value;
NvU32 subGranularity;
NvU32 tag;
NvU32 bank;
NvU32 padding32bits;
NvU64 address;
NvU64 instancePtr;
} UvmEventTestAccessCounterInfo_V2;
typedef struct
{
@@ -998,30 +1409,64 @@ typedef struct
NvU8 eventType;
UvmEventMigrationInfo_Lite migration_Lite;
UvmEventCpuFaultInfo cpuFault;
UvmEventMigrationInfo migration;
UvmEventGpuFaultInfo gpuFault;
UvmEventGpuFaultReplayInfo gpuFaultReplay;
UvmEventFatalFaultInfo fatalFault;
UvmEventReadDuplicateInfo readDuplicate;
UvmEventReadDuplicateInvalidateInfo readDuplicateInvalidate;
UvmEventPageSizeChangeInfo pageSizeChange;
UvmEventThrashingDetectedInfo thrashing;
UvmEventThrottlingStartInfo throttlingStart;
UvmEventThrottlingEndInfo throttlingEnd;
UvmEventMapRemoteInfo mapRemote;
UvmEventEvictionInfo eviction;
UvmEventCpuFaultInfo_V1 cpuFault;
UvmEventMigrationInfo_V1 migration;
UvmEventGpuFaultInfo_V1 gpuFault;
UvmEventGpuFaultReplayInfo_V1 gpuFaultReplay;
UvmEventFatalFaultInfo_V1 fatalFault;
UvmEventReadDuplicateInfo_V1 readDuplicate;
UvmEventReadDuplicateInvalidateInfo_V1 readDuplicateInvalidate;
UvmEventPageSizeChangeInfo_V1 pageSizeChange;
UvmEventThrashingDetectedInfo_V1 thrashing;
UvmEventThrottlingStartInfo_V1 throttlingStart;
UvmEventThrottlingEndInfo_V1 throttlingEnd;
UvmEventMapRemoteInfo_V1 mapRemote;
UvmEventEvictionInfo_V1 eviction;
} eventData;
union
{
NvU8 eventType;
UvmEventTestAccessCounterInfo accessCounter;
UvmEventTestAccessCounterInfo_V1 accessCounter;
UvmEventTestSplitInvalidateInfo splitInvalidate;
} testEventData;
};
} UvmEventEntry;
} UvmEventEntry_V1;
typedef struct
{
union
{
union
{
NvU8 eventType;
UvmEventMigrationInfo_Lite migration_Lite;
UvmEventCpuFaultInfo_V2 cpuFault;
UvmEventMigrationInfo_V2 migration;
UvmEventGpuFaultInfo_V2 gpuFault;
UvmEventGpuFaultReplayInfo_V2 gpuFaultReplay;
UvmEventFatalFaultInfo_V2 fatalFault;
UvmEventReadDuplicateInfo_V2 readDuplicate;
UvmEventReadDuplicateInvalidateInfo_V2 readDuplicateInvalidate;
UvmEventPageSizeChangeInfo_V2 pageSizeChange;
UvmEventThrashingDetectedInfo_V2 thrashing;
UvmEventThrottlingStartInfo_V2 throttlingStart;
UvmEventThrottlingEndInfo_V2 throttlingEnd;
UvmEventMapRemoteInfo_V2 mapRemote;
UvmEventEvictionInfo_V2 eviction;
} eventData;
union
{
NvU8 eventType;
UvmEventTestAccessCounterInfo_V2 accessCounter;
UvmEventTestSplitInvalidateInfo splitInvalidate;
} testEventData;
};
} UvmEventEntry_V2;
//------------------------------------------------------------------------------
// Type of time stamp used in the event entry:
@@ -1060,7 +1505,12 @@ typedef enum
UvmDebugAccessTypeWrite = 1,
} UvmDebugAccessType;
typedef struct UvmEventControlData_tag {
typedef enum {
UvmToolsEventQueueVersion_V1 = 1,
UvmToolsEventQueueVersion_V2 = 2,
} UvmToolsEventQueueVersion;
typedef struct UvmEventControlData_V1_tag {
// entries between get_ahead and get_behind are currently being read
volatile NvU32 get_ahead;
volatile NvU32 get_behind;
@@ -1070,7 +1520,30 @@ typedef struct UvmEventControlData_tag {
// counter of dropped events
NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData;
} UvmToolsEventControlData_V1;
typedef struct UvmEventControlData_V2_tag {
// entries between get_ahead and get_behind are currently being read
volatile NvU32 get_ahead;
volatile NvU32 get_behind;
// entries between put_ahead and put_behind are currently being written
volatile NvU32 put_ahead;
volatile NvU32 put_behind;
// The version values are limited to UvmToolsEventQueueVersion and
// initialized by UvmToolsCreateEventQueue().
NvU32 version;
NvU32 padding32Bits;
// counter of dropped events
NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData_V2;
// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
typedef UvmToolsEventControlData_V1 UvmToolsEventControlData;
typedef UvmEventEntry_V1 UvmEventEntry;
//------------------------------------------------------------------------------
// UVM Tools forward types (handles) definitions
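Editorial sketch of how the versioned control data above might be consumed; the helper is hypothetical, not part of UVM, and only the _V2 control layout carries a version field:

    static size_t example_entry_size(const UvmToolsEventControlData_V2 *ctrl)
    {
        if (ctrl->version == UvmToolsEventQueueVersion_V2)
            return sizeof(UvmEventEntry_V2);

        return sizeof(UvmEventEntry_V1);
    }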

File diff suppressed because it is too large Load Diff

View File

@@ -706,11 +706,6 @@ void uvm_va_block_context_free(uvm_va_block_context_t *va_block_context);
// mm is used to initialize the value of va_block_context->mm. NULL is allowed.
void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context, struct mm_struct *mm);
// Return the preferred NUMA node ID for the block's policy.
// If the preferred node ID is NUMA_NO_NODE, the current NUMA node ID
// is returned.
int uvm_va_block_context_get_node(uvm_va_block_context_t *va_block_context);
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
// and page masks could simplify the below APIs and their implementations
// at the cost of having to scan the whole mask for small regions.
@@ -1546,7 +1541,11 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
// The [src, src + size) range has to fit within a single PAGE_SIZE page.
//
// LOCKING: The caller must hold the va_block lock
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block, uvm_mem_t *dst, NvU64 src, size_t size);
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_mem_t *dst,
NvU64 src,
size_t size);
// Initialize va block retry tracking
void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);
@@ -2090,11 +2089,14 @@ void uvm_va_block_page_resident_processors(uvm_va_block_t *va_block,
// Count how many processors have a copy of the given page resident in their
// memory.
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block, uvm_page_index_t page_index);
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index);
// Get the processor with a resident copy of a page closest to the given
// processor.
uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_processor_id_t processor);
@@ -2127,6 +2129,11 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
int nid,
uvm_page_index_t page_index);
// Return the CPU chunk for the given page_index from the first available NUMA
// node from the va_block. Should only be called for HMM va_blocks.
// Locking: The va_block lock must be held.
uvm_cpu_chunk_t *uvm_cpu_chunk_get_any_chunk_for_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);
// Return the struct page * from the chunk corresponding to the given page_index
// Locking: The va_block lock must be held.
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
@@ -2241,6 +2248,7 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
// Return the maximum mapping protection for processor_id that will not require
// any permission revocation on the rest of processors.
uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index);

View File

@@ -175,6 +175,14 @@ typedef struct
// Scratch node mask. This follows the same rules as scratch_page_mask;
nodemask_t scratch_node_mask;
// Available as scratch space for the internal APIs. This is like a caller-
// save register: it shouldn't be used across function calls which also take
// this va_block_context.
uvm_processor_mask_t scratch_processor_mask;
// Temporary mask in block_add_eviction_mappings().
uvm_processor_mask_t map_processors_eviction;
// State used by uvm_va_block_make_resident
struct uvm_make_resident_context_struct
{
@@ -233,6 +241,16 @@ typedef struct
// are removed as the operation progresses.
uvm_page_mask_t revoke_running_page_mask;
// Mask used by block_gpu_split_2m and block_gpu_split_big to track
// splitting of big PTEs but they are never called concurrently. This
// mask can be used concurrently with other page masks.
uvm_page_mask_t big_split_page_mask;
// Mask used by block_unmap_gpu to track non_uvm_lite_gpus which have
// this block mapped. This mask can be used concurrently with other page
// masks.
uvm_processor_mask_t non_uvm_lite_gpus;
uvm_page_mask_t page_mask;
uvm_page_mask_t filtered_page_mask;
uvm_page_mask_t migratable_mask;
@@ -276,6 +294,10 @@ typedef struct
struct vm_area_struct *vma;
#if UVM_IS_CONFIG_HMM()
// Temporary mask used in uvm_hmm_block_add_eviction_mappings().
uvm_processor_mask_t map_processors_eviction;
// Used for migrate_vma_*() to migrate pages to/from GPU/CPU.
struct migrate_vma migrate_vma_args;
#endif
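Editorial sketch of the caller-save convention described for the new scratch_processor_mask above; the function is hypothetical and assumes the surrounding UVM headers:

    static void example_scratch_use(uvm_va_block_context_t *va_block_context,
                                    uvm_processor_id_t id)
    {
        uvm_processor_mask_t *scratch = &va_block_context->scratch_processor_mask;

        uvm_processor_mask_set(scratch, id);   // OK: purely local use
        // Calling another function that also takes va_block_context while the
        // scratch mask is still live would be incorrect: the callee may reuse it.
        uvm_processor_mask_clear(scratch, id);
    }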

View File

@@ -1799,7 +1799,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
if (uvm_api_range_invalid(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;
if (params->gpuAttributesCount > UVM_MAX_GPUS)
if (params->gpuAttributesCount > UVM_MAX_GPUS_V2)
return NV_ERR_INVALID_ARGUMENT;
if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -86,11 +86,13 @@ static void init_tools_data(uvm_va_space_t *va_space)
for (i = 0; i < ARRAY_SIZE(va_space->tools.counters); i++)
INIT_LIST_HEAD(va_space->tools.counters + i);
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues); i++)
INIT_LIST_HEAD(va_space->tools.queues + i);
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v1); i++)
INIT_LIST_HEAD(va_space->tools.queues_v1 + i);
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v2); i++)
INIT_LIST_HEAD(va_space->tools.queues_v2 + i);
}
static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
static NV_STATUS register_gpu_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
{
uvm_gpu_t *other_gpu;
@@ -104,7 +106,7 @@ static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *
peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1) {
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1 || gpu->parent == other_gpu->parent) {
NV_STATUS status = enable_peers(va_space, gpu, other_gpu);
if (status != NV_OK)
return status;
@@ -324,10 +326,16 @@ static void unregister_gpu(uvm_va_space_t *va_space,
}
}
if (gpu->parent->isr.replayable_faults.handling)
if (gpu->parent->isr.replayable_faults.handling) {
UVM_ASSERT(uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
uvm_processor_mask_clear(&va_space->faultable_processors, gpu->id);
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
UVM_ASSERT(uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
}
else {
UVM_ASSERT(uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
uvm_processor_mask_clear(&va_space->non_faultable_processors, gpu->id);
}
processor_mask_array_clear(va_space->can_access, gpu->id, gpu->id);
processor_mask_array_clear(va_space->can_access, gpu->id, UVM_ID_CPU);
@@ -514,7 +522,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_disable(gpu, va_space);
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
}
// Check that all CPU/GPU affinity masks are empty
@@ -604,7 +612,7 @@ uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProces
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid))
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
return gpu;
}
@@ -663,7 +671,8 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
NvBool *numa_enabled,
NvS32 *numa_node_id)
NvS32 *numa_node_id,
NvProcessorUuid *uuid_out)
{
NV_STATUS status;
uvm_va_range_t *va_range;
@@ -675,13 +684,15 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
if (status != NV_OK)
return status;
uvm_uuid_copy(uuid_out, &gpu->uuid);
// Enabling access counters requires taking the ISR lock, so it is done
// without holding the (deeper order) VA space lock. Enabling the counters
// after dropping the VA space lock would create a window of time in which
// another thread could see the GPU as registered, but access counters would
// be disabled. Therefore, the counters are enabled before taking the VA
// space lock.
if (uvm_gpu_access_counters_required(gpu->parent)) {
if (uvm_parent_gpu_access_counters_required(gpu->parent)) {
status = uvm_gpu_access_counters_enable(gpu, va_space);
if (status != NV_OK) {
uvm_gpu_release(gpu);
@@ -726,10 +737,17 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
if (gpu->parent->isr.replayable_faults.handling) {
UVM_ASSERT(!uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
uvm_processor_mask_set(&va_space->faultable_processors, gpu->id);
UVM_ASSERT(!uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
// System-wide atomics are enabled by default
uvm_processor_mask_set(&va_space->system_wide_atomics_enabled_processors, gpu->id);
}
else {
UVM_ASSERT(!uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
uvm_processor_mask_set(&va_space->non_faultable_processors, gpu->id);
}
// All GPUs have native atomics on their own memory
processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id);
@@ -785,7 +803,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
}
}
status = register_gpu_nvlink_peers(va_space, gpu);
status = register_gpu_peers(va_space, gpu);
if (status != NV_OK)
goto cleanup;
@@ -822,9 +840,9 @@ done:
if (status != NV_OK) {
// There is no risk of disabling access counters on a previously
// registered GPU: the enablement step would have failed before even
// discovering that the GPU is already registed.
if (uvm_gpu_access_counters_required(gpu->parent))
uvm_gpu_access_counters_disable(gpu, va_space);
// discovering that the GPU is already registered.
if (uvm_parent_gpu_access_counters_required(gpu->parent))
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_release(gpu);
}
@@ -876,15 +894,16 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
// it from the VA space until we're done.
uvm_va_space_up_read_rm(va_space);
// If uvm_gpu_access_counters_required(gpu->parent) is true, a concurrent
// registration could enable access counters after they are disabled here.
// If uvm_parent_gpu_access_counters_required(gpu->parent) is true, a
// concurrent registration could enable access counters after they are
// disabled here.
// The concurrent registration will fail later on if it acquires the VA
// space lock before the unregistration does (because the GPU is still
// registered) and undo the access counters enablement, or succeed if it
// acquires the VA space lock after the unregistration does. Both outcomes
// result on valid states.
if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_disable(gpu, va_space);
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
// mmap_lock is needed to establish CPU mappings to any pages evicted from
// the GPU if accessed by CPU is set for them.
@@ -1040,6 +1059,10 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
processor_mask_array_set(va_space->indirect_peers, gpu1->id, gpu0->id);
}
}
else if (gpu0->parent == gpu1->parent) {
processor_mask_array_set(va_space->has_native_atomics, gpu0->id, gpu1->id);
processor_mask_array_set(va_space->has_native_atomics, gpu1->id, gpu0->id);
}
UVM_ASSERT(va_space_check_processors_masks(va_space));
__set_bit(table_index, va_space->enabled_peers);
@@ -1091,6 +1114,7 @@ static NV_STATUS retain_pcie_peers_from_uuids(uvm_va_space_t *va_space,
static bool uvm_va_space_pcie_peer_enabled(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
return !processor_mask_array_test(va_space->has_nvlink, gpu0->id, gpu1->id) &&
gpu0->parent != gpu1->parent &&
uvm_va_space_peer_enabled(va_space, gpu0, gpu1);
}

View File

@@ -163,6 +163,10 @@ struct uvm_va_space_struct
// faults.
uvm_processor_mask_t faultable_processors;
// Mask of processors registered with the va space that don't support
// faulting.
uvm_processor_mask_t non_faultable_processors;
// This is a count of non fault capable processors with a GPU VA space
// registered.
NvU32 num_non_faultable_gpu_va_spaces;
@@ -261,8 +265,8 @@ struct uvm_va_space_struct
// Mask of processors that are participating in system-wide atomics
uvm_processor_mask_t system_wide_atomics_enabled_processors;
// Mask of GPUs where access counters are enabled on this VA space
uvm_processor_mask_t access_counters_enabled_processors;
// Mask of physical GPUs where access counters are enabled on this VA space
uvm_parent_processor_mask_t access_counters_enabled_processors;
// Array with information regarding CPU/GPU NUMA affinity. There is one
// entry per CPU NUMA node. Entries in the array are populated sequentially
@@ -308,7 +312,8 @@ struct uvm_va_space_struct
// Lists of counters listening for events on this VA space
struct list_head counters[UVM_TOTAL_COUNTERS];
struct list_head queues[UvmEventNumTypesAll];
struct list_head queues_v1[UvmEventNumTypesAll];
struct list_head queues_v2[UvmEventNumTypesAll];
// Node for this va_space in global subscribers list
struct list_head node;
@@ -399,7 +404,7 @@ static void uvm_va_space_processor_uuid(uvm_va_space_t *va_space, NvProcessorUui
else {
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
UVM_ASSERT(gpu);
memcpy(uuid, uvm_gpu_uuid(gpu), sizeof(*uuid));
memcpy(uuid, &gpu->uuid, sizeof(*uuid));
}
}
@@ -472,9 +477,9 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space);
uvm_mutex_unlock(&(__va_space)->serialize_writers_lock); \
} while (0)
// Get a registered gpu by uuid. This restricts the search for GPUs, to those that
// have been registered with a va_space. This returns NULL if the GPU is not present, or not
// registered with the va_space.
// Get a registered gpu by uuid. This restricts the search for GPUs, to those
// that have been registered with a va_space. This returns NULL if the GPU is
// not present, or not registered with the va_space.
//
// LOCKING: The VA space lock must be held.
uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);
@@ -501,13 +506,19 @@ bool uvm_va_space_can_read_duplicate(uvm_va_space_t *va_space, uvm_gpu_t *changi
// Register a gpu in the va space
// Note that each gpu can be only registered once in a va space
//
// The input gpu_uuid is for the physical GPU. The user_rm_va_space argument
// identifies the SMC partition if provided and SMC is enabled.
//
// This call returns whether the GPU memory is a NUMA node in the kernel and the
// corresponding node id.
// It also returns the GI UUID (if gpu_uuid is a SMC partition) or a copy of
// gpu_uuid if the GPU is not SMC capable or SMC is not enabled.
NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_va_space,
NvBool *numa_enabled,
NvS32 *numa_node_id);
NvS32 *numa_node_id,
NvProcessorUuid *uuid_out);
// Unregister a gpu from the va space
NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);

View File

@@ -280,7 +280,9 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
}
}
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
uvm_va_space_pageable_mem_access_supported(va_space)) {
#if UVM_CAN_USE_MMU_NOTIFIERS()
// Initialize MMU interval notifiers for this process. This allows
// mmu_interval_notifier_insert() to be called without holding the

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -53,6 +53,7 @@
(defined(CONFIG_CRYPTO_HMAC) || defined(CONFIG_CRYPTO_HMAC_MODULE)) && \
(defined(CONFIG_CRYPTO_ECDH) || defined(CONFIG_CRYPTO_ECDH_MODULE)) && \
(defined(CONFIG_CRYPTO_ECDSA) || defined(CONFIG_CRYPTO_ECDSA_MODULE)) && \
(defined(CONFIG_CRYPTO_RSA) || defined(CONFIG_CRYPTO_RSA_MODULE)) && \
(defined(CONFIG_X509_CERTIFICATE_PARSER) || defined(CONFIG_X509_CERTIFICATE_PARSER_MODULE))
#define NV_CONFIG_CRYPTO_PRESENT 1
#endif
@@ -151,4 +152,17 @@ bool lkca_ec_compute_key(void *ec_context, const uint8_t *peer_public,
bool lkca_ecdsa_verify(void *ec_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
const uint8_t *signature, size_t sig_size);
bool lkca_rsa_verify(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
const uint8_t *signature, size_t sig_size);
bool lkca_rsa_pkcs1_sign(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
uint8_t *signature, size_t *sig_size);
bool lkca_rsa_pss_sign(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
uint8_t *signature, size_t *sig_size);
#endif

View File

@@ -0,0 +1,611 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "internal_crypt_lib.h"
#include "library/cryptlib.h"
#ifdef USE_LKCA
#include <linux/module.h>
#include <linux/mpi.h>
#include <linux/random.h>
#include <crypto/akcipher.h>
#include <crypto/internal/rsa.h>
/* ------------------------ Macros & Defines ------------------------------- */
#define GET_MOST_SIGNIFICANT_BIT(keySize) (keySize > 0 ? ((keySize - 1) & 7) : 0)
#define GET_ENC_MESSAGE_SIZE_BYTE(keySize) (keySize + 7) >> 3;
#define PKCS1_MGF1_COUNTER_SIZE_BYTE (4)
#define RSA_PSS_PADDING_ZEROS_SIZE_BYTE (8)
#define RSA_PSS_TRAILER_FIELD (0xbc)
#define SHIFT_RIGHT_AND_GET_BYTE(val, x) ((val >> x) & 0xFF)
#define BITS_TO_BYTES(b) (b >> 3)
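/*
 * Editorial worked example, not part of the original source: for a 3072-bit
 * modulus, GET_MOST_SIGNIFICANT_BIT(3072) = (3071 & 7) = 7 and
 * BITS_TO_BYTES(3072) = 384, so the encoded message EM fills the full
 * 384-byte modulus length, no leading zero byte is emitted, and the top bit
 * of EM is later masked with (0xFF >> 1) to keep EM below the modulus.
 */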
static const unsigned char zeroes[RSA_PSS_PADDING_ZEROS_SIZE_BYTE] = { 0 };
struct rsa_ctx
{
struct rsa_key key;
bool pub_key_set;
bool priv_key_set;
int size;
};
#endif // #ifdef USE_LKCA
/*!
* Creating and initializing a RSA context.
*
* @return : A void pointer to an RSA context
*
*/
void *libspdm_rsa_new
(
void
)
{
#ifndef USE_LKCA
return NULL;
#else
struct rsa_ctx *ctx;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx == NULL)
{
return NULL;
}
memset(ctx, 0, sizeof(*ctx));
ctx->pub_key_set = false;
ctx->priv_key_set = false;
return ctx;
#endif
}
/*!
* To free a RSA context.
*
* @param rsa_context : A RSA context pointer
*
*/
void libspdm_rsa_free
(
void *rsa_context
)
{
#ifdef USE_LKCA
struct rsa_ctx *ctx = rsa_context;
if (ctx != NULL)
{
if (ctx->key.n) kfree(ctx->key.n);
if (ctx->key.e) kfree(ctx->key.e);
if (ctx->key.d) kfree(ctx->key.d);
if (ctx->key.q) kfree(ctx->key.q);
if (ctx->key.p) kfree(ctx->key.p);
if (ctx->key.dq) kfree(ctx->key.dq);
if (ctx->key.dp) kfree(ctx->key.dp);
if (ctx->key.qinv) kfree(ctx->key.qinv);
kfree(ctx);
}
#endif
}
#define rsa_set_key_case(a, a_sz, A) \
case A: \
{ \
if (ctx->key.a) { \
kfree(ctx->key.a); \
} \
ctx->key.a = shadow_num; \
ctx->key.a_sz = bn_size; \
break; \
}
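/*
 * Editorial note: for (n, n_sz, LIBSPDM_RSA_KEY_N) the case above expands
 * roughly to
 *
 *     case LIBSPDM_RSA_KEY_N:
 *     {
 *         if (ctx->key.n) {
 *             kfree(ctx->key.n);
 *         }
 *         ctx->key.n = shadow_num;
 *         ctx->key.n_sz = bn_size;
 *         break;
 *     }
 *
 * i.e. ownership of the freshly copied shadow buffer is handed to the
 * corresponding rsa_key field and any previous value is freed.
 */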
/*!
* To set key into RSA context.
*
* @param rsa_context : A RSA context pointer
* @param key_tag : Indicate key tag for RSA key
* @param big_number : A big number buffer holding the RSA key component
* @param bn_size : The size of the big number
*
* @Return : True if OK; otherwise return False
*/
bool libspdm_rsa_set_key
(
void *rsa_context,
const libspdm_rsa_key_tag_t key_tag,
const uint8_t *big_number,
size_t bn_size
)
{
#ifndef USE_LKCA
return false;
#else
struct rsa_ctx *ctx = rsa_context;
uint8_t *shadow_num;
if (ctx == NULL)
{
return false;
}
// Quick sanity check if tag is valid
switch (key_tag)
{
case LIBSPDM_RSA_KEY_N:
case LIBSPDM_RSA_KEY_E:
case LIBSPDM_RSA_KEY_D:
case LIBSPDM_RSA_KEY_Q:
case LIBSPDM_RSA_KEY_P:
case LIBSPDM_RSA_KEY_DP:
case LIBSPDM_RSA_KEY_DQ:
case LIBSPDM_RSA_KEY_Q_INV:
break;
default:
return false;
break;
}
if (big_number != NULL)
{
shadow_num = kmalloc(bn_size, GFP_KERNEL);
if (shadow_num == NULL)
{
return false;
}
memcpy(shadow_num, big_number, bn_size);
}
else
{
shadow_num = NULL;
bn_size = 0;
}
switch (key_tag)
{
rsa_set_key_case(n, n_sz, LIBSPDM_RSA_KEY_N)
rsa_set_key_case(e, e_sz, LIBSPDM_RSA_KEY_E)
rsa_set_key_case(d, d_sz, LIBSPDM_RSA_KEY_D)
rsa_set_key_case(q, q_sz, LIBSPDM_RSA_KEY_Q)
rsa_set_key_case(p, p_sz, LIBSPDM_RSA_KEY_P)
rsa_set_key_case(dq, dq_sz, LIBSPDM_RSA_KEY_DQ)
rsa_set_key_case(dp, dp_sz, LIBSPDM_RSA_KEY_DP)
rsa_set_key_case(qinv, qinv_sz, LIBSPDM_RSA_KEY_Q_INV)
default:
// We can't get here ever
break;
}
return true;
#endif
}
/*!
* Perform PKCS1 MGF1 operation.
*
* @param mask : A mask pointer to store return data
* @param maskedDB_length : Indicate mask data block length
* @param seed : A seed pointer to store random values
* @param seed_length : The seed length
* @param hash_nid : The hash NID
*
* @Return : True if OK; otherwise return False
*/
static bool NV_PKCS1_MGF1
(
uint8_t *mask,
size_t maskedDB_length,
const uint8_t *seed,
size_t seed_length,
size_t hash_nid
)
{
#ifndef USE_LKCA
return false;
#else
size_t mdLength;
size_t counter;
size_t outLength;
uint8_t counterBuf[4];
void *sha384_ctx = NULL;
uint8_t hash_value[LIBSPDM_SHA384_DIGEST_SIZE];
bool status = false;
if (mask == NULL || seed == NULL)
{
return false;
}
// Only support SHA384 for MGF1 now.
if (hash_nid == LIBSPDM_CRYPTO_NID_SHA384)
{
mdLength = LIBSPDM_SHA384_DIGEST_SIZE;
}
else
{
return false;
}
sha384_ctx = libspdm_sha384_new();
if (sha384_ctx == NULL)
{
pr_err("%s : libspdm_sha384_new() failed \n", __FUNCTION__);
return false;
}
for (counter = 0, outLength = 0; outLength < maskedDB_length; counter++)
{
counterBuf[0] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 24);
counterBuf[1] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 16);
counterBuf[2] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 8);
counterBuf[3] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 0);
status = libspdm_sha384_init(sha384_ctx);
if (!status)
{
pr_err("%s: libspdm_sha384_init() failed !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, seed, seed_length);
if (!status)
{
pr_err("%s: libspdm_sha384_update() failed(seed) !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, counterBuf, 4);
if (!status)
{
pr_err("%s: libspdm_sha384_update() failed(counterBuf) !! \n", __FUNCTION__);
goto _error_exit;
}
if (outLength + mdLength <= maskedDB_length)
{
status = libspdm_sha384_final(sha384_ctx, mask + outLength);
if (!status)
{
pr_err("%s: libspdm_sha384_final() failed (<= maskedDB_length) !! \n", __FUNCTION__);
goto _error_exit;
}
outLength += mdLength;
}
else
{
status = libspdm_sha384_final(sha384_ctx, hash_value);
if (!status)
{
pr_err("%s: libspdm_sha384_final() failed(> maskedDB_length) !! \n", __FUNCTION__);
goto _error_exit;
}
memcpy(mask + outLength, hash_value, maskedDB_length - outLength);
outLength = maskedDB_length;
}
}
status = true;
_error_exit:
libspdm_sha384_free(sha384_ctx);
return status;
#endif
}
/*
  0xbc : Trailer Field

                            +-----------+
                            |     M     |
                            +-----------+
                                  |
                                  V
                                 Hash
                                  |
                                  V
                  +--------+----------+----------+
             M' = |Padding1|  mHash   |   salt   |
                  +--------+----------+----------+
                           |--------------|---------------|
                                                          |
       +--------+----------+                              V
  DB = |Padding2|   salt   |                             Hash
       +--------+----------+                              |
                 |                                        |
                 V                                        |
                xor <------------- MGF <------------------|
                 |                                        |
                 |                                        |
                 V                                        V
       +-------------------+----------+----+
  EM = |     maskedDB      |     H    |0xbc|
       +-------------------+----------+----+
salt : The random number, we hardcode its size as hash size here.
M' : The concatenation of padding1 + message hash + salt
MGF : Mask generation function.
A mask generation function takes an octet string of variable length
and a desired output length as input, and outputs an octet string of
the desired length
MGF1 is a Mask Generation Function based on a hash function.
Padding1 : 8 zeros
Padding2 : 0x01
The detail spec is at https://datatracker.ietf.org/doc/html/rfc2437
*/
/*!
* Set keys and call PKCS1_MGF1 to generate signature.
*
* @param rsa_context : A RSA context pointer
* @param hash_nid : The hash NID
* @param message_hash : The pointer to the message hash
* @param hash_size    : The size of the message hash in bytes
* @param signature    : The pointer used to store the generated signature
* @param sig_size     : For input, a pointer to the signature buffer size.
*                       For output, a pointer to the generated signature size.
* @param salt_length  : The salt length for the RSA-PSS algorithm
*
* @Return : True if OK; otherwise return False
*/
static bool nvRsaPaddingAddPkcs1PssMgf1
(
void *rsa_context,
size_t hash_nid,
const uint8_t *message_hash,
size_t hash_size,
uint8_t *signature,
size_t *sig_size,
int salt_length
)
{
#ifndef USE_LKCA
return false;
#else
bool status = false;
struct rsa_ctx *ctx = rsa_context;
void *sha384_ctx = NULL;
uint32_t keySize;
uint32_t msBits;
size_t emLength;
uint8_t saltBuf[64];
size_t maskedDB_length;
size_t i;
uint8_t *tmp_H;
uint8_t *tmp_P;
int rc;
unsigned int ret_data_size;
MPI mpi_n = NULL;
MPI mpi_d = NULL;
MPI mpi_c = mpi_alloc(0);
MPI mpi_p = mpi_alloc(0);
// read modulus to BN struct
mpi_n = mpi_read_raw_data(ctx->key.n, ctx->key.n_sz);
if (mpi_n == NULL)
{
pr_err("%s : mpi_n create failed !! \n", __FUNCTION__);
goto _error_exit;
}
// read private exponent to BN struct
mpi_d = mpi_read_raw_data(ctx->key.d, ctx->key.d_sz);
if (mpi_d == NULL)
{
pr_err("%s : mpi_d create failed !! \n", __FUNCTION__);
goto _error_exit;
}
keySize = mpi_n->nbits;
msBits = GET_MOST_SIGNIFICANT_BIT(keySize);
emLength = BITS_TO_BYTES(keySize);
if (msBits == 0)
{
*signature++ = 0;
emLength--;
}
if (emLength < hash_size + 2)
{
pr_err("%s : emLength < hash_size + 2 !! \n", __FUNCTION__);
goto _error_exit;
}
// Now, we only support salt_length == LIBSPDM_SHA384_DIGEST_SIZE
if (salt_length != LIBSPDM_SHA384_DIGEST_SIZE ||
hash_nid != LIBSPDM_CRYPTO_NID_SHA384)
{
pr_err("%s : Invalid salt_length (%x) \n", __FUNCTION__, salt_length);
goto _error_exit;
}
get_random_bytes(saltBuf, salt_length);
maskedDB_length = emLength - hash_size - 1;
tmp_H = signature + maskedDB_length;
sha384_ctx = libspdm_sha384_new();
if (sha384_ctx == NULL)
{
pr_err("%s : libspdm_sha384_new() failed !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_init(sha384_ctx);
if (!status)
{
pr_err("%s : libspdm_sha384_init() failed !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, zeroes, sizeof(zeroes));
if (!status)
{
pr_err("%s : libspdm_sha384_update() with zeros failed !!\n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, message_hash, hash_size);
if (!status)
{
pr_err("%s: libspdm_sha384_update() with message_hash failed !!\n", __FUNCTION__);
goto _error_exit;
}
if (salt_length)
{
status = libspdm_sha384_update(sha384_ctx, saltBuf, salt_length);
if (!status)
{
pr_err("%s : libspdm_sha384_update() with saltBuf failed !!\n", __FUNCTION__);
goto _error_exit;
}
}
status = libspdm_sha384_final(sha384_ctx, tmp_H);
if (!status)
{
pr_err("%s : libspdm_sha384_final() with tmp_H failed !!\n", __FUNCTION__);
goto _error_exit;
}
/* Generate dbMask in place then perform XOR on it */
status = NV_PKCS1_MGF1(signature, maskedDB_length, tmp_H, hash_size, hash_nid);
if (!status)
{
pr_err("%s : NV_PKCS1_MGF1() failed \n", __FUNCTION__);
goto _error_exit;
}
tmp_P = signature;
tmp_P += emLength - salt_length - hash_size - 2;
*tmp_P++ ^= 0x1;
if (salt_length > 0)
{
for (i = 0; i < salt_length; i++)
{
*tmp_P++ ^= saltBuf[i];
}
}
if (msBits)
{
signature[0] &= 0xFF >> (8 - msBits);
}
/* H is already in place so just set final 0xbc */
signature[emLength - 1] = RSA_PSS_TRAILER_FIELD;
// read signature to BN struct
mpi_p = mpi_read_raw_data(signature, emLength);
if (mpi_p == NULL)
{
pr_err("%s : mpi_p() create failed !!\n", __FUNCTION__);
goto _error_exit;
}
// Start the RSA private-key operation over the encoded message (s = EM^d mod n).
rc = mpi_powm(mpi_c, mpi_p, mpi_d, mpi_n);
if (rc != 0)
{
pr_err("%s : mpi_powm() failed \n", __FUNCTION__);
goto _error_exit;
}
rc = mpi_read_buffer(mpi_c, signature, *sig_size, &ret_data_size, NULL);
if (rc != 0)
{
pr_err("%s : mpi_read_buffer() failed \n", __FUNCTION__);
goto _error_exit;
}
if (ret_data_size > *sig_size)
{
goto _error_exit;
}
*sig_size = ret_data_size;
status = true;
_error_exit:
mpi_free(mpi_n);
mpi_free(mpi_d);
mpi_free(mpi_c);
mpi_free(mpi_p);
libspdm_sha384_free(sha384_ctx);
return status;
#endif
}
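/*
 * Editor's note: a worked size example for the encoding above, assuming a
 * hypothetical 3072-bit modulus, SHA-384 (hash_size = 48) and
 * salt_length = 48, with no leading-byte adjustment (msBits != 0):
 *
 *   emLength        = 3072 / 8                                 = 384 bytes
 *   maskedDB_length = emLength - hash_size - 1                 = 335 bytes
 *   DB              = 286 zero bytes || 0x01 || 48-byte salt     (335 bytes)
 *   EM              = maskedDB (335) || H (48) || 0xbc (1)       (384 bytes)
 */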
/*!
* Perform the RSA-PSS signature signing process with the LKCA library.
*
* @param rsa_context : A RSA context pointer
* @param hash_nid : The hash NID
* @param message_hash : The pointer to the message hash
* @param hash_size : The size of the message hash in bytes
* @param signature : The pointer used to store the generated signature
* @param sig_size : On input, points to the signature buffer size.
*                   On output, points to the generated signature size.
*
* @Return : True if OK; otherwise return False
*/
bool lkca_rsa_pss_sign
(
void *rsa_context,
size_t hash_nid,
const uint8_t *message_hash,
size_t hash_size,
uint8_t *signature,
size_t *sig_size
)
{
#ifndef USE_LKCA
return true;
#else
return nvRsaPaddingAddPkcs1PssMgf1(rsa_context,
hash_nid,
message_hash,
hash_size,
signature,
sig_size,
LIBSPDM_SHA384_DIGEST_SIZE);
#endif
}

View File

@@ -0,0 +1,85 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Comments, prototypes and checks taken from DMTF: Copyright 2021-2022 DMTF. All rights reserved.
* License: BSD 3-Clause License. For full text see link: https://github.com/DMTF/libspdm/blob/main/LICENSE.md
*/
/** @file
* RSA Asymmetric Cipher Wrapper Implementation.
*
* This file implements following APIs which provide more capabilities for RSA:
* 1) rsa_pss_sign
*
* RFC 8017 - PKCS #1: RSA Cryptography Specifications version 2.2
**/
#include "internal_crypt_lib.h"
#include "library/cryptlib.h"
/**
* Carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme.
*
* This function carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme defined in
* RSA PKCS#1 v2.2.
*
* The salt length is the same as the digest length.
*
* If the signature buffer is too small to hold the contents of signature, false
* is returned and sig_size is set to the required buffer size to obtain the signature.
*
* If rsa_context is NULL, then return false.
* If message_hash is NULL, then return false.
* hash_size must match the hash_nid. The nid can be SHA256, SHA384, SHA512,
* SHA3_256, SHA3_384, or SHA3_512.
* If sig_size is large enough but signature is NULL, then return false.
*
* @param[in] rsa_context Pointer to RSA context for signature generation.
* @param[in] hash_nid hash NID
* @param[in] message_hash Pointer to octet message hash to be signed.
* @param[in] hash_size size of the message hash in bytes.
* @param[out] signature Pointer to buffer to receive RSA-SSA PSS signature.
* @param[in, out] sig_size On input, the size of signature buffer in bytes.
* On output, the size of data returned in signature buffer in bytes.
*
* @retval true signature successfully generated in RSA-SSA PSS.
* @retval false signature generation failed.
* @retval false sig_size is too small.
*
**/
bool libspdm_rsa_pss_sign(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
uint8_t *signature, size_t *sig_size)
{
return lkca_rsa_pss_sign(rsa_context, hash_nid, message_hash, hash_size,
signature, sig_size);
}
//
// In RM, we only need the signing process, so we stub out the verification
// function. The verification function is needed in GSP code only.
//
bool libspdm_rsa_pss_verify(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
const uint8_t *signature, size_t sig_size)
{
return false;
}
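/*
 * Editor's note: a hypothetical caller sketch, for illustration only. It
 * assumes rsa_ctx was created and loaded with a private key elsewhere (key
 * setup is outside the scope of this file); the helper name and buffer
 * handling are illustrative.
 */
static bool sign_sha384_digest_sketch(void *rsa_ctx,
                                      const uint8_t *digest,   /* 48 bytes */
                                      uint8_t *sig_buf,
                                      size_t sig_buf_size,
                                      size_t *sig_len)
{
    /* On input, *sig_len holds the capacity of sig_buf */
    *sig_len = sig_buf_size;
    if (!libspdm_rsa_pss_sign(rsa_ctx, LIBSPDM_CRYPTO_NID_SHA384,
                              digest, 48 /* SHA-384 digest size */,
                              sig_buf, sig_len))
    {
        return false;
    }
    /* On success, *sig_len holds the actual signature size */
    return true;
}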

View File

@@ -0,0 +1,153 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nv-linux.h"
extern int NVreg_ImexChannelCount;
static int nv_caps_imex_open(struct inode *inode, struct file *file)
{
return 0;
}
static int nv_caps_imex_release(struct inode *inode, struct file *file)
{
return 0;
}
static struct file_operations g_nv_caps_imex_fops =
{
.owner = THIS_MODULE,
.open = nv_caps_imex_open,
.release = nv_caps_imex_release
};
struct
{
NvBool initialized;
struct cdev cdev;
dev_t devno;
} g_nv_caps_imex;
int NV_API_CALL nv_caps_imex_channel_get(int fd)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
struct file *file;
struct inode *inode;
int channel = -1;
file = fget(fd);
if (file == NULL)
{
return channel;
}
inode = NV_FILE_INODE(file);
if (inode == NULL)
{
goto out;
}
/* Make sure the fd belongs to the nv-caps-imex-drv */
if (file->f_op != &g_nv_caps_imex_fops)
{
goto out;
}
/* The minor number is the same as the channel number */
channel = MINOR(inode->i_rdev);
out:
fput(file);
return channel;
#else
return -1;
#endif
}
int NV_API_CALL nv_caps_imex_channel_count(void)
{
return NVreg_ImexChannelCount;
}
int NV_API_CALL nv_caps_imex_init(void)
{
int rc;
if (g_nv_caps_imex.initialized)
{
nv_printf(NV_DBG_ERRORS, "nv-caps-imex is already initialized.\n");
return -EBUSY;
}
if (NVreg_ImexChannelCount == 0)
{
nv_printf(NV_DBG_INFO, "nv-caps-imex is disabled.\n");
return 0;
}
rc = alloc_chrdev_region(&g_nv_caps_imex.devno, 0,
NVreg_ImexChannelCount,
"nvidia-caps-imex-channels");
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to create cdev.\n");
return rc;
}
cdev_init(&g_nv_caps_imex.cdev, &g_nv_caps_imex_fops);
g_nv_caps_imex.cdev.owner = THIS_MODULE;
rc = cdev_add(&g_nv_caps_imex.cdev, g_nv_caps_imex.devno,
NVreg_ImexChannelCount);
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to add cdev.\n");
goto cdev_add_fail;
}
g_nv_caps_imex.initialized = NV_TRUE;
return 0;
cdev_add_fail:
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
return rc;
}
void NV_API_CALL nv_caps_imex_exit(void)
{
if (!g_nv_caps_imex.initialized)
{
return;
}
cdev_del(&g_nv_caps_imex.cdev);
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
g_nv_caps_imex.initialized = NV_FALSE;
}

View File

@@ -0,0 +1,34 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NV_CAPS_IMEX_H_
#define _NV_CAPS_IMEX_H_
#include <nv-kernel-interface-api.h>
int NV_API_CALL nv_caps_imex_init(void);
void NV_API_CALL nv_caps_imex_exit(void);
int NV_API_CALL nv_caps_imex_channel_get(int fd);
int NV_API_CALL nv_caps_imex_channel_count(void);
#endif /* _NV_CAPS_IMEX_H_ */

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -577,12 +577,9 @@ int nvidia_mmap_helper(
//
// This path is similar to the sysmem mapping code.
// TODO: Refactor is needed as part of bug#2001704.
// Use pfn_valid to determine whether the physical address has
// backing struct page. This is used to isolate P8 from P9.
//
if ((nv_get_numa_status(nvl) == NV_NUMA_STATUS_ONLINE) &&
!IS_REG_OFFSET(nv, access_start, access_len) &&
(pfn_valid(PFN_DOWN(mmap_start))))
!IS_REG_OFFSET(nv, access_start, access_len))
{
ret = nvidia_mmap_numa(vma, mmap_context);
if (ret)

View File

@@ -839,6 +839,45 @@
#define __NV_ENABLE_NONBLOCKING_OPEN EnableNonblockingOpen
#define NV_ENABLE_NONBLOCKING_OPEN NV_REG_STRING(__NV_ENABLE_NONBLOCKING_OPEN)
/*
* Option: NVreg_ImexChannelCount
*
* Description:
*
* This option allows users to specify the number of IMEX (import/export)
* channels. Within an IMEX domain, the channels allow sharing memory
* securely in a multi-user environment using the CUDA driver's fabric
* handle-based APIs.
*
* An IMEX domain is either an OS instance or a group of securely
* connected OS instances using the NVIDIA IMEX daemon. The option must
* be set to the same value on each OS instance within the IMEX domain.
*
* An IMEX channel is a logical entity that is represented by a /dev node.
* The IMEX channels are global resources within the IMEX domain. When
* exporter and importer CUDA processes have been granted access to the
* same IMEX channel, they can securely share memory.
*
* Note that the NVIDIA driver will not attempt to create the /dev nodes. Thus,
* the related CUDA APIs will fail with an insufficient permission error until
* the /dev nodes are set up. The creation of these /dev nodes,
* /dev/nvidia-caps-imex-channels/channelN, must be handled by the
* administrator, where N is the minor number. The major number can be
* queried from /proc/devices.
*
* nvidia-modprobe CLI support is available to set up the /dev nodes.
* NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
* and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
*
* Possible values:
* 0 - Disables IMEX (memory sharing using the CUDA driver's fabric handles).
* N - Enables N IMEX channels in the driver to facilitate N concurrent
*     users. The default value is 2048 channels, and the current maximum
*     is 2^20 channels, matching the Linux dev_t minor number limit.
*/
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
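/*
 * Editor's note: a hedged userspace sketch of one way an administrator's
 * tooling could create the IMEX channel device nodes described in the option
 * above (nvidia-modprobe already provides equivalent support). Paths and the
 * class name follow the comment above; error handling is minimal and the
 * code is illustrative only.
 */
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
static int create_imex_channel_node(int channel)
{
    FILE *f = fopen("/proc/devices", "r");
    char line[128];
    char path[128];
    int major = -1;
    if (f == NULL)
        return -1;
    /* Find the dynamically assigned major number for the channel class */
    while (fgets(line, sizeof(line), f) != NULL)
    {
        if (strstr(line, "nvidia-caps-imex-channels") != NULL)
        {
            sscanf(line, "%d", &major);
            break;
        }
    }
    fclose(f);
    if (major < 0)
        return -1;
    (void)mkdir("/dev/nvidia-caps-imex-channels", 0755);
    snprintf(path, sizeof(path),
             "/dev/nvidia-caps-imex-channels/channel%d", channel);
    /* The minor number equals the channel number (see nv-caps-imex.c) */
    return mknod(path, S_IFCHR | 0600, makedev(major, channel));
}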
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
/*
@@ -887,6 +926,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_TEMPORARY_FILE_PATH, NULL);
NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
/*
*----------------registry database definition----------------------
@@ -933,6 +973,7 @@ nv_parm_t nv_parms[] = {
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_DBG_BREAKPOINT),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
{NULL, NULL}
};

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -55,6 +55,7 @@
#include "nv-kthread-q.h"
#include "nv-pat.h"
#include "nv-dmabuf.h"
#include "nv-caps-imex.h"
#if !defined(CONFIG_RETPOLINE)
#include "nv-retpoline.h"
@@ -825,11 +826,18 @@ static int __init nvidia_init_module(void)
goto procfs_exit;
}
rc = nv_caps_imex_init();
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize IMEX channels.\n");
goto caps_root_exit;
}
rc = nv_module_init(&sp);
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize module.\n");
goto caps_root_exit;
goto caps_imex_exit;
}
count = nvos_count_devices();
@@ -941,6 +949,9 @@ drivers_exit:
module_exit:
nv_module_exit(sp);
caps_imex_exit:
nv_caps_imex_exit();
caps_root_exit:
nv_caps_root_exit();
@@ -967,6 +978,8 @@ static void __exit nvidia_exit_module(void)
nv_module_exit(sp);
nv_caps_imex_exit();
nv_caps_root_exit();
nv_procfs_exit();
@@ -2040,7 +2053,7 @@ nvidia_close_callback(
{
nv_linux_state_t *nvl;
nv_state_t *nv;
nvidia_stack_t *sp;
nvidia_stack_t *sp = nvlfp->sp;
NvBool bRemove = NV_FALSE;
nvl = nvlfp->nvptr;
@@ -2052,12 +2065,11 @@ nvidia_close_callback(
*/
nv_free_file_private(nvlfp);
nv_kmem_cache_free_stack(nvlfp->sp);
nv_kmem_cache_free_stack(sp);
return;
}
nv = NV_STATE_PTR(nvl);
sp = nvlfp->sp;
rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);
@@ -6050,6 +6062,131 @@ failed:
return NV_ERR_NOT_SUPPORTED;
}
void NV_API_CALL nv_get_screen_info(
nv_state_t *nv,
NvU64 *pPhysicalAddress,
NvU32 *pFbWidth,
NvU32 *pFbHeight,
NvU32 *pFbDepth,
NvU32 *pFbPitch,
NvU64 *pFbSize
)
{
*pPhysicalAddress = 0;
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = *pFbSize = 0;
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
if (num_registered_fb > 0)
{
int i;
for (i = 0; i < num_registered_fb; i++)
{
if (!registered_fb[i])
continue;
/* Make sure base address is mapped to GPU BAR */
if (NV_IS_CONSOLE_MAPPED(nv, registered_fb[i]->fix.smem_start))
{
*pPhysicalAddress = registered_fb[i]->fix.smem_start;
*pFbWidth = registered_fb[i]->var.xres;
*pFbHeight = registered_fb[i]->var.yres;
*pFbDepth = registered_fb[i]->var.bits_per_pixel;
*pFbPitch = registered_fb[i]->fix.line_length;
*pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
return;
}
}
}
#endif
/*
* If the screen info is not found in the registered FBs, then fall back
* to the screen_info structure.
*
* The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
* generic framebuffers so the new generic system-framebuffer drivers can
* be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
* device created by SYSFB_SIMPLEFB.
*
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
* information required by nv_get_screen_info(), therefore we need to fall
* back to the screen_info structure.
*
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
*/
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
/*
* If there is no framebuffer console, return a size of 0.
*
* orig_video_isVGA is set to 1 during early Linux kernel
* initialization, and then will be set to a value, such as
* VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
*/
if (screen_info.orig_video_isVGA > 1)
{
NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif
/* Make sure base address is mapped to GPU BAR */
if (NV_IS_CONSOLE_MAPPED(nv, physAddr))
{
*pPhysicalAddress = physAddr;
*pFbWidth = screen_info.lfb_width;
*pFbHeight = screen_info.lfb_height;
*pFbDepth = screen_info.lfb_depth;
*pFbPitch = screen_info.lfb_linelength;
*pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
}
}
#else
{
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
struct pci_dev *pci_dev = nvl->pci_dev;
int i;
if (pci_dev == NULL)
return;
BUILD_BUG_ON(NV_GPU_BAR_INDEX_IMEM != NV_GPU_BAR_INDEX_FB + 1);
for (i = NV_GPU_BAR_INDEX_FB; i <= NV_GPU_BAR_INDEX_IMEM; i++)
{
int bar_index = nv_bar_index_to_os_bar_index(pci_dev, i);
struct resource *gpu_bar_res = &pci_dev->resource[bar_index];
struct resource *res = gpu_bar_res->child;
/*
* The console resource becomes a child resource of the PCI device
* resource. Check whether the child resource start address matches
* the expected console start address.
*/
if ((res != NULL) &&
NV_IS_CONSOLE_MAPPED(nv, res->start))
{
NvU32 res_name_len = strlen(res->name);
/*
* The resource name ends with 'fb' (efifb, vesafb, etc.).
* For simple-framebuffer, the resource name is 'BOOTFB'.
* Confirm that the resource name ends with either 'fb' or 'FB'.
*/
if ((res_name_len > 2) &&
!strcasecmp((res->name + res_name_len - 2), "fb"))
{
*pPhysicalAddress = res->start;
*pFbSize = resource_size(res);
return;
}
}
}
}
#endif
}
module_init(nvidia_init_module);
module_exit(nvidia_exit_module);

View File

@@ -279,9 +279,11 @@ NV_STATUS nvGpuOpsPagingChannelPushStream(UvmGpuPagingChannel *channel,
char *methodStream,
NvU32 methodStreamSize);
NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);
NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(gpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush);
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo, NvBool bEnable);
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo,
NvBool bEnable);
// Interface used for CCSL

View File

@@ -985,24 +985,30 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
}
EXPORT_SYMBOL(nvUvmInterfaceGetNonReplayableFaults);
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device)
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
NV_STATUS status;
status = rm_gpu_ops_flush_replayable_fault_buffer(sp, (gpuDeviceHandle)device);
status = rm_gpu_ops_flush_replayable_fault_buffer(sp,
pFaultInfo,
bCopyAndFlush);
nvUvmFreeSafeStack(sp);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceFlushReplayableFaultBuffer);
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable)
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
NvBool bEnable)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
NV_STATUS status;
status = rm_gpu_ops_toggle_prefetch_faults(sp, pFaultInfo, bEnable);
status = rm_gpu_ops_toggle_prefetch_faults(sp,
pFaultInfo,
bEnable);
nvUvmFreeSafeStack(sp);
return status;

View File

@@ -30,18 +30,21 @@ NVIDIA_SOURCES += nvidia/nv-report-err.c
NVIDIA_SOURCES += nvidia/nv-rsync.c
NVIDIA_SOURCES += nvidia/nv-msi.c
NVIDIA_SOURCES += nvidia/nv-caps.c
NVIDIA_SOURCES += nvidia/nv-caps-imex.c
NVIDIA_SOURCES += nvidia/nv_uvm_interface.c
NVIDIA_SOURCES += nvidia/libspdm_aead.c
NVIDIA_SOURCES += nvidia/libspdm_ecc.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf.c
NVIDIA_SOURCES += nvidia/libspdm_rand.c
NVIDIA_SOURCES += nvidia/libspdm_shash.c
NVIDIA_SOURCES += nvidia/libspdm_rsa.c
NVIDIA_SOURCES += nvidia/libspdm_aead_aes_gcm.c
NVIDIA_SOURCES += nvidia/libspdm_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hmac_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf_sha.c
NVIDIA_SOURCES += nvidia/libspdm_ec.c
NVIDIA_SOURCES += nvidia/libspdm_x509.c
NVIDIA_SOURCES += nvidia/libspdm_rsa_ext.c
NVIDIA_SOURCES += nvidia/nvlink_linux.c
NVIDIA_SOURCES += nvidia/nvlink_caps.c
NVIDIA_SOURCES += nvidia/linux_nvswitch.c

View File

@@ -25,6 +25,7 @@
#include "os-interface.h"
#include "nv-linux.h"
#include "nv-caps-imex.h"
#include "nv-time.h"
@@ -59,6 +60,8 @@ NvBool os_dma_buf_enabled = NV_TRUE;
NvBool os_dma_buf_enabled = NV_FALSE;
#endif // CONFIG_DMA_SHARED_BUFFER
NvBool os_imex_channel_is_supported = NV_TRUE;
void NV_API_CALL os_disable_console_access(void)
{
console_lock();
@@ -1231,90 +1234,6 @@ NvBool NV_API_CALL os_is_efi_enabled(void)
return efi_enabled(EFI_BOOT);
}
void NV_API_CALL os_get_screen_info(
NvU64 *pPhysicalAddress,
NvU32 *pFbWidth,
NvU32 *pFbHeight,
NvU32 *pFbDepth,
NvU32 *pFbPitch,
NvU64 consoleBar1Address,
NvU64 consoleBar2Address
)
{
*pPhysicalAddress = 0;
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0;
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
if (num_registered_fb > 0)
{
int i;
for (i = 0; i < num_registered_fb; i++)
{
if (!registered_fb[i])
continue;
/* Make sure base address is mapped to GPU BAR */
if ((registered_fb[i]->fix.smem_start == consoleBar1Address) ||
(registered_fb[i]->fix.smem_start == consoleBar2Address))
{
*pPhysicalAddress = registered_fb[i]->fix.smem_start;
*pFbWidth = registered_fb[i]->var.xres;
*pFbHeight = registered_fb[i]->var.yres;
*pFbDepth = registered_fb[i]->var.bits_per_pixel;
*pFbPitch = registered_fb[i]->fix.line_length;
return;
}
}
}
#endif
/*
* If the screen info is not found in the registered FBs then fallback
* to the screen_info structure.
*
* The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
* generic framebuffers so the new generic system-framebuffer drivers can
* be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
* device created by SYSFB_SIMPLEFB.
*
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
* information required by os_get_screen_info(), therefore you need to
* fall back onto the screen_info structure.
*
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
*/
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
/*
* If there is not a framebuffer console, return 0 size.
*
* orig_video_isVGA is set to 1 during early Linux kernel
* initialization, and then will be set to a value, such as
* VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
*/
if (screen_info.orig_video_isVGA > 1)
{
NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif
/* Make sure base address is mapped to GPU BAR */
if ((physAddr == consoleBar1Address) ||
(physAddr == consoleBar2Address))
{
*pPhysicalAddress = physAddr;
*pFbWidth = screen_info.lfb_width;
*pFbHeight = screen_info.lfb_height;
*pFbDepth = screen_info.lfb_depth;
*pFbPitch = screen_info.lfb_linelength;
}
}
#endif
}
void NV_API_CALL os_dump_stack(void)
{
dump_stack();
@@ -2182,6 +2101,22 @@ void NV_API_CALL os_nv_cap_close_fd
nv_cap_close_fd(fd);
}
NvS32 NV_API_CALL os_imex_channel_count
(
void
)
{
return nv_caps_imex_channel_count();
}
NvS32 NV_API_CALL os_imex_channel_get
(
NvU64 descriptor
)
{
return nv_caps_imex_channel_get((int)descriptor);
}
/*
* Reads the total memory and free memory of a NUMA node from the kernel.
*/

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -87,59 +87,10 @@ static NV_STATUS get_io_ptes(struct vm_area_struct *vma,
return NV_OK;
}
/*!
* @brief Pins user IO pages that have been mapped to the user processes virtual
* address space with remap_pfn_range.
*
* @param[in] vma VMA that contains the virtual address range given by the
* start and the page count.
* @param[in] start Beginning of the virtual address range of the IO pages.
* @param[in] page_count Number of pages to pin from start.
* @param[in,out] page_array Storage array for pointers to the pinned pages.
* Must be large enough to contain at least page_count
* pointers.
*
* @return NV_OK if the pages were pinned successfully, error otherwise.
*/
static NV_STATUS get_io_pages(struct vm_area_struct *vma,
NvUPtr start,
NvU64 page_count,
struct page **page_array)
{
NV_STATUS rmStatus = NV_OK;
NvU64 i, pinned = 0;
unsigned long pfn;
for (i = 0; i < page_count; i++)
{
if ((nv_follow_pfn(vma, (start + (i * PAGE_SIZE)), &pfn) < 0) ||
(!pfn_valid(pfn)))
{
rmStatus = NV_ERR_INVALID_ADDRESS;
break;
}
// Page-backed memory mapped to userspace with remap_pfn_range
page_array[i] = pfn_to_page(pfn);
get_page(page_array[i]);
pinned++;
}
if (pinned < page_count)
{
for (i = 0; i < pinned; i++)
put_page(page_array[i]);
rmStatus = NV_ERR_INVALID_ADDRESS;
}
return rmStatus;
}
NV_STATUS NV_API_CALL os_lookup_user_io_memory(
void *address,
NvU64 page_count,
NvU64 **pte_array,
void **page_array
NvU64 **pte_array
)
{
NV_STATUS rmStatus;
@@ -187,18 +138,9 @@ NV_STATUS NV_API_CALL os_lookup_user_io_memory(
goto done;
}
if (pfn_valid(pfn))
{
rmStatus = get_io_pages(vma, start, page_count, (struct page **)result_array);
if (rmStatus == NV_OK)
*page_array = (void *)result_array;
}
else
{
rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
if (rmStatus == NV_OK)
*pte_array = (NvU64 *)result_array;
}
rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
if (rmStatus == NV_OK)
*pte_array = (NvU64 *)result_array;
done:
nv_mmap_read_unlock(mm);