570.123.07

russellcnv
2025-03-25 12:40:01 -07:00
parent 5e6ad2b575
commit 4d941c0b6e
146 changed files with 53927 additions and 54744 deletions

View File

@@ -59,7 +59,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c

View File

@@ -240,7 +240,7 @@ static void uvm_release_deferred(void *data)
// Since this function is only scheduled to run when uvm_release() fails
// to trylock-acquire the pm.lock, the following acquisition attempt
// is expected to block this thread, and cause it to remain blocked until
// uvm_resume() releases the lock. As a result, the deferred release
// kthread queue may stall for long periods of time.
uvm_down_read(&g_uvm_global.pm.lock);
@@ -292,14 +292,14 @@ static int uvm_release(struct inode *inode, struct file *filp)
// Because the kernel discards the status code returned from this release
// callback, early exit in case of a pm.lock acquisition failure is not
// an option. Instead, the teardown work normally performed synchronously
// needs to be scheduled to run after uvm_resume() releases the lock.
if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
uvm_va_space_destroy(va_space);
uvm_up_read(&g_uvm_global.pm.lock);
}
else {
// Remove references to this inode from the address_space. This isn't
// strictly necessary, as any CPU mappings of this file have already
// been destroyed, and va_space->mapping won't be used again. Still,
// the va_space survives the inode if its destruction is deferred, in
@@ -867,8 +867,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
}
// If the PM lock cannot be acquired, disable the VMA and report success
// to the caller. The caller is expected to determine whether the
// map operation succeeded via an ioctl() call. This is necessary to
// safely handle MAP_FIXED, which needs to complete atomically to prevent
// the loss of the virtual address range.
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
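The uvm_release() and uvm_mmap() hunks above rely on the same rule: those entry points must never sleep on pm.lock (the kernel discards uvm_release()'s status, and MAP_FIXED must complete atomically), so on contention the teardown is deferred to a kthread queue, or the VMA is disabled and the result reported later through an ioctl(). A minimal userspace sketch of the trylock-or-defer half, using POSIX rwlocks and a thread in place of the driver's uvm_down_read_trylock()/nv_kthread_q machinery (all names here are illustrative, not the driver's API):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t pm_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Deferred worker: blocks on the read side until "resume" drops the write
 * lock, then performs the teardown that release() could not do inline. */
static void *deferred_release(void *arg)
{
    (void)arg;
    pthread_rwlock_rdlock(&pm_lock);    /* may stay blocked for a long time */
    printf("deferred teardown running\n");
    pthread_rwlock_unlock(&pm_lock);
    return NULL;
}

/* release() analogue: must not block, so it only try-locks. */
static void release_cb(pthread_t *worker, int *deferred)
{
    if (pthread_rwlock_tryrdlock(&pm_lock) == 0) {
        printf("immediate teardown\n");
        pthread_rwlock_unlock(&pm_lock);
    }
    else {
        /* "suspend" holds the lock: queue the work instead of blocking. */
        *deferred = !pthread_create(worker, NULL, deferred_release, NULL);
    }
}

int main(void)
{
    pthread_t worker;
    int deferred = 0;

    pthread_rwlock_wrlock(&pm_lock);    /* "uvm_suspend" takes pm.lock */
    release_cb(&worker, &deferred);     /* cannot get it, so it defers */
    sleep(1);
    pthread_rwlock_unlock(&pm_lock);    /* "uvm_resume" unblocks the worker */

    if (deferred)
        pthread_join(worker, NULL);
    return 0;
}

The cost of the pattern, noted in the first hunk, is that the deferred worker can stall its queue for as long as the suspend lasts.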
@@ -1233,19 +1233,8 @@ static int uvm_init(void)
goto error;
}
pr_info("Loaded the UVM driver, major device number %d.\n", MAJOR(g_uvm_base_dev));
if (uvm_enable_builtin_tests)
pr_info("Built-in UVM tests are enabled. This is a security risk.\n");
// After Open RM is released, both the enclosing "#if" and this comment
// block should be removed, because the uvm_hmm_is_enabled_system_wide()
// check is both necessary and sufficient for reporting functionality.
// Until that time, however, we need to avoid advertising UVM's ability to
// enable HMM functionality.
if (uvm_hmm_is_enabled_system_wide())
UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
UVM_INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");
return 0;
@@ -1274,8 +1263,6 @@ static void uvm_exit(void)
uvm_global_exit();
uvm_test_unload_state_exit();
pr_info("Unloaded the UVM driver.\n");
}
static void __exit uvm_exit_entry(void)

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ada covers 128 TB and that's the minimum size
@@ -82,8 +80,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ampere covers 128 TB and that's the minimum
@@ -86,8 +84,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2023 NVIDIA Corporation
Copyright (c) 2024-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -139,9 +139,9 @@ static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate;
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.replayable.ats_invalidate;
else
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.non_replayable.ats_invalidate;
if (!ats_invalidate->tlb_batch_pending) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);

View File

@@ -38,12 +38,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Blackwell covers 64 PB and that's the minimum
@@ -85,8 +83,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -110,16 +110,22 @@ typedef enum
bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool)
{
uvm_channel_manager_t *manager = pool->manager;
uvm_gpu_t *gpu = manager->gpu;
uvm_gpu_id_t id;
if (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] == pool)
return true;
for_each_gpu_id_in_mask(id, &manager->gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool)
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool) {
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return false;
}
@@ -1974,6 +1980,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
{
uvm_channel_pool_t *pool;
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(suspended_pools, UVM_COPY_ENGINE_COUNT_MAX);
@@ -1981,7 +1988,9 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
// Use bitmap to track which were suspended.
bitmap_zero(suspended_pools, channel_manager->num_channel_pools);
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
status = channel_pool_suspend_p2p(pool);
@@ -2014,6 +2023,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
{
uvm_channel_pool_t *pool;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(resumed_pools, UVM_COPY_ENGINE_COUNT_MAX);
@@ -2021,7 +2031,9 @@ void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
// Use bitmap to track which were suspended.
bitmap_zero(resumed_pools, channel_manager->num_channel_pools);
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
channel_pool_resume_p2p(pool);
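The "Use bitmap to track which were suspended" comment is doing real work here: several peer GPUs can map to the same GPU-to-GPU copy-engine pool, so the bitmap (test_bit/test_and_set_bit in the driver) is what keeps each pool from being suspended or resumed more than once. A standalone sketch of that dedup idea, with a plain bool array standing in for DECLARE_BITMAP and integer indices standing in for uvm_channel_pool_t (all names hypothetical):

#include <stdbool.h>
#include <stdio.h>

#define MAX_POOLS 8
#define NUM_PEERS 4

/* Several peers may share a pool; suspend each pool at most once. */
static void suspend_p2p_pools(const int pool_for_peer[NUM_PEERS])
{
    bool suspended[MAX_POOLS] = { false };   /* stands in for the bitmap */
    int peer;

    for (peer = 0; peer < NUM_PEERS; peer++) {
        int pool = pool_for_peer[peer];

        if (pool < 0 || suspended[pool])     /* no pool, or already handled */
            continue;

        suspended[pool] = true;
        printf("suspending pool %d (first seen via peer %d)\n", pool, peer);
    }
}

int main(void)
{
    /* Peers 0 and 2 share pool 1; peer 3 has no P2P pool. */
    const int pool_for_peer[NUM_PEERS] = { 1, 5, 1, -1 };

    suspend_p2p_pools(pool_for_peer);
    return 0;
}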
@@ -3243,9 +3255,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;
if (manager->conf.num_gpfifo_entries != uvm_channel_num_gpfifo_entries) {
pr_info("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
UVM_INFO_PRINT("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
}
// 2- Allocation locations
@@ -3285,9 +3297,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
pushbuffer_loc_value = uvm_channel_pushbuffer_loc;
if (!is_string_valid_location(pushbuffer_loc_value)) {
pushbuffer_loc_value = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
}
// Override the default value if requested by the user
@@ -3297,8 +3309,8 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// so force the location to sys for now.
// TODO: Bug 2904133: Remove the following "if" after the bug is fixed.
if (NVCPU_IS_AARCH64) {
pr_info("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
UVM_INFO_PRINT("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
}
else {
@@ -3310,8 +3322,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// Only support the knobs for GPFIFO/GPPut on Volta+
if (!gpu->parent->gpfifo_in_vidmem_supported) {
if (manager->conf.gpput_loc == UVM_BUFFER_LOCATION_SYS) {
pr_info("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
UVM_INFO_PRINT("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s "
"instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
}
manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_DEFAULT;
@@ -3323,17 +3336,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
gpfifo_loc_value = uvm_channel_gpfifo_loc;
if (!is_string_valid_location(gpfifo_loc_value)) {
gpfifo_loc_value = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
}
gpput_loc_value = uvm_channel_gpput_loc;
if (!is_string_valid_location(gpput_loc_value)) {
gpput_loc_value = UVM_CHANNEL_GPPUT_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
}
// On coherent platforms where the GPU does not cache sysmem but the CPU

View File

@@ -57,6 +57,7 @@ enum {
// NULL.
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
// Long prefix - typically for debugging and tests.
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
func(prefix "%s:%u %s[pid:%d]" fmt, \
kbasename(__FILE__), \
@@ -65,10 +66,15 @@ void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
current->pid, \
##__VA_ARGS__)
// Short prefix - typically for information.
#define UVM_PRINT_FUNC_SHORT_PREFIX(func, prefix, fmt, ...) \
func(prefix fmt, ##__VA_ARGS__)
// No prefix - used by kernel panic messages.
#define UVM_PRINT_FUNC(func, fmt, ...) \
UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)
// Check whether UVM_{ERR,DBG,INFO}_PRINT* should be enabled
// Check whether UVM_{ERR,DBG}_PRINT* should be enabled.
bool uvm_debug_prints_enabled(void);
// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
@@ -80,10 +86,10 @@ bool uvm_debug_prints_enabled(void);
} \
} while (0)
#define UVM_ASSERT_PRINT(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_PRINT_RL(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT(fmt, ...) \
@@ -95,13 +101,16 @@ bool uvm_debug_prints_enabled(void);
#define UVM_DBG_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_DBG_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
// UVM_INFO_PRINT prints in all modes (including in the release mode.) It is
// used for relaying driver-level information, rather than detailed debugging
// information; therefore, it does not add the "pretty long prefix".
#define UVM_INFO_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
UVM_PRINT_FUNC_SHORT_PREFIX(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)
#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
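The hunk above splits the helpers into a long prefix (source file, line, function and pid, used by the error/debug/assert prints) and a short prefix (just the driver tag, now used by UVM_INFO_PRINT). A small standalone illustration of the two prefix styles and how the output differs (simplified macros, not the driver's exact definitions, which also rate-limit and use kbasename()/current->pid):

#include <stdio.h>

#define TAG "nvidia-uvm: "

/* Long prefix: file:line and function, as UVM_PRINT_FUNC_PREFIX does. */
#define ERR_PRINT(fmt, ...) \
    printf(TAG "%s:%d %s() " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__)

/* Short prefix: tag only, as UVM_PRINT_FUNC_SHORT_PREFIX / UVM_INFO_PRINT do. */
#define INFO_PRINT(fmt, ...) \
    printf(TAG fmt, ##__VA_ARGS__)

int main(void)
{
    INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");
    ERR_PRINT("unexpected status %d\n", -22);
    return 0;
}

The first line comes out as "nvidia-uvm: Built-in UVM tests ...", the second adds the file/line/function decoration, which is the same split the commit makes between informational messages and error or assert messages.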
@@ -134,13 +143,13 @@ void on_uvm_test_fail(void);
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ERR_PRINT_ALWAYS("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
// Prevent function calls in expr and the print argument list from being
@@ -151,7 +160,8 @@ void on_uvm_assert(void);
UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
} while (0)
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
// builds.
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")
@@ -174,16 +184,16 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
// Given these are enabled for release builds, we need to be more cautious than
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
// param is enabled.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ERR_PRINT_ALWAYS_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
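As the comment above says, the release-build assert is deliberately more guarded than UVM_ASSERT: it is gated by a module parameter, rate-limits its print, and only dumps the stack or latches a global error when separately requested. A trimmed-down standalone model of that gating, with plain C flags standing in for the uvm_release_asserts* module parameters and fprintf standing in for printk_ratelimited (not the driver's actual macro):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the uvm_release_asserts* module parameters. */
static bool release_asserts            = true;
static bool release_asserts_dump_stack = false;
static bool release_asserts_set_error  = false;

static bool global_fatal_error;

#define ASSERT_RELEASE(expr)                                                  \
    do {                                                                      \
        if (release_asserts && !(expr)) {                                     \
            fprintf(stderr, "Assert failed, condition %s not true\n", #expr); \
            if (release_asserts_set_error)                                    \
                global_fatal_error = true;  /* uvm_global_set_fatal_error */  \
            if (release_asserts_dump_stack)                                   \
                fprintf(stderr, "(stack dump would go here)\n");              \
        }                                                                     \
    } while (0)

int main(void)
{
    int refcount = -1;

    ASSERT_RELEASE(refcount >= 0);   /* prints, but does not panic */
    return global_fatal_error ? 1 : 0;
}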

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -532,7 +532,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
// There is no dedicated lock for the CSL context associated with replayable
// faults. The mutual exclusion required by the RM CSL API is enforced by
@@ -571,7 +571,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
// See comment in uvm_conf_computing_fault_decrypt
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -93,11 +93,11 @@ typedef struct uvm_service_block_context_struct uvm_service_block_context_t;
typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;
typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
typedef struct uvm_replayable_fault_buffer_struct uvm_replayable_fault_buffer_t;
typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_buffer_t;
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;

View File

@@ -194,6 +194,12 @@ NV_STATUS uvm_global_init(void)
goto error;
}
status = uvm_access_counters_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_access_counters_init failed: %s\n", nvstatusToString(status));
goto error;
}
// This sets up the ISR (interrupt service routine), by hooking into RM's
// top-half ISR callback. As soon as this call completes, GPU interrupts
// will start arriving, so it's important to be prepared to receive
@@ -224,8 +230,8 @@ void uvm_global_exit(void)
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
uvm_unregister_callbacks();
uvm_access_counters_exit();
uvm_service_block_context_exit();
uvm_perf_heuristics_exit();
uvm_perf_events_exit();
uvm_migrate_exit();
@@ -287,7 +293,7 @@ static NV_STATUS uvm_suspend(void)
// * Flush relevant kthread queues (bottom half, etc.)
// Some locks acquired by this function, such as pm.lock, are released
// by uvm_resume(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
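The lock-tracking comments above describe an intentional asymmetry: pm.lock is taken in uvm_suspend() and only dropped in uvm_resume(), which a per-context lock tracker would normally flag as a leaked lock, hence the disable/enable calls around it. A toy model of why (hypothetical tracker, not the driver's uvm_thread_context code):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy lock tracker: counts locks held by the current "thread context" and
 * expects the count to return to zero before the context goes away. */
static int  locks_held;
static bool tracking_enabled = true;

static void track_acquire(void) { if (tracking_enabled) locks_held++; }
static void track_release(void) { if (tracking_enabled) locks_held--; }

static void context_check(void)
{
    /* A lock handed from suspend() to resume() would trip this check,
     * which is why tracking is disabled around both sides. */
    assert(locks_held == 0);
}

static void suspend(void)
{
    tracking_enabled = false;   /* uvm_thread_context_lock_disable_tracking() */
    track_acquire();            /* take "pm.lock"; held until resume() */
    tracking_enabled = true;
    context_check();            /* passes only because the acquire was untracked */
}

static void resume(void)
{
    tracking_enabled = false;
    track_release();            /* release the lock taken by suspend() */
    tracking_enabled = true;
    context_check();
}

int main(void)
{
    suspend();
    printf("suspended\n");
    resume();
    printf("resumed\n");
    return 0;
}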
@@ -304,7 +310,7 @@ static NV_STATUS uvm_suspend(void)
gpu = uvm_gpu_get(gpu_id);
// Since fault buffer state may be lost across sleep cycles, UVM must
// ensure any outstanding replayable faults are dismissed. The RM
// guarantees that all user channels have been preempted before
// uvm_suspend() is called, which implies that no user channels can be
// stalled on faults when this point is reached.
@@ -330,7 +336,7 @@ static NV_STATUS uvm_suspend(void)
}
// Acquire each VA space's lock in write mode to lock out VMA open and
// release callbacks. These entry points do not have feasible early exit
// options, and so aren't suitable for synchronization with pm.lock.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
@@ -360,7 +366,7 @@ static NV_STATUS uvm_resume(void)
g_uvm_global.pm.is_suspended = false;
// Some locks released by this function, such as pm.lock, were acquired
// by uvm_suspend(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
@@ -392,7 +398,7 @@ static NV_STATUS uvm_resume(void)
uvm_thread_context_lock_enable_tracking();
// Force completion of any release callbacks successfully queued for
// deferred completion while suspended. The deferred release
// queue is not guaranteed to remain empty following this flush since
// some threads that failed to acquire pm.lock in uvm_release() may
// not have scheduled their handlers yet.
@@ -424,7 +430,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
}
else {
UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
nvstatusToString(error), nvstatusToString(previous_error));
nvstatusToString(error),
nvstatusToString(previous_error));
}
nvUvmInterfaceReportFatalError(error);

View File

@@ -538,7 +538,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
NvU64 num_pages_in;
NvU64 num_pages_out;
NvU64 mapped_cpu_pages_size;
NvU32 get, put;
NvU32 get;
NvU32 put;
NvU32 i;
unsigned int cpu;
UVM_SEQ_OR_DBG_PRINT(s, "GPU %s\n", uvm_gpu_name(gpu));
@@ -608,19 +610,19 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.replayable_faults.stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_buffer_entries %u\n",
gpu->parent->fault_buffer_info.replayable.max_faults);
gpu->parent->fault_buffer.replayable.max_faults);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_get %u\n",
gpu->parent->fault_buffer_info.replayable.cached_get);
gpu->parent->fault_buffer.replayable.cached_get);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_put %u\n",
gpu->parent->fault_buffer_info.replayable.cached_put);
gpu->parent->fault_buffer.replayable.cached_put);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_get %u\n",
gpu->parent->fault_buffer_hal->read_get(gpu->parent));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_put %u\n",
gpu->parent->fault_buffer_hal->read_put(gpu->parent));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_fault_batch_size %u\n",
gpu->parent->fault_buffer_info.max_batch_size);
gpu->parent->fault_buffer.max_batch_size);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_replay_policy %s\n",
uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer_info.replayable.replay_policy));
uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer.replayable.replay_policy));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_num_faults %llu\n",
gpu->parent->stats.num_replayable_faults);
}
@@ -634,32 +636,35 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.non_replayable_faults.stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_buffer_entries %u\n",
gpu->parent->fault_buffer_info.non_replayable.max_faults);
gpu->parent->fault_buffer.non_replayable.max_faults);
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_num_faults %llu\n",
gpu->parent->stats.num_non_replayable_faults);
}
if (gpu->parent->isr.access_counters.handling_ref_count > 0) {
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh %llu\n",
gpu->parent->isr.access_counters.stats.bottom_half_count);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh/cpu\n");
for_each_cpu(cpu, &gpu->parent->isr.access_counters.stats.cpus_used_mask) {
UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
cpu,
gpu->parent->isr.access_counters.stats.cpu_exec_count[cpu]);
for (i = 0; i < gpu_info->accessCntrBufferCount; i++) {
if (gpu->parent->access_counters_supported && gpu->parent->isr.access_counters[i].handling_ref_count > 0) {
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_notif_buffer_index %u\n", i);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh %llu\n",
gpu->parent->isr.access_counters[i].stats.bottom_half_count);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh/cpu\n");
for_each_cpu(cpu, &gpu->parent->isr.access_counters[i].stats.cpus_used_mask) {
UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
cpu,
gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer[i].max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer[i].cached_get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer[i].cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
}
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer_info.max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer_info.cached_get);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer_info.cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_put %u\n", put);
}
num_pages_out = atomic64_read(&gpu->parent->stats.num_pages_out);
@@ -694,18 +699,18 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults %llu\n", parent_gpu->stats.num_replayable_faults);
UVM_SEQ_OR_DBG_PRINT(s, "duplicates %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults);
parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
UVM_SEQ_OR_DBG_PRINT(s, " prefetch %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults);
parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults);
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_read_faults);
parent_gpu->fault_buffer.replayable.stats.num_read_faults);
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_write_faults);
parent_gpu->fault_buffer.replayable.stats.num_write_faults);
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_in);
parent_gpu->fault_buffer.replayable.stats.num_atomic_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -713,25 +718,25 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, "replays:\n");
UVM_SEQ_OR_DBG_PRINT(s, " start %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_replays);
parent_gpu->fault_buffer.replayable.stats.num_replays);
UVM_SEQ_OR_DBG_PRINT(s, " start_ack_all %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_replays_ack_all);
parent_gpu->fault_buffer.replayable.stats.num_replays_ack_all);
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults %llu\n", parent_gpu->stats.num_non_replayable_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_read_faults);
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_write_faults);
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_addressing:\n");
UVM_SEQ_OR_DBG_PRINT(s, " virtual %llu\n",
parent_gpu->stats.num_non_replayable_faults -
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
UVM_SEQ_OR_DBG_PRINT(s, " physical %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_in);
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -743,16 +748,25 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
{
NvU64 num_pages_in;
NvU64 num_pages_out;
NvU32 i;
UVM_ASSERT(uvm_procfs_is_debug_enabled());
num_pages_out = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
// procfs_files are created before gpu_init_isr, so we need to check whether
// the access_counter_buffer is allocated.
if (parent_gpu->access_counter_buffer) {
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];
num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations - buffer index %u:\n", i);
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
}
}
}
// This function converts an index of 2D array of size [N x N] into an index
@@ -892,7 +906,7 @@ static int nv_procfs_read_gpu_info(struct seq_file *s, void *v)
uvm_gpu_t *gpu = (uvm_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
gpu_info_print_common(gpu, s);
@@ -911,7 +925,7 @@ static int nv_procfs_read_gpu_fault_stats(struct seq_file *s, void *v)
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
gpu_fault_stats_print_common(parent_gpu, s);
@@ -930,7 +944,7 @@ static int nv_procfs_read_gpu_access_counters(struct seq_file *s, void *v)
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
gpu_access_counters_print_common(parent_gpu, s);
@@ -1182,7 +1196,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.access_counters.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@@ -1221,7 +1235,7 @@ static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t gpu_id)
// Initialize enough of the gpu struct for remove_gpu to be called
gpu->magic = UVM_GPU_MAGIC_VALUE;
uvm_spin_lock_init(&gpu->peer_info.peer_gpus_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&gpu->peer_info.peer_gpu_lock, UVM_LOCK_ORDER_LEAF);
sub_processor_index = uvm_id_sub_processor_index(gpu_id);
parent_gpu->gpus[sub_processor_index] = gpu;
@@ -1545,12 +1559,6 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}
status = uvm_pmm_sysmem_mappings_init(gpu, &gpu->pmm_reverse_sysmem_mappings);
if (status != NV_OK) {
UVM_ERR_PRINT("CPU PMM MMIO initialization failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status;
}
uvm_pmm_gpu_device_p2p_init(gpu);
status = init_semaphore_pools(gpu);
@@ -1616,7 +1624,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
// trackers.
if (sync_replay_tracker) {
uvm_parent_gpu_replayable_faults_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.replayable.replay_tracker);
status = uvm_tracker_wait(&parent_gpu->fault_buffer.replayable.replay_tracker);
uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);
if (status != NV_OK)
@@ -1627,7 +1635,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
// VA block trackers, too.
if (sync_clear_faulted_tracker) {
uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.non_replayable.clear_faulted_tracker);
status = uvm_tracker_wait(&parent_gpu->fault_buffer.non_replayable.clear_faulted_tracker);
uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);
if (status != NV_OK)
@@ -1635,13 +1643,20 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
}
// Sync the access counter clear tracker too.
if (parent_gpu->access_counters_supported) {
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->access_counter_buffer_info.clear_tracker);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
NvU32 notif_buf_index;
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index];
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
if (access_counters->rm_info.accessCntrBufferHandle != 0) {
uvm_access_counters_isr_lock(access_counters);
status = uvm_tracker_wait(&access_counters->clear_tracker);
uvm_access_counters_isr_unlock(access_counters);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
}
}
}
@@ -1680,15 +1695,11 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->instance_ptr_table));
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->tsg_table));
// Access counters should have been disabled when the GPU is no longer
// registered in any VA space.
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
deinit_parent_procfs_files(parent_gpu);
// Return ownership to RM
uvm_parent_gpu_deinit_isr(parent_gpu);
deinit_parent_procfs_files(parent_gpu);
uvm_pmm_devmem_deinit(parent_gpu);
uvm_ats_remove_gpu(parent_gpu);
@@ -1746,8 +1757,6 @@ static void deinit_gpu(uvm_gpu_t *gpu)
uvm_pmm_gpu_device_p2p_deinit(gpu);
uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);
uvm_pmm_gpu_deinit(&gpu->pmm);
if (gpu->rm_address_space != 0)
@@ -1794,14 +1803,14 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
switch (fault_entry->fault_access_type)
{
case UVM_FAULT_ACCESS_TYPE_READ:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_read_faults;
break;
case UVM_FAULT_ACCESS_TYPE_WRITE:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_write_faults;
break;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults;
break;
default:
UVM_ASSERT_MSG(false, "Invalid access type for non-replayable faults\n");
@@ -1809,7 +1818,7 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
}
if (!fault_entry->is_virtual)
++parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults;
++parent_gpu->stats.num_non_replayable_faults;
@@ -1821,23 +1830,23 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
switch (fault_entry->fault_access_type)
{
case UVM_FAULT_ACCESS_TYPE_PREFETCH:
++parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults;
++parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults;
break;
case UVM_FAULT_ACCESS_TYPE_READ:
++parent_gpu->fault_buffer_info.replayable.stats.num_read_faults;
++parent_gpu->fault_buffer.replayable.stats.num_read_faults;
break;
case UVM_FAULT_ACCESS_TYPE_WRITE:
++parent_gpu->fault_buffer_info.replayable.stats.num_write_faults;
++parent_gpu->fault_buffer.replayable.stats.num_write_faults;
break;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
++parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults;
++parent_gpu->fault_buffer.replayable.stats.num_atomic_faults;
break;
default:
break;
}
if (is_duplicate || fault_entry->filtered)
++parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults;
++parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults;
++parent_gpu->stats.num_replayable_faults;
}
@@ -1901,21 +1910,29 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
if (gpu_dst) {
atomic64_add(pages, &gpu_dst->parent->stats.num_pages_in);
if (is_replayable_fault)
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.replayable.stats.num_pages_in);
else if (is_non_replayable_fault)
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.non_replayable.stats.num_pages_in);
else if (is_access_counter)
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer_info.stats.num_pages_in);
if (is_replayable_fault) {
atomic64_add(pages, &gpu_dst->parent->fault_buffer.replayable.stats.num_pages_in);
}
else if (is_non_replayable_fault) {
atomic64_add(pages, &gpu_dst->parent->fault_buffer.non_replayable.stats.num_pages_in);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
}
}
if (gpu_src) {
atomic64_add(pages, &gpu_src->parent->stats.num_pages_out);
if (is_replayable_fault)
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.replayable.stats.num_pages_out);
else if (is_non_replayable_fault)
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.non_replayable.stats.num_pages_out);
else if (is_access_counter)
atomic64_add(pages, &gpu_src->parent->access_counter_buffer_info.stats.num_pages_out);
if (is_replayable_fault) {
atomic64_add(pages, &gpu_src->parent->fault_buffer.replayable.stats.num_pages_out);
}
else if (is_non_replayable_fault) {
atomic64_add(pages, &gpu_src->parent->fault_buffer.non_replayable.stats.num_pages_out);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
}
}
}
@@ -1929,8 +1946,9 @@ static void uvm_param_conf(void)
}
else {
if (strcmp(uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL) != 0) {
pr_info("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL);
UVM_INFO_PRINT("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
uvm_peer_copy,
UVM_PARAM_PEER_COPY_PHYSICAL);
}
g_uvm_global.peer_copy_mode = UVM_GPU_PEER_COPY_MODE_PHYSICAL;
@@ -2397,6 +2415,7 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
{
NV_STATUS status;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(peer_caps->ref_count == 0);
status = parent_peers_retain(gpu0->parent, gpu1->parent);
@@ -2419,25 +2438,13 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
// In the case of NVLINK peers, this initialization will happen during
// add_gpu. As soon as the peer info table is assigned below, the access
// counter bottom half could start operating on the GPU being newly
// added and inspecting the peer caps, so all of the appropriate
// initialization must happen before this point.
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
return NV_OK;
@@ -2465,18 +2472,18 @@ static NV_STATUS peers_retain(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
static void peers_destroy(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *peer_caps)
{
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
// Flush the access counter buffer to avoid getting stale notifications for
// accesses to GPUs to which peer access is being disabled. This is also
@@ -2690,7 +2697,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
uvm_processor_mask_clear(&g_uvm_global.retained_gpus, gpu->id);
// If the parent is being freed, stop scheduling new bottom halves and
// update relevant software state. Else flush any pending bottom halves
// before continuing.
if (free_parent)
uvm_parent_gpu_disable_isr(parent_gpu);
@@ -2713,6 +2720,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
const UvmGpuInfo *gpu_info,
const UvmGpuPlatformInfo *gpu_platform_info,
uvm_parent_gpu_t *parent_gpu,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
@@ -2725,6 +2733,9 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
status = alloc_parent_gpu(gpu_uuid, uvm_parent_gpu_id_from_gpu_id(gpu_id), &parent_gpu);
if (status != NV_OK)
return status;
if (uvm_enable_builtin_tests)
parent_gpu->test = *parent_gpu_error;
}
gpu = alloc_gpu(parent_gpu, gpu_id);
@@ -2794,7 +2805,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
// Clear the interrupt bit and force the re-evaluation of the interrupt
// condition to ensure that we don't miss any pending interrupt
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}
// Access counters are enabled on demand
@@ -2837,6 +2848,7 @@ error:
// the partition.
static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status = NV_OK;
@@ -2888,7 +2900,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, &gpu);
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, parent_gpu_error, &gpu);
if (status != NV_OK)
goto error_unregister;
}
@@ -2913,11 +2925,12 @@ error_free_gpu_info:
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
uvm_mutex_lock(&g_uvm_global.global_lock);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, gpu_out);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, parent_gpu_error, gpu_out);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
@@ -3072,60 +3085,63 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
return (address.address >= gpu->parent->peer_va_base &&
address.address < (gpu->parent->peer_va_base + gpu->parent->peer_va_size));
}
} else {
}
else {
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;
if (uvm_aperture_is_peer(address.aperture)) {
bool is_peer = true;
uvm_parent_processor_mask_t parent_gpus;
uvm_parent_gpu_t *parent_peer_gpu;
// Local EGM accesses don't go over NVLINK
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
return false;
// EGM uses peer IDs but they are different from VIDMEM peer IDs.
// Check if the address aperture is an EGM aperture.
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
uvm_aperture_t egm_peer_aperture;
if (!parent_peer_gpu->egm.enabled)
continue;
egm_peer_aperture = uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu);
if (address.aperture == egm_peer_aperture) {
is_peer = false;
break;
}
// EGM uses peer IDs but they are different from VIDMEM peer
// IDs.
// Check if the address aperture is an EGM aperture.
// We should not use remote EGM addresses internally until
// NVLINK STO handling is updated to handle EGM.
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
}
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
} else if (address.aperture == UVM_APERTURE_SYS) {
bool is_peer = false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}
if (address.aperture != UVM_APERTURE_SYS)
return false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
}
return false;
@@ -3141,49 +3157,6 @@ uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
return UVM_APERTURE_DEFAULT;
}
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
{
uvm_processor_id_t id = UVM_ID_INVALID;
// TODO: Bug 1899622: On P9 systems with multiple CPU sockets, SYS aperture
// is also reported for accesses to remote GPUs connected to a different CPU
// NUMA domain. We will need to determine the actual processor id using the
// reported physical address.
if (addr.aperture == UVM_APERTURE_SYS)
return UVM_ID_CPU;
else if (addr.aperture == UVM_APERTURE_VID)
return gpu->id;
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];
UVM_ASSERT(other_gpu);
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));
if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, other_gpu->parent)) {
// NVSWITCH connected systems use an extended physical address to
// map to peers. Find the physical memory 'slot' containing the
// given physical address to find the peer gpu that owns the
// physical address
NvU64 fabric_window_end = other_gpu->parent->nvswitch_info.fabric_memory_window_start +
other_gpu->mem_info.max_allocatable_address;
if (other_gpu->parent->nvswitch_info.fabric_memory_window_start <= addr.address &&
fabric_window_end >= addr.address)
break;
}
else if (uvm_gpu_peer_aperture(gpu, other_gpu) == addr.aperture) {
break;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
return id;
}
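Both the SYS-aperture branch above and the uvm_gpu_get_processor_id_by_address() helper removed here reduce "does this address belong to a peer?" to a containment test against per-GPU address windows: exposed coherent memory windows for system-bus addresses, and NVSWITCH fabric memory windows for extended physical addresses. A standalone sketch of that lookup with a hypothetical window table (not the driver's parent-GPU list or field names):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t window_start;   /* e.g. memory_window_start or fabric window base */
    uint64_t window_end;     /* inclusive, matching the driver's comparison    */
} gpu_window_t;

/* Return the index of the GPU whose window contains phys_addr,
 * or -1 if the address is ordinary sysmem. */
static int find_owning_gpu(const gpu_window_t *gpus, int count, uint64_t phys_addr)
{
    int i;

    for (i = 0; i < count; i++) {
        if (phys_addr >= gpus[i].window_start && phys_addr <= gpus[i].window_end)
            return i;
    }
    return -1;
}

int main(void)
{
    const gpu_window_t gpus[] = {
        { 0x100000000000ull, 0x1000ffffffffull },
        { 0x200000000000ull, 0x2000ffffffffull },
    };

    printf("owner of 0x200000001000: %d\n",
           find_owning_gpu(gpus, 2, 0x200000001000ull));
    printf("owner of 0x42: %d\n",
           find_owning_gpu(gpus, 2, 0x42ull));
    return 0;
}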
static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
{
NvU64 key;
@@ -3570,20 +3543,19 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_va_space = NULL;
*out_gpu = NULL;
UVM_ASSERT(entry->address.is_virtual);
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->virtual_info.instance_ptr);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->instance_ptr);
if (!user_channel) {
status = NV_ERR_INVALID_CHANNEL;
goto exit_unlock;
}
if (!user_channel->in_subctx) {
UVM_ASSERT_MSG(entry->virtual_info.ve_id == 0,
UVM_ASSERT_MSG(entry->ve_id == 0,
"Access counter packet contains SubCTX %u for channel not in subctx\n",
entry->virtual_info.ve_id);
entry->ve_id);
gpu_va_space = user_channel->gpu_va_space;
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
@@ -3591,7 +3563,7 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_gpu = gpu_va_space->gpu;
}
else {
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->ve_id);
if (gpu_va_space) {
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;

View File

@@ -189,6 +189,9 @@ struct uvm_service_block_context_struct
// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
// Access counters notification buffer index.
NvU32 access_counters_buffer_index;
};
typedef struct
@@ -197,8 +200,8 @@ typedef struct
{
struct
{
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used for batching ATS faults in a vma.
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
// region of a SAM VMA. Used for batching ATS faults in a vma.
uvm_page_mask_t prefetch_only_fault_mask;
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
@@ -350,7 +353,7 @@ typedef struct
// entries from the GPU buffer
NvU32 max_batch_size;
struct uvm_replayable_fault_buffer_info_struct
struct uvm_replayable_fault_buffer_struct
{
// Maximum number of fault entries that can be stored in the buffer
NvU32 max_faults;
@@ -414,7 +417,7 @@ typedef struct
uvm_ats_fault_invalidate_t ats_invalidate;
} replayable;
struct uvm_non_replayable_fault_buffer_info_struct
struct uvm_non_replayable_fault_buffer_struct
{
// Maximum number of fault entries that can be stored in the buffer
NvU32 max_faults;
@@ -468,7 +471,7 @@ typedef struct
// Timestamp when prefetch faults were disabled last time
NvU64 disable_prefetch_faults_timestamp;
} uvm_fault_buffer_info_t;
} uvm_fault_buffer_t;
struct uvm_access_counter_service_batch_context_struct
{
@@ -476,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_cached_notifications;
struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_access_counter_buffer_entry_t **notifications;
NvU32 num_notifications;
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
} virt;
struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_reverse_map_t *translations;
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
// Helper page mask to compute the accessed pages within a VA block
uvm_page_mask_t accessed_pages;
@@ -514,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 batch_id;
};
typedef struct
struct uvm_access_counter_buffer_struct
{
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
UVM_ACCESS_COUNTER_USE_LIMIT use_limit;
} rm;
uvm_parent_gpu_t *parent_gpu;
// The following values are precomputed by the access counter notification
// handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
// uvm_gpu_access_counters.c for more details.
NvU64 translation_size;
NvU64 translations_per_counter;
NvU64 sub_granularity_region_size;
NvU64 sub_granularity_regions_per_translation;
} uvm_gpu_access_counter_type_config_t;
typedef struct
{
UvmGpuAccessCntrInfo rm_info;
// Access counters may have multiple notification buffers.
NvU32 index;
NvU32 max_notifications;
NvU32 max_batch_size;
@@ -560,10 +531,22 @@ typedef struct
// may override it to try different configuration values.
struct
{
uvm_gpu_access_counter_type_config_t mimc;
uvm_gpu_access_counter_type_config_t momc;
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
} rm;
NvU32 threshold;
// The following values are precomputed by the access counter
// notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
// in uvm_gpu_access_counters.c for more details.
NvU64 translation_size;
NvU64 sub_granularity_region_size;
NvU64 sub_granularity_regions_per_translation;
NvU32 threshold;
} current_config;
// Access counter statistics
@@ -575,7 +558,7 @@ typedef struct
} stats;
// Ignoring access counters means that notifications are left in the HW
// buffer without being serviced. Requests to ignore access counters
// buffer without being serviced. Requests to ignore access counters
// are counted since the suspend path inhibits access counter interrupts,
// and the resume path needs to know whether to reenable them.
NvU32 notifications_ignored_count;
@@ -583,13 +566,25 @@ typedef struct
// Context structure used to service a GPU access counter batch
uvm_access_counter_service_batch_context_t batch_service_context;
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes
//
// Locking: both readers and writers must hold the access counters ISR lock
uvm_va_space_t *reconfiguration_owner;
} uvm_access_counter_buffer_info_t;
struct
{
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes.
//
// Locking: both readers and writers must hold the access counters ISR
// lock.
uvm_va_space_t *reconfiguration_owner;
// When set, the access counters service loop breaks after processing the
// first batch. It is retriggered if notifications are still pending, but
// releasing the ISR service lock between batches makes it possible to
// exercise races that would otherwise be difficult to hit.
NvU32 sleep_per_iteration_us;
} test;
};
typedef struct
{
@@ -745,15 +740,11 @@ struct uvm_gpu_struct
struct
{
// Mask of peer_gpus set
// Mask of peer_gpus set.
uvm_processor_mask_t peer_gpu_mask;
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
// Leaf spinlock used to synchronize access to peer_gpu_mask.
uvm_spinlock_t peer_gpu_lock;
} peer_info;
// Maximum number of subcontexts supported
@@ -828,14 +819,6 @@ struct uvm_gpu_struct
uvm_bit_locks_t bitlocks;
} sysmem_mappings;
// Reverse lookup table used to query the user mapping associated with a
// sysmem (DMA) physical address.
//
// The system memory mapping information referred to by this field is
// different from that of sysmem_mappings, because it relates to user
// mappings (instead of kernel), and it is used in most configurations.
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
struct
{
uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
@@ -957,6 +940,16 @@ struct uvm_gpu_struct
uvm_mutex_t device_p2p_lock;
};
typedef struct
{
bool access_counters_alloc_buffer;
bool access_counters_alloc_block_context;
bool isr_access_counters_alloc;
bool isr_access_counters_alloc_stats_cpu;
bool access_counters_batch_context_notifications;
bool access_counters_batch_context_notification_cache;
} uvm_test_parent_gpu_inject_error_t;
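// The flags above are consulted on the corresponding allocation paths during
// access counter setup; when built-in tests are enabled and a flag is set,
// the path fails as if the allocation itself had returned no memory. A
// hedged sketch of that pattern follows (the helper is hypothetical, not
// driver code):
static NV_STATUS example_alloc_with_error_injection(uvm_parent_gpu_t *parent_gpu, size_t size, void **out)
{
    if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
        return NV_ERR_NO_MEMORY;

    *out = uvm_kvmalloc_zero(size);
    if (!*out)
        return NV_ERR_NO_MEMORY;

    return NV_OK;
}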
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
@@ -965,8 +958,8 @@ struct uvm_gpu_struct
struct uvm_parent_gpu_struct
{
// Reference count for how many places are holding on to a parent GPU
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// registered, but there are brief periods when they are not registered,
// such as during interrupt handling, and in add_gpu() or remove_gpu().
nv_kref_t gpu_kref;
@@ -976,7 +969,7 @@ struct uvm_parent_gpu_struct
uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// usable child GPU in bottom-halves.
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
@@ -1079,11 +1072,6 @@ struct uvm_parent_gpu_struct
bool access_counters_supported;
// If this is true, physical address based access counter notifications are
// potentially generated. If false, only virtual address based notifications
// are generated (assuming access_counters_supported is true too).
bool access_counters_can_use_physical_addresses;
bool fault_cancel_va_supported;
// True if the GPU has hardware support for scoped atomics
@@ -1205,17 +1193,17 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;
// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// This is only valid if supports_replayable_faults is set to true.
uvm_fault_buffer_t fault_buffer;
// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;
// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
@@ -1348,6 +1336,8 @@ struct uvm_parent_gpu_struct
// GPUs.
NvU64 base_address;
} egm;
uvm_test_parent_gpu_inject_error_t test;
};
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
@@ -1395,10 +1385,10 @@ typedef struct
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
// called.
//
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
// GPU must be read from the original GPU's peer_gpus table. The fields
// will not change while the lock is held, but they may no longer be valid
// because the other GPU might be in teardown.
// - The peer_gpu_lock is held on one of the GPUs. In this case, the other
// GPU must be looked up through the original GPU's peer_gpu_mask.
// The fields will not change while the lock is held, but they may no
// longer be valid because the other GPU might be in teardown.
// This field is used to determine when this struct has been initialized
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
@@ -1510,7 +1500,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
// that the caller is holding the global_lock. This is a narrower-purpose
// that the caller is holding the global_lock. This is a narrower-purpose
// function, and is only intended for use by the top-half ISR, or other very
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
@@ -1521,6 +1511,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
// LOCKING: Takes and releases the global lock for the caller.
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out);
// Retain a gpu which is known to already be retained. Does NOT require the
@@ -1578,10 +1569,6 @@ uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address
// The two GPUs must have different parents.
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
// NUMA node that remote_gpu is attached to.
// Note that local_gpu can be equal to remote_gpu when memory is resident in
@@ -1655,7 +1642,8 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent memory
// * Physical address using SYS aperture that belongs to an exposed coherent
// memory
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);
@@ -1684,8 +1672,8 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
// Check for NVLINK errors without calling into RM
//
// Calling into RM is problematic in many places, this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
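// A sketch of the two-step check suggested above, assuming the caller can
// safely call into RM on the slow path. Only NV_WARN_MORE_PROCESSING_REQUIRED
// from the RM-free variant requires the full uvm_gpu_check_nvlink_error()
// call; any other status is already conclusive.
static NV_STATUS example_check_nvlink_error(uvm_gpu_t *gpu)
{
    NV_STATUS status = uvm_gpu_check_nvlink_error_no_rm(gpu);

    if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
        status = uvm_gpu_check_nvlink_error(gpu);

    return status;
}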
// Map size bytes of contiguous sysmem on the GPU for physical access

File diff suppressed because it is too large

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,11 +27,11 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters);
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
@@ -46,17 +46,23 @@ void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
//
// All of the parent_gpu's notification buffers are affected.
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
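// A minimal sketch of the suspend/resume pairing implied by the ignore
// semantics above; the surrounding power management code is assumed, not
// shown. Ignoring leaves notifications buffered in HW, and unignoring
// re-evaluates interrupt conditions so any buffered notifications get
// processed.
static void example_access_counters_suspend_resume(uvm_parent_gpu_t *parent_gpu)
{
    uvm_parent_gpu_access_counters_set_ignore(parent_gpu, true);

    // ... GPU is suspended and later resumed ...

    uvm_parent_gpu_access_counters_set_ignore(parent_gpu, false);
}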
// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);
// Global perf initialization/cleanup functions
// Global access counters initialization/cleanup functions.
NV_STATUS uvm_access_counters_init(void);
void uvm_access_counters_exit(void);
// Global perf initialization/cleanup functions.
NV_STATUS uvm_perf_access_counters_init(void);
void uvm_perf_access_counters_exit(void);
// VA space Initialization/cleanup functions. See comments in
// VA space initialization/cleanup functions. See comments in
// uvm_perf_heuristics.h
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
@@ -72,17 +78,18 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_parent_gpu_access_counters_disable().
// uvm_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
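// A minimal sketch of the ref-counted enable/disable bracket described
// above, assuming a per-VA-space registration path; error handling and the
// actual registration logic are omitted. HW notifications are enabled by
// the first VA space to enable access counters and disabled again when the
// last one disables them. The VA space lock must not be held here.
static NV_STATUS example_va_space_use_access_counters(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
{
    NV_STATUS status = uvm_gpu_access_counters_enable(gpu, va_space);

    if (status != NV_OK)
        return status;

    // ... access counter notifications are serviced for this VA space ...

    uvm_gpu_access_counters_disable(gpu, va_space);

    return NV_OK;
}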
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
#endif // __UVM_GPU_ACCESS_COUNTERS_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -154,62 +154,73 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
return 1;
}
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
// On Volta, accessCntrBufferCount is > 0, but we don't support access
// counters in UVM (access_counters_supported is cleared during HAL
// initialization). This check prevents the top-half from accessing
// unallocated memory.
if (!parent_gpu->access_counters_supported)
return 0;
if (parent_gpu->isr.is_suspended)
return 0;
if (!parent_gpu->isr.access_counters.handling_ref_count)
if (!parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count)
return 0;
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
if (down_trylock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem) != 0)
return 0;
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
if (!uvm_parent_gpu_access_counters_pending(parent_gpu, notif_buf_index)) {
up(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem);
return 0;
}
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled to avoid an interrupt storm
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters.bottom_half_q_item);
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);
return 1;
}
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and UVM is given a
// chance to handle the interrupt, before most of the RM processing. UVM communicates what it
// did, back to RM, via the return code:
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and
// UVM is given a chance to handle the interrupt, before most of the RM
// processing. UVM communicates what it did, back to RM, via the return code:
//
// NV_OK:
// UVM handled an interrupt.
//
// NV_WARN_MORE_PROCESSING_REQUIRED:
// UVM did not schedule a bottom half, because it was unable to get the locks it
// needed, but there is still UVM work to be done. RM will return "not handled" to the
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
// processing continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining (GET != PUT in
// the fault buffer).
// UVM did not schedule a bottom half, because it was unable to get the
// locks it needed, but there is still UVM work to be done. RM will
// return "not handled" to the Linux kernel, *unless* RM handled other
// faults in its top half. In that case, the fact that UVM did not
// handle its interrupt is lost. However, life and interrupt processing
// continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining
// (GET != PUT in the fault buffer).
//
// NV_ERR_NO_INTR_PENDING:
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
// available for the future. RM's interrupt handling tends to evolve as new chips and
// new interrupts get created.
// UVM did not find any work to do. Currently this is handled in RM in
// exactly the same way as NV_WARN_MORE_PROCESSING_REQUIRED is handled.
// However, the extra precision is available for the future. RM's
// interrupt handling tends to evolve as new chips and new interrupts
// get created.
static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
{
uvm_parent_gpu_t *parent_gpu;
unsigned num_handlers_scheduled = 0;
NV_STATUS status = NV_OK;
NvU32 i;
if (!in_interrupt() && in_atomic()) {
// Early-out if we're not in interrupt context, but memory allocations
@@ -243,14 +254,16 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
nv_kref_get(&parent_gpu->gpu_kref);
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
// Now that we got a GPU object, lock it so that it can't be removed without
// us noticing.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
++parent_gpu->isr.interrupt_count;
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++)
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu, i);
if (num_handlers_scheduled == 0) {
if (parent_gpu->isr.is_suspended)
@@ -288,6 +301,55 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
return errno_to_nv_status(nv_kthread_q_init(queue, name));
}
static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
NV_STATUS status = NV_OK;
uvm_va_block_context_t *block_context;
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
uvm_sema_init(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, 1, UVM_LOCK_ORDER_ISR);
status = uvm_parent_gpu_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s, notif buf index: %u\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu),
notif_buf_index);
return status;
}
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_block_context)
return NV_ERR_NO_MEMORY;
block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
access_counters_isr_bottom_half_entry,
&parent_gpu->access_counter_buffer[notif_buf_index]);
// Access counter interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count = 0;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc_stats_cpu)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count) *
num_possible_cpus());
if (!parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
return NV_OK;
}
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
@@ -316,7 +378,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.replayable.block_service_context.block_context = block_context;
parent_gpu->isr.replayable_faults.handling = true;
@@ -344,7 +406,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.non_replayable.block_service_context.block_context = block_context;
parent_gpu->isr.non_replayable_faults.handling = true;
@@ -361,32 +423,31 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
return status;
NvU32 index_count = parent_gpu->rm_info.accessCntrBufferCount;
NvU32 notif_buf_index;
UVM_ASSERT(index_count > 0);
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
index_count);
if (!parent_gpu->access_counter_buffer)
return NV_ERR_NO_MEMORY;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.access_counters = uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters) * index_count);
if (!parent_gpu->isr.access_counters)
return NV_ERR_NO_MEMORY;
for (notif_buf_index = 0; notif_buf_index < index_count; notif_buf_index++) {
status = uvm_isr_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK)
return status;
}
block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
access_counters_isr_bottom_half_entry,
parent_gpu);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters.handling_ref_count = 0;
parent_gpu->isr.access_counters.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
}
}
@@ -401,7 +462,15 @@ void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
NvU32 notif_buf_index;
if (parent_gpu->isr.access_counters) {
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
"notif buf index: %u\n",
notif_buf_index);
}
}
// Now that the GPU is safely out of the global table, lock the GPU and mark
// it as no longer handling interrupts so the top half knows not to schedule
@@ -459,24 +528,38 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu);
block_context =
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
NvU32 notif_buf_index;
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
// It is safe to deinitialize access counters even if they have not
// been successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
if (parent_gpu->access_counter_buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
block_context = access_counter->batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
}
if (parent_gpu->isr.access_counters)
uvm_kvfree(parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count);
}
uvm_kvfree(parent_gpu->isr.access_counters);
uvm_kvfree(parent_gpu->access_counter_buffer);
}
if (parent_gpu->non_replayable_faults_supported) {
block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
block_context = parent_gpu->fault_buffer.non_replayable.block_service_context.block_context;
uvm_va_block_context_free(block_context);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
}
block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
block_context = parent_gpu->fault_buffer.replayable.block_service_context.block_context;
uvm_va_block_context_free(block_context);
uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
}
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -584,25 +667,29 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)
static void access_counters_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_access_counter_buffer_t *access_counters = (uvm_access_counter_buffer_t *)args;
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
unsigned int cpu;
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
uvm_record_lock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
// Multiple bottom halves for counter notifications can be running
// concurrently, but only one can be running this function for a given GPU
// since we enter with the access_counters_isr_lock held.
// concurrently, but only one per-notification-buffer (i.e.,
// notif_buf_index) can be running this function for a given GPU since we
// enter with the per-notification-buffer access_counters_isr_lock held.
cpu = get_cpu();
++parent_gpu->isr.access_counters.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
++parent_gpu->isr.access_counters[notif_buf_index].stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters[notif_buf_index].stats.cpus_used_mask);
++parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count[cpu];
put_cpu();
uvm_parent_gpu_service_access_counters(parent_gpu);
uvm_service_access_counters(access_counters);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
uvm_access_counters_isr_unlock(access_counters);
uvm_parent_gpu_kref_put(parent_gpu);
}
@@ -725,7 +812,7 @@ void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
// clear_replayable_faults is a no-op for architectures that don't
// support pulse-based interrupts.
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}
// This unlock call has to be out-of-order unlock due to interrupts_lock
@@ -751,37 +838,41 @@ void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gp
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters)
{
// See comments in uvm_parent_gpu_replayable_faults_isr_lock
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(access_counters);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
uvm_down(&parent_gpu->isr.access_counters.service_lock);
uvm_down(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
}
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_access_counter_buffer_hal_t *ac_hal = parent_gpu->access_counter_buffer_hal;
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
// See comments in uvm_parent_gpu_replayable_faults_isr_unlock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
uvm_access_counters_intr_enable(access_counters);
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
parent_gpu->access_counter_buffer_info.cached_get);
}
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
ac_hal->clear_access_counter_notifications(access_counters, access_counters->cached_get);
// This unlock call has to be out-of-order unlock due to interrupts_lock
// still being held. Otherwise, it would result in a lock order violation.
uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
uvm_up_out_of_order(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
@@ -806,8 +897,11 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *paren
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
}
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
// The read of handling_ref_count could race with a write from
@@ -815,24 +909,27 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
// ISR lock. But those functions are invoked with the interrupt disabled
// (disable_intr_ref_count > 0), so the check always returns false when the
// race occurs
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters);
}
++parent_gpu->isr.access_counters.disable_intr_ref_count;
++parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
}
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count > 0);
--parent_gpu->isr.access_counters.disable_intr_ref_count;
--parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters);
}
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -70,8 +70,8 @@ typedef struct
struct
{
// Number of the bottom-half invocations for this interrupt on a GPU over
// its lifetime
// Number of the bottom-half invocations for this interrupt on a GPU
// over its lifetime.
NvU64 bottom_half_count;
// A bitmask of the CPUs on which the bottom half has executed. The
@@ -110,20 +110,20 @@ typedef struct
// bottom-half per interrupt type.
nv_kthread_q_t bottom_half_q;
// Protects the state of interrupts (enabled/disabled) and whether the GPU is
// currently handling them. Taken in both interrupt and process context.
// Protects the state of interrupts (enabled/disabled) and whether the GPU
// is currently handling them. Taken in both interrupt and process context.
uvm_spinlock_irqsave_t interrupts_lock;
uvm_intr_handler_t replayable_faults;
uvm_intr_handler_t non_replayable_faults;
uvm_intr_handler_t access_counters;
uvm_intr_handler_t *access_counters;
// Kernel thread used to kill channels on fatal non-replayable faults.
// This is needed because we cannot call into RM from the bottom half
// without risking deadlocks.
nv_kthread_q_t kill_channel_q;
// Number of top-half ISRs called for this GPU over its lifetime
// Number of top-half ISRs called for this GPU over its lifetime.
NvU64 interrupt_count;
} uvm_isr_info_t;
@@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
// Flush any currently scheduled bottom halves. This is called during GPU
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
@@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
// half thread. This will also disable replayable page fault interrupts (if
// supported by the GPU) because the top half attempts to take this lock, and we
// would cause an interrupt storm if we didn't disable them first.
//
@@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// under the parent need to have been previously retained.
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);
// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_parent_gpu_access_counters_intr_enable().
// call to uvm_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);
// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
// calling this function.
// uvm_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);
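// A sketch of the paired, ref-counted interrupt disable/enable contract
// described above; the work done in between is a placeholder. Both calls
// must be made with parent_gpu->isr.interrupts_lock held, and for
// pulse-based interrupts the caller is still responsible for re-arming.
static void example_access_counters_quiesce(uvm_access_counter_buffer_t *access_counters)
{
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
    uvm_access_counters_intr_disable(access_counters);
    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);

    // ... work that must not race with new access counter interrupts ...

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
    uvm_access_counters_intr_enable(access_counters);
    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}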
// Return the first valid GPU given the parent GPU or NULL if no MIG instances
// are registered. This should only be called from bottom halves or if the
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
//
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_ISR_H__

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -119,18 +119,18 @@
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
non_replayable_faults->shadow_buffer_copy = NULL;
non_replayable_faults->fault_cache = NULL;
non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
non_replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
non_replayable_faults->shadow_buffer_copy =
uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
uvm_kvmalloc_zero(parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize);
if (!non_replayable_faults->shadow_buffer_copy)
return NV_ERR_NO_MEMORY;
@@ -147,7 +147,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
if (non_replayable_faults->fault_cache) {
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
@@ -170,7 +170,7 @@ bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
&has_pending_faults);
UVM_ASSERT(status == NV_OK);
@@ -182,14 +182,14 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
NV_STATUS status;
NvU32 i;
NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
current_hw_entry,
cached_faults);
@@ -267,7 +267,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
uvm_gpu_t *gpu = user_channel->gpu;
NV_STATUS status;
uvm_push_t push;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
UVM_ASSERT(!fault_entry->is_fatal);
@@ -355,7 +355,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
uvm_processor_id_t new_residency;
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
const uvm_va_policy_t *policy;
UVM_ASSERT(!fault_entry->is_fatal);
@@ -450,7 +450,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
NV_STATUS status, tracker_status;
uvm_va_block_retry_t va_block_retry;
uvm_gpu_t *gpu = fault_entry->gpu;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.non_replayable.block_service_context;
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;
@@ -467,7 +467,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
service_context,
hmm_migratable));
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer.non_replayable.fault_service_tracker,
&va_block->tracker);
uvm_mutex_unlock(&va_block->lock);
@@ -507,7 +507,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
{
uvm_va_space_t *va_space = fault_entry->va_space;
uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
(fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
@@ -551,7 +551,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
NV_STATUS status = lookup_status;
NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
@@ -649,7 +649,7 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
struct mm_struct *mm;
uvm_gpu_va_space_t *gpu_va_space;
uvm_gpu_t *gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;
status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
@@ -757,7 +757,7 @@ exit_no_channel:
static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
{
uvm_service_block_context_t *service_context =
&parent_gpu->fault_buffer_info.non_replayable.block_service_context;
&parent_gpu->fault_buffer.non_replayable.block_service_context;
NV_STATUS status;
bool hmm_migratable = true;
@@ -794,7 +794,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent
// non-replayable faults since getting multiple faults on the same
// memory region is not very likely
for (i = 0; i < cached_faults; ++i) {
status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
status = service_fault(parent_gpu, &parent_gpu->fault_buffer.non_replayable.fault_cache[i]);
if (status != NV_OK)
return;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -119,7 +119,7 @@ module_param(uvm_perf_fault_coalesce, uint, S_IRUGO);
// the power management resume path.
static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
// Read the current get/put pointers, as this might not be the first time
// we take control of the fault buffer since the GPU was initialized,
@@ -129,7 +129,7 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
// (Re-)enable fault prefetching
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled)
if (parent_gpu->fault_buffer.prefetch_faults_enabled)
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
else
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
@@ -140,28 +140,28 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
UVM_ASSERT(parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize %
UVM_ASSERT(parent_gpu->fault_buffer.rm_info.replayable.bufferSize %
parent_gpu->fault_buffer_hal->entry_size(parent_gpu) == 0);
replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize /
replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.replayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
// Check provided module parameter value
parent_gpu->fault_buffer_info.max_batch_size = max(uvm_perf_fault_batch_count,
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
parent_gpu->fault_buffer_info.max_batch_size = min(parent_gpu->fault_buffer_info.max_batch_size,
replayable_faults->max_faults);
parent_gpu->fault_buffer.max_batch_size = max(uvm_perf_fault_batch_count,
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
parent_gpu->fault_buffer.max_batch_size = min(parent_gpu->fault_buffer.max_batch_size,
replayable_faults->max_faults);
if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_batch_count,
UVM_PERF_FAULT_BATCH_COUNT_MIN,
replayable_faults->max_faults,
parent_gpu->fault_buffer_info.max_batch_size);
if (parent_gpu->fault_buffer.max_batch_size != uvm_perf_fault_batch_count) {
UVM_INFO_PRINT("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_batch_count,
UVM_PERF_FAULT_BATCH_COUNT_MIN,
replayable_faults->max_faults,
parent_gpu->fault_buffer.max_batch_size);
}
batch_context->fault_cache = uvm_kvmalloc_zero(replayable_faults->max_faults * sizeof(*batch_context->fault_cache));
@@ -198,22 +198,22 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
UVM_PERF_FAULT_REPLAY_POLICY_DEFAULT;
if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_policy,
replayable_faults->replay_policy);
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_policy,
replayable_faults->replay_policy);
}
replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_update_put_ratio,
replayable_faults->replay_update_put_ratio);
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_update_put_ratio,
replayable_faults->replay_update_put_ratio);
}
// Re-enable fault prefetching just in case it was disabled in a previous run
parent_gpu->fault_buffer_info.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
parent_gpu->fault_buffer.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
fault_buffer_reinit_replayable_faults(parent_gpu);
@@ -222,7 +222,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
if (batch_context->fault_cache) {
@@ -230,9 +230,9 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
uvm_tracker_deinit(&replayable_faults->replay_tracker);
}
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
// Re-enable prefetch faults in case we disabled them
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer_info.prefetch_faults_enabled)
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer.prefetch_faults_enabled)
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
}
@@ -252,7 +252,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->replayable_faults_supported);
status = uvm_rm_locked_call(nvUvmInterfaceInitFaultInfo(parent_gpu->rm_device,
&parent_gpu->fault_buffer_info.rm_info));
&parent_gpu->fault_buffer.rm_info));
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
nvstatusToString(status),
@@ -262,7 +262,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
// when it returns an error. Set the buffer handle to zero as it is
// used by the deinitialization logic to determine if it was correctly
// initialized.
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
goto fail;
}
@@ -304,24 +304,25 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
fault_buffer_deinit_replayable_faults(parent_gpu);
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE));
UVM_ASSERT(status == NV_OK);
uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
&parent_gpu->fault_buffer_info.rm_info));
&parent_gpu->fault_buffer.rm_info));
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
}
}
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
// Fast path 1: we left some faults unserviced in the buffer in the last pass
// Fast path 1: we left some faults unserviced in the buffer in the last
// pass
if (replayable_faults->cached_get != replayable_faults->cached_put)
return true;
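
The fast path above is a ring-buffer pending check: faults remain to be serviced whenever the cached consumer index (get) has not caught up with the cached producer index (put). A minimal standalone sketch of the same comparison follows; the type and field names are illustrative only and are not the driver's actual definitions.

#include <stdbool.h>
#include <stdint.h>

struct fault_ring {
    uint32_t cached_get;   // next entry the consumer will read
    uint32_t cached_put;   // next entry the producer will write
    uint32_t max_faults;   // ring capacity; both indices wrap at this value
};

// Entries are pending whenever the consumer has not caught up with the
// producer; equality means the buffer was fully drained in the last pass.
bool ring_has_pending(const struct fault_ring *ring)
{
    return ring->cached_get != ring->cached_put;
}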
@@ -357,7 +358,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
{
NV_STATUS status;
uvm_push_t push;
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer.replayable.replay_tracker;
UVM_ASSERT(tracker != NULL);
@@ -443,7 +444,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
uvm_gpu_t *gpu = fault_entry->gpu;
uvm_gpu_phys_address_t pdb;
uvm_push_t push;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
NvU64 offset;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
@@ -505,7 +506,7 @@ static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
{
NV_STATUS status;
uvm_push_t push;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_tracker_t *tracker = NULL;
if (batch_context)
@@ -556,7 +557,7 @@ static NV_STATUS push_replay_on_parent_gpu(uvm_parent_gpu_t *parent_gpu,
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -589,7 +590,7 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_f
return NV_OK;
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer.rm_info, is_flush_mode_move);
UVM_ASSERT(status == NV_OK);
@@ -618,7 +619,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
NV_STATUS status;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -852,7 +853,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
uvm_fault_buffer_entry_t *fault_cache;
uvm_spin_loop_t spin;
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
const bool in_pascal_cancel_path = (!parent_gpu->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;
@@ -887,7 +888,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
// Parse until get != put and have enough space to cache.
while ((get != put) &&
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer_info.max_batch_size)) {
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer.max_batch_size)) {
bool is_same_instance_ptr = true;
uvm_fault_buffer_entry_t *current_entry = &fault_cache[fault_index];
uvm_fault_utlb_info_t *current_tlb;
@@ -1385,7 +1386,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_page_index_t last_page_index;
NvU32 page_fault_count = 0;
uvm_range_group_range_iter_t iter;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
uvm_fault_buffer_entry_t *first_fault_entry = ordered_fault_cache[first_fault_index];
uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
@@ -1612,7 +1613,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
NV_STATUS status;
uvm_va_block_retry_t va_block_retry;
NV_STATUS tracker_status;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_service_block_context_t *fault_block_context = &replayable_faults->block_service_context;
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
@@ -1803,7 +1804,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
uvm_gpu_t *gpu = gpu_va_space->gpu;
bool replay_per_va_block =
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
UVM_ASSERT(vma);
@@ -1851,8 +1852,8 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
page_index = (fault_address - sub_batch_base) / PAGE_SIZE;
// Do not check for coalesced access type. If there are multiple different
// accesses to an address, we can disregard the prefetch one.
// Do not check for coalesced access type. If there are multiple
// different accesses to an address, we can disregard the prefetch one.
if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
(uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
uvm_page_mask_set(prefetch_only_fault_mask, page_index);
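
The check above treats a fault as prefetch-only when PREFETCH is the strongest access type present in the mask, so a prefetch coalesced with a read, write, or atomic access is simply disregarded. Below is a standalone sketch of that idea using its own illustrative ordering; the driver's actual enum values and mask helper are not reproduced here.

#include <stdbool.h>
#include <stdint.h>

// Illustrative ordering only: a higher bit position means a stronger access.
enum access_type {
    ACCESS_TYPE_PREFETCH = 0,
    ACCESS_TYPE_READ,
    ACCESS_TYPE_WRITE,
    ACCESS_TYPE_ATOMIC,
};

// Strongest access type present in a non-empty bitmask of access types.
enum access_type mask_highest(uint32_t access_type_mask)
{
    return (enum access_type)(31 - __builtin_clz(access_type_mask));
}

// Prefetch-only: no read, write, or atomic access accompanies the prefetch.
bool is_prefetch_only_fault(uint32_t access_type_mask)
{
    return mask_highest(access_type_mask) == ACCESS_TYPE_PREFETCH;
}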
@@ -1956,7 +1957,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
uvm_va_block_t *va_block;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_block_context_t *va_block_context =
gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
gpu->parent->fault_buffer.replayable.block_service_context.block_context;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
struct mm_struct *mm = va_block_context->mm;
NvU64 fault_address = current_entry->fault_address;
@@ -1985,7 +1986,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
NvU64 outer = ~0ULL;
UVM_ASSERT(replay_per_va_block ==
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
// Limit outer to the minimum of next va_range.start and first
// fault_address' next UVM_GMMU_ATS_GRANULARITY alignment so that it's
@@ -2046,8 +2047,8 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
uvm_gpu_t *gpu = batch_context->fatal_gpu;
uvm_gpu_va_space_t *gpu_va_space = NULL;
struct mm_struct *mm;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
UVM_ASSERT(va_space);
@@ -2155,7 +2156,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
++i;
}
else {
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer.replayable.ats_invalidate;
NvU32 block_faults;
const bool hmm_migratable = true;
@@ -2236,12 +2237,12 @@ static NV_STATUS service_fault_batch(uvm_parent_gpu_t *parent_gpu,
NvU32 i;
uvm_va_space_t *va_space = NULL;
uvm_gpu_va_space_t *prev_gpu_va_space = NULL;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer.replayable.ats_invalidate;
struct mm_struct *mm = NULL;
const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
parent_gpu->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
parent_gpu->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
uvm_service_block_context_t *service_context =
&parent_gpu->fault_buffer_info.replayable.block_service_context;
&parent_gpu->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
bool hmm_migratable = true;
@@ -2711,8 +2712,9 @@ static void cancel_fault_batch(uvm_parent_gpu_t *parent_gpu,
// 5- Fetch all faults from buffer
// 6- Check what uTLBs are in lockdown mode and can be cancelled
// 7- Preprocess faults (order per va_space, fault address, access type)
// 8- Service all non-fatal faults and mark all non-serviceable faults as fatal
// 6.1- If fatal faults are not found, we are done
// 8- Service all non-fatal faults and mark all non-serviceable faults as
// fatal.
// 8.1- If fatal faults are not found, we are done
// 9- Search for a uTLB which can be targeted for cancel, as described in
// try_to_cancel_utlbs. If found, cancel it.
// END LOOP
@@ -2726,14 +2728,14 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
{
NV_STATUS status;
NV_STATUS tracker_status;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
bool first = true;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
// 1) Disable prefetching to keep new requests from coming in and flooding
//    the buffer
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->disable_prefetch_faults(gpu->parent);
while (1) {
@@ -2847,7 +2849,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
}
// 10) Re-enable prefetching
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->enable_prefetch_faults(gpu->parent);
if (status == NV_OK)
@@ -2884,16 +2886,16 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu,
// comment in mark_fault_invalid_prefetch(..).
// Some tests rely on this logic (and ratio) to correctly disable prefetch
// fault reporting. If the logic changes, the tests will have to be changed.
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled &&
if (parent_gpu->fault_buffer.prefetch_faults_enabled &&
uvm_perf_reenable_prefetch_faults_lapse_msec > 0 &&
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer_info.max_batch_size * 2) ||
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer.max_batch_size * 2) ||
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
else if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer.disable_prefetch_faults_timestamp;
// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
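
The heuristic above disables prefetch fault reporting once invalid prefetch faults exceed two thirds of a batch (num_invalid_prefetch_faults * 3 > max_batch_size * 2) and re-enables it once the configured millisecond lapse has passed; the 1000 * 1000 scaling suggests the timestamps are in nanoseconds. A standalone sketch of that logic follows, with illustrative names rather than the driver's API.

#include <stdbool.h>
#include <stdint.h>

// Disable once invalid prefetch faults exceed 2/3 of the fault batch size.
bool should_disable_prefetch(uint32_t num_invalid_prefetch_faults, uint32_t max_batch_size)
{
    return num_invalid_prefetch_faults * 3 > max_batch_size * 2;
}

// Re-enable once the configured lapse (in milliseconds) has elapsed,
// assuming timestamps are reported in nanoseconds.
bool should_reenable_prefetch(uint64_t now_ns, uint64_t disabled_at_ns, uint64_t lapse_msec)
{
    return (now_ns - disabled_at_ns) > lapse_msec * 1000u * 1000u;
}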
@@ -2907,7 +2909,7 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu)
NvU32 num_batches = 0;
NvU32 num_throttled = 0;
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
@@ -3030,9 +3032,9 @@ void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = true;
parent_gpu->fault_buffer.prefetch_faults_enabled = true;
}
}
@@ -3041,10 +3043,10 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = false;
parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp = NV_GETTIME();
parent_gpu->fault_buffer.prefetch_faults_enabled = false;
parent_gpu->fault_buffer.disable_prefetch_faults_timestamp = NV_GETTIME();
}
}

View File

@@ -217,7 +217,6 @@ static uvm_hal_class_ops_t host_table[] =
.clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
.clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
.access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
.get_time = uvm_hal_maxwell_get_time,
}
@@ -254,9 +253,6 @@ static uvm_hal_class_ops_t host_table[] =
.replay_faults = uvm_hal_volta_replay_faults,
.cancel_faults_va = uvm_hal_volta_cancel_faults_va,
.clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
.access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
.access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
.access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
.semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
}
},
@@ -271,6 +267,8 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
.access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
.access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
}
},
{
@@ -537,22 +535,19 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_volta_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
.parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
}
.u.access_counter_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_turing_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
.parse_entry = uvm_hal_turing_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_turing_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_turing_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_turing_access_counter_buffer_entry_size,
}
},
{
@@ -843,10 +838,8 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
// Computing.
//
// TODO: Bug 200692962: Add support for access counters in vGPU
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled) {
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
}
}
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
@@ -1042,36 +1035,15 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
UVM_DBG_PRINT(" timestamp: %llu\n", entry->timestamp);
}
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
{
BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
switch (access_counter_type) {
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
UVM_ENUM_STRING_DEFAULT();
}
}
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
{
if (!entry->address.is_virtual) {
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
entry->address.address,
uvm_aperture_string(entry->address.aperture));
}
else {
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);
}
UVM_DBG_PRINT(" is_virtual %u\n", entry->address.is_virtual);
UVM_DBG_PRINT(" counter_type %s\n", uvm_access_counter_type_string(entry->counter_type));
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->ve_id);
UVM_DBG_PRINT(" counter_value %u\n", entry->counter_value);
UVM_DBG_PRINT(" subgranularity 0x%08x\n", entry->sub_granularity);
UVM_DBG_PRINT(" bank %u\n", entry->bank);

View File

@@ -686,54 +686,52 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);
// Access counters
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters, NvU32 get);
// Parse the entry at the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get);
void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
// The source and destination addresses must be 16-byte aligned. Note that the
// best performance is achieved with 256-byte alignment. The decrypt size must
@@ -786,7 +784,6 @@ struct uvm_host_hal_struct
uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
uvm_hal_access_counter_clear_all_t access_counter_clear_all;
uvm_hal_access_counter_clear_type_t access_counter_clear_type;
uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
uvm_hal_get_time_t get_time;
};

View File

@@ -471,69 +471,34 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
return max(membar_1, membar_2);
}
typedef enum
{
UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
UVM_ACCESS_COUNTER_TYPE_MOMC,
UVM_ACCESS_COUNTER_TYPE_MAX,
} uvm_access_counter_type_t;
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
struct uvm_access_counter_buffer_entry_struct
{
// Whether this counter refers to outbound accesses to remote GPUs or
// sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
// GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
uvm_access_counter_type_t counter_type;
// Address of the region for which a notification was sent
uvm_gpu_address_t address;
NvU64 address;
union
{
// These fields are only valid if address.is_virtual is true
struct
{
// Instance pointer of one of the channels in the TSG that triggered
// the notification.
uvm_gpu_phys_address_t instance_ptr;
// Instance pointer of one of the channels in the TSG that triggered
// the notification.
uvm_gpu_phys_address_t instance_ptr;
uvm_mmu_engine_type_t mmu_engine_type;
uvm_mmu_engine_type_t mmu_engine_type;
NvU32 mmu_engine_id;
NvU32 mmu_engine_id;
// Identifier of the subcontext that performed the memory accesses
// that triggered the notification. This value, combined with the
// instance_ptr, is needed to obtain the GPU VA space of the process
// that triggered the notification.
NvU32 ve_id;
// Identifier of the subcontext that performed the memory accesses
// that triggered the notification. This value, combined with the
// instance_ptr, is needed to obtain the GPU VA space of the process
// that triggered the notification.
NvU32 ve_id;
// VA space for the address that triggered the notification
uvm_va_space_t *va_space;
} virtual_info;
// VA space for the address that triggered the notification
uvm_va_space_t *va_space;
// These fields are only valid if address.is_virtual is false
struct
{
// Processor id where data is resident
//
// Although this information is not tied to a VA space, we can use
// a regular processor id because P2P is not allowed between
// partitioned GPUs.
uvm_processor_id_t resident_id;
} physical_info;
};
// This is the GPU that triggered the notification. Note that physical
// address based notifications are only supported on non-MIG-capable GPUs.
// This is the GPU that triggered the notification.
uvm_gpu_t *gpu;
// Number of times the tracked region was accessed since the last time it
// was cleared. Counter values saturate at the maximum value supported by
// the GPU (2^16 - 1 in Volta)
// the GPU (2^16 - 1 on Turing)
NvU32 counter_value;
// When the granularity of the tracked regions is greater than 64KB, the

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -1602,7 +1602,7 @@ static NV_STATUS hmm_va_block_cpu_page_populate(uvm_va_block_t *va_block,
return status;
}
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk, page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk);
if (status != NV_OK) {
uvm_cpu_chunk_remove_from_block(va_block, page_to_nid(page), page_index);
uvm_cpu_chunk_free(chunk);

View File

@@ -50,12 +50,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Hopper covers 64 PB and that's the minimum
@@ -99,8 +97,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2020 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -111,13 +111,13 @@ void uvm_kvmalloc_exit(void)
return;
if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
UVM_INFO_PRINT("Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
" insmod with uvm_leak_checker=2 for detailed information." :
"");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
if (g_uvm_global.unload_state.ptr)
*g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
@@ -129,12 +129,12 @@ void uvm_kvmalloc_exit(void)
uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX " Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
UVM_INFO_PRINT(" Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
// Free so we don't keep eating up memory while debugging. Note that
// this also removes the entry from the table, frees info, and drops

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,12 +27,13 @@
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37);
switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ACCESS_COUNTERS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ISR);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_MMAP_LOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACES_LIST);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -69,6 +69,17 @@
//
// This should be taken whenever global GPU state might need to be modified.
//
// - Access counters VA space enablement state lock
// Order: UVM_LOCK_ORDER_ACCESS_COUNTERS
// Exclusive lock (mutex)
//
// This protects VA space state associated with access counters enablement.
// Blackwell+ GPUs may have multiple access counters notification buffers
// and their "atomic" enablement is protected by this lock.
//
// This should be taken whenever VA space access counters state might need
// to be modified.
//
// - GPU ISR lock
// Order: UVM_LOCK_ORDER_ISR
// Exclusive lock (mutex) per gpu
@@ -487,6 +498,7 @@ typedef enum
UVM_LOCK_ORDER_INVALID = 0,
UVM_LOCK_ORDER_GLOBAL_PM,
UVM_LOCK_ORDER_GLOBAL,
UVM_LOCK_ORDER_ACCESS_COUNTERS,
UVM_LOCK_ORDER_ISR,
UVM_LOCK_ORDER_MMAP_LOCK,
UVM_LOCK_ORDER_VA_SPACES_LIST,
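
The new UVM_LOCK_ORDER_ACCESS_COUNTERS order sits between the global lock and the ISR lock, matching the documentation added above. Below is a minimal sketch of how a mutex at this order might be declared and taken, assuming the uvm_mutex_t type and the uvm_mutex_* helpers used elsewhere in this commit; the structure and function names are hypothetical, not the driver's actual code.

// Hypothetical per-VA-space state guarding access counters enablement.
typedef struct
{
    uvm_mutex_t access_counters_enablement_lock;
} example_access_counters_state_t;

void example_access_counters_state_init(example_access_counters_state_t *state)
{
    // Register the mutex at the new lock order so lock tracking can verify it
    // is taken after the global lock and before the GPU ISR lock.
    uvm_mutex_init(&state->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
}

void example_access_counters_toggle(example_access_counters_state_t *state)
{
    uvm_mutex_lock(&state->access_counters_enablement_lock);

    // Enable or disable all notification buffers for the VA space "atomically"
    // with respect to other enablement changes.

    uvm_mutex_unlock(&state->access_counters_enablement_lock);
}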
@@ -742,7 +754,8 @@ bool __uvm_locking_initialized(void);
ret; \
})
// Helper for calling a UVM-RM interface function that returns void with lock recording
// Helper for calling a UVM-RM interface function that returns void with lock
// recording
#define uvm_rm_locked_call_void(call) ({ \
uvm_record_lock_rm_all(); \
call; \

View File

@@ -63,8 +63,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -24,25 +24,29 @@
#include "uvm_gpu.h"
#include "uvm_hal.h"
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
{
UVM_ASSERT_MSG(false,
"enable_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"enable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
{
UVM_ASSERT_MSG(false,
"disable_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"disable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 get)
{
UVM_ASSERT_MSG(false,
"clear_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"clear_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
@@ -53,26 +57,31 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp
return 0;
}
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index)
{
UVM_ASSERT_MSG(false,
"access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"access_counter_buffer_entry_is_valid is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
return false;
}
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index)
{
UVM_ASSERT_MSG(false,
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false,
"access_counter_buffer_parse_entry is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"access_counter_buffer_parse_entry is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2022 NVIDIA Corporation
Copyright (c) 2021-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -330,11 +330,6 @@ void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push)
UVM_ASSERT_MSG(false, "host access_counter_clear_all called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type)
{
UVM_ASSERT_MSG(false, "host access_counter_clear_type called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{

View File

@@ -582,7 +582,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
managed_range_last = managed_range;
// For UVM-Lite GPUs, the CUDA driver may suballocate a single
// managed_range into many range groups. For this reason, we iterate
// managed_range into many range groups. For this reason, we iterate
// over each managed_range first then through the range groups within.
uvm_range_group_for_each_migratability_in(&iter,
va_space,
@@ -865,9 +865,9 @@ NV_STATUS uvm_migrate_init(void)
else {
g_uvm_perf_migrate_cpu_preunmap_size = UVM_VA_BLOCK_SIZE << UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT;
pr_info("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
uvm_perf_migrate_cpu_preunmap_block_order,
UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
UVM_INFO_PRINT("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
uvm_perf_migrate_cpu_preunmap_block_order,
UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
}
}
@@ -909,14 +909,13 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
if ((params->flags & UVM_MIGRATE_FLAGS_TEST_ALL) && !uvm_enable_builtin_tests) {
UVM_INFO_PRINT("Test flag set for UVM_MIGRATE. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
UVM_INFO_PRINT("TEMP\n");
return NV_ERR_INVALID_ARGUMENT;
}
gpus_to_check_for_nvlink_errors = uvm_processor_mask_cache_alloc();
if (!gpus_to_check_for_nvlink_errors)
return NV_ERR_NO_MEMORY;
uvm_processor_mask_zero(gpus_to_check_for_nvlink_errors);
// mmap_lock will be needed if we have to create CPU mappings

View File

@@ -90,9 +90,9 @@ NV_STATUS uvm_mmu_init(void)
page_table_aperture = UVM_APERTURE_SYS;
}
else {
pr_info("Invalid uvm_page_table_location %s. Using %s instead.\n",
uvm_page_table_location,
uvm_aperture_string(page_table_aperture));
UVM_INFO_PRINT("Invalid uvm_page_table_location %s. Using %s instead.\n",
uvm_page_table_location,
uvm_aperture_string(page_table_aperture));
}
return NV_OK;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -40,10 +40,10 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_pascal_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Pascal covers 128 TB and that's the minimum
@@ -92,8 +92,6 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,8 +44,8 @@ void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnSet;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnSet;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
@@ -55,33 +55,33 @@ void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
{
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferPut);
UVM_ASSERT(put < parent_gpu->fault_buffer_info.replayable.max_faults);
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferPut);
UVM_ASSERT(put < parent_gpu->fault_buffer.replayable.max_faults);
return put;
}
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu)
{
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer_info.replayable.max_faults);
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer.replayable.max_faults);
return get;
}
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, index);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, index);
}
static uvm_fault_access_type_t get_fault_access_type(const NvU32 *fault_entry)
@@ -189,9 +189,9 @@ static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
fault_buffer_entry_b069_t *buffer_start;
NvU32 *fault_entry;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
buffer_start = (fault_buffer_entry_b069_t *)parent_gpu->fault_buffer_info.rm_info.replayable.bufferAddress;
buffer_start = (fault_buffer_entry_b069_t *)parent_gpu->fault_buffer.rm_info.replayable.bufferAddress;
fault_entry = (NvU32 *)&buffer_start[index];
return fault_entry;
@@ -205,10 +205,10 @@ static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t
{
UvmFaultMetadataPacket *fault_entry_metadata;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
fault_entry_metadata = parent_gpu->fault_buffer.rm_info.replayable.bufferMetadata;
UVM_ASSERT(fault_entry_metadata != NULL);
return fault_entry_metadata + index;
@@ -267,7 +267,7 @@ NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *p
// Compute global uTLB id
utlb_id = buffer_entry->fault_source.gpc_id * parent_gpu->utlb_per_gpc_count + gpc_utlb_id;
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer_info.replayable.utlb_count);
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer.replayable.utlb_count);
buffer_entry->fault_source.utlb_id = utlb_id;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -21,7 +21,6 @@
*******************************************************************************/
// For Pascal, UVM page tree 'depth' maps to hardware as follows:
//
// UVM depth HW level VA bits
@@ -377,7 +376,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size)
static void mmu_set_prefetch_faults(uvm_parent_gpu_t *parent_gpu, bool enable)
{
volatile NvU32 *prefetch_ctrl = parent_gpu->fault_buffer_info.rm_info.replayable.pPrefetchCtrl;
volatile NvU32 *prefetch_ctrl = parent_gpu->fault_buffer.rm_info.replayable.pPrefetchCtrl;
// A null prefetch control mapping indicates that UVM should toggle the
// register's value using the RM API, instead of performing a direct access.
@@ -388,7 +387,7 @@ static void mmu_set_prefetch_faults(uvm_parent_gpu_t *parent_gpu, bool enable)
// Computing.
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
status = nvUvmInterfaceTogglePrefetchFaults(&parent_gpu->fault_buffer_info.rm_info, (NvBool)enable);
status = nvUvmInterfaceTogglePrefetchFaults(&parent_gpu->fault_buffer.rm_info, (NvBool)enable);
UVM_ASSERT(status == NV_OK);
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -512,8 +512,9 @@ NV_STATUS uvm_perf_prefetch_init(void)
g_uvm_perf_prefetch_threshold = uvm_perf_prefetch_threshold;
}
else {
pr_info("Invalid value %u for uvm_perf_prefetch_threshold. Using %u instead\n",
uvm_perf_prefetch_threshold, UVM_PREFETCH_THRESHOLD_DEFAULT);
UVM_INFO_PRINT("Invalid value %u for uvm_perf_prefetch_threshold. Using %u instead\n",
uvm_perf_prefetch_threshold,
UVM_PREFETCH_THRESHOLD_DEFAULT);
g_uvm_perf_prefetch_threshold = UVM_PREFETCH_THRESHOLD_DEFAULT;
}
@@ -523,8 +524,9 @@ NV_STATUS uvm_perf_prefetch_init(void)
g_uvm_perf_prefetch_min_faults = uvm_perf_prefetch_min_faults;
}
else {
pr_info("Invalid value %u for uvm_perf_prefetch_min_faults. Using %u instead\n",
uvm_perf_prefetch_min_faults, UVM_PREFETCH_MIN_FAULTS_DEFAULT);
UVM_INFO_PRINT("Invalid value %u for uvm_perf_prefetch_min_faults. Using %u instead\n",
uvm_perf_prefetch_min_faults,
UVM_PREFETCH_MIN_FAULTS_DEFAULT);
g_uvm_perf_prefetch_min_faults = UVM_PREFETCH_MIN_FAULTS_DEFAULT;
}

View File

@@ -338,28 +338,28 @@ static unsigned g_uvm_perf_thrashing_max_resets;
// parameter _d. The user value is read from _v, and the final value is stored
// in a variable named g_##_v, so it must be declared, too. Only unsigned
// parameters are supported.
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma) \
do { \
unsigned v = (_v); \
unsigned d = (_d); \
unsigned mi = (_mi); \
unsigned ma = (_ma); \
\
BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned)); \
BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v)); \
\
UVM_ASSERT(mi <= ma); \
UVM_ASSERT(d >= mi); \
UVM_ASSERT(d <= ma); \
\
if (v >= mi && v <= ma) { \
g_##_v = v; \
} \
else { \
pr_info("Invalid value %u for " #_v ". Using %u instead\n", v, d); \
\
g_##_v = d; \
} \
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma) \
do { \
unsigned v = (_v); \
unsigned d = (_d); \
unsigned mi = (_mi); \
unsigned ma = (_ma); \
\
BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned)); \
BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v)); \
\
UVM_ASSERT(mi <= ma); \
UVM_ASSERT(d >= mi); \
UVM_ASSERT(d <= ma); \
\
if (v >= mi && v <= ma) { \
g_##_v = v; \
} \
else { \
UVM_INFO_PRINT("Invalid value %u for " #_v ". Using %u instead\n", v, d); \
\
g_##_v = d; \
} \
} while (0)
#define INIT_THRASHING_PARAMETER(v, d) INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, UINT_MAX)
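
For reference, a hedged usage sketch of the macro above: the module parameter name below is hypothetical, but the pattern follows the contract stated in the comment (an unsigned module parameter plus a matching g_-prefixed global, clamped to [min, max] with a fallback to the default and a UVM_INFO_PRINT warning).

// Hypothetical module parameter; real thrashing parameters follow this shape.
static unsigned uvm_perf_thrashing_example = 16;
module_param(uvm_perf_thrashing_example, uint, S_IRUGO);

// The macro stores the validated value in the matching g_-prefixed global.
static unsigned g_uvm_perf_thrashing_example;

static void example_init_thrashing_parameters(void)
{
    // Accept values in [1, 1024]; out-of-range values warn and fall back to 16.
    INIT_THRASHING_PARAMETER_MIN_MAX(uvm_perf_thrashing_example, 16u, 1u, 1024u);
}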

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -31,21 +31,14 @@ static int uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
module_param(uvm_cpu_chunk_allocation_sizes, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(uvm_cpu_chunk_allocation_sizes, "OR'ed value of all CPU chunk allocation sizes.");
static struct kmem_cache *g_reverse_page_map_cache __read_mostly;
NV_STATUS uvm_pmm_sysmem_init(void)
{
g_reverse_page_map_cache = NV_KMEM_CACHE_CREATE("uvm_pmm_sysmem_page_reverse_map_t",
uvm_reverse_map_t);
if (!g_reverse_page_map_cache)
return NV_ERR_NO_MEMORY;
// Ensure that only supported CPU chunk sizes are enabled.
uvm_cpu_chunk_allocation_sizes &= UVM_CPU_CHUNK_SIZES;
if (!uvm_cpu_chunk_allocation_sizes || !(uvm_cpu_chunk_allocation_sizes & PAGE_SIZE)) {
pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
uvm_cpu_chunk_allocation_sizes,
UVM_CPU_CHUNK_SIZES);
UVM_INFO_PRINT("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
uvm_cpu_chunk_allocation_sizes,
UVM_CPU_CHUNK_SIZES);
uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
}
@@ -54,387 +47,11 @@ NV_STATUS uvm_pmm_sysmem_init(void)
void uvm_pmm_sysmem_exit(void)
{
kmem_cache_destroy_safe(&g_reverse_page_map_cache);
}
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings)
{
memset(sysmem_mappings, 0, sizeof(*sysmem_mappings));
sysmem_mappings->gpu = gpu;
uvm_mutex_init(&sysmem_mappings->reverse_map_lock, UVM_LOCK_ORDER_LEAF);
uvm_init_radix_tree_preloadable(&sysmem_mappings->reverse_map_tree);
return NV_OK;
}
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings)
{
if (sysmem_mappings->gpu) {
UVM_ASSERT_MSG(radix_tree_empty(&sysmem_mappings->reverse_map_tree),
"radix_tree not empty for GPU %s\n",
uvm_gpu_name(sysmem_mappings->gpu));
}
sysmem_mappings->gpu = NULL;
}
// TODO: Bug 1995015: use a more efficient data structure for
// physically-contiguous allocations.
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 virt_addr,
NvU64 region_size,
uvm_va_block_t *va_block,
uvm_processor_id_t owner)
{
NV_STATUS status = NV_OK;
uvm_reverse_map_t *new_reverse_map;
NvU64 key;
const NvU64 base_key = dma_addr / PAGE_SIZE;
const NvU32 num_pages = region_size / PAGE_SIZE;
uvm_page_index_t page_index;
UVM_ASSERT(va_block);
UVM_ASSERT(!uvm_va_block_is_dead(va_block));
UVM_ASSERT(IS_ALIGNED(dma_addr, region_size));
UVM_ASSERT(IS_ALIGNED(virt_addr, region_size));
UVM_ASSERT(region_size <= UVM_VA_BLOCK_SIZE);
UVM_ASSERT(is_power_of_2(region_size));
UVM_ASSERT(uvm_va_block_contains_address(va_block, virt_addr));
UVM_ASSERT(uvm_va_block_contains_address(va_block, virt_addr + region_size - 1));
uvm_assert_mutex_locked(&va_block->lock);
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return NV_OK;
new_reverse_map = nv_kmem_cache_zalloc(g_reverse_page_map_cache, NV_UVM_GFP_FLAGS);
if (!new_reverse_map)
return NV_ERR_NO_MEMORY;
page_index = uvm_va_block_cpu_page_index(va_block, virt_addr);
new_reverse_map->va_block = va_block;
new_reverse_map->region = uvm_va_block_region(page_index, page_index + num_pages);
new_reverse_map->owner = owner;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
for (key = base_key; key < base_key + num_pages; ++key) {
int ret = radix_tree_insert(&sysmem_mappings->reverse_map_tree, key, new_reverse_map);
if (ret != 0) {
NvU64 remove_key;
for (remove_key = base_key; remove_key < key; ++remove_key)
(void)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
kmem_cache_free(g_reverse_page_map_cache, new_reverse_map);
status = errno_to_nv_status(ret);
break;
}
}
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
// The assert is added for Coverity's sake. It is equivalent to adding
// assert(num_pages > 0) before the loop, but Coverity cannot deduce from
// num_pages > 0 alone that the loop must execute at least once.
UVM_ASSERT(key != base_key || status != NV_OK);
return status;
}
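A minimal user-space sketch of the keying scheme used above, assuming a 4K base page size and hypothetical addresses: each PAGE_SIZE page of the DMA region gets its own key, dma_addr / PAGE_SIZE + n, and every key's radix-tree slot points at the same reverse-map descriptor. This is an illustrative model, not driver code.

#include <stdio.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096ULL /* assumed 4K base page size */

int main(void)
{
    /* Hypothetical PAGE_SIZE-aligned, physically-contiguous 64K DMA region */
    uint64_t dma_addr    = 0x200000ULL;
    uint64_t region_size = 0x10000ULL;

    uint64_t base_key  = dma_addr / SKETCH_PAGE_SIZE;
    uint64_t num_pages = region_size / SKETCH_PAGE_SIZE;
    uint64_t key;

    /* The driver inserts one radix-tree slot per key in
     * [base_key, base_key + num_pages); every slot points at the same
     * reverse-map descriptor for the whole region. */
    for (key = base_key; key < base_key + num_pages; ++key)
        printf("key 0x%llx -> shared reverse-map descriptor\n",
               (unsigned long long)key);

    return 0;
}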
static void pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
bool check_mapping)
{
uvm_reverse_map_t *reverse_map;
NvU64 key;
const NvU64 base_key = dma_addr / PAGE_SIZE;
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
reverse_map = radix_tree_delete(&sysmem_mappings->reverse_map_tree, base_key);
if (check_mapping)
UVM_ASSERT(reverse_map);
if (!reverse_map) {
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
return;
}
uvm_assert_mutex_locked(&reverse_map->va_block->lock);
for (key = base_key + 1; key < base_key + uvm_va_block_region_num_pages(reverse_map->region); ++key) {
uvm_reverse_map_t *curr_reverse_map = radix_tree_delete(&sysmem_mappings->reverse_map_tree, key);
UVM_ASSERT(curr_reverse_map == reverse_map);
}
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
kmem_cache_free(g_reverse_page_map_cache, reverse_map);
}
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
{
pmm_sysmem_mappings_remove_gpu_mapping(sysmem_mappings, dma_addr, true);
}
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
{
pmm_sysmem_mappings_remove_gpu_mapping(sysmem_mappings, dma_addr, false);
}
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
uvm_va_block_t *va_block)
{
NvU64 virt_addr;
uvm_reverse_map_t *reverse_map;
const NvU64 base_key = dma_addr / PAGE_SIZE;
uvm_page_index_t new_start_page;
UVM_ASSERT(PAGE_ALIGNED(dma_addr));
UVM_ASSERT(va_block);
UVM_ASSERT(!uvm_va_block_is_dead(va_block));
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
UVM_ASSERT(reverse_map);
// Compute the virtual address by hand, since the old VA block may be in an
// inconsistent state during a split
virt_addr = reverse_map->va_block->start + reverse_map->region.first * PAGE_SIZE;
new_start_page = uvm_va_block_cpu_page_index(va_block, virt_addr);
reverse_map->region = uvm_va_block_region(new_start_page,
new_start_page + uvm_va_block_region_num_pages(reverse_map->region));
reverse_map->va_block = va_block;
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_start(reverse_map)));
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_end(reverse_map)));
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
}
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size)
{
uvm_reverse_map_t *orig_reverse_map;
const NvU64 base_key = dma_addr / PAGE_SIZE;
const size_t num_pages = new_region_size / PAGE_SIZE;
size_t old_num_pages;
size_t subregion, num_subregions;
uvm_reverse_map_t **new_reverse_maps;
UVM_ASSERT(IS_ALIGNED(dma_addr, new_region_size));
UVM_ASSERT(new_region_size <= UVM_VA_BLOCK_SIZE);
UVM_ASSERT(is_power_of_2(new_region_size));
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return NV_OK;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
orig_reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
// We can access orig_reverse_map outside the tree lock because we hold the
// VA block lock, so there can be no concurrent modifications in the tree for
// the mappings of the chunks that belong to that VA block.
UVM_ASSERT(orig_reverse_map);
UVM_ASSERT(orig_reverse_map->va_block);
uvm_assert_mutex_locked(&orig_reverse_map->va_block->lock);
old_num_pages = uvm_va_block_region_num_pages(orig_reverse_map->region);
UVM_ASSERT(num_pages < old_num_pages);
num_subregions = old_num_pages / num_pages;
new_reverse_maps = uvm_kvmalloc_zero(sizeof(*new_reverse_maps) * (num_subregions - 1));
if (!new_reverse_maps)
return NV_ERR_NO_MEMORY;
// Allocate the descriptors for the new subregions
for (subregion = 1; subregion < num_subregions; ++subregion) {
uvm_reverse_map_t *new_reverse_map = nv_kmem_cache_zalloc(g_reverse_page_map_cache, NV_UVM_GFP_FLAGS);
uvm_page_index_t page_index = orig_reverse_map->region.first + num_pages * subregion;
if (new_reverse_map == NULL) {
// On error, free the previously-created descriptors
while (--subregion != 0)
kmem_cache_free(g_reverse_page_map_cache, new_reverse_maps[subregion - 1]);
uvm_kvfree(new_reverse_maps);
return NV_ERR_NO_MEMORY;
}
new_reverse_map->va_block = orig_reverse_map->va_block;
new_reverse_map->region = uvm_va_block_region(page_index, page_index + num_pages);
new_reverse_map->owner = orig_reverse_map->owner;
new_reverse_maps[subregion - 1] = new_reverse_map;
}
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
for (subregion = 1; subregion < num_subregions; ++subregion) {
NvU64 key;
for (key = base_key + num_pages * subregion; key < base_key + num_pages * (subregion + 1); ++key) {
void **slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key);
UVM_ASSERT(slot);
UVM_ASSERT(radix_tree_deref_slot(slot) == orig_reverse_map);
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, new_reverse_maps[subregion - 1]);
}
}
orig_reverse_map->region = uvm_va_block_region(orig_reverse_map->region.first,
orig_reverse_map->region.first + num_pages);
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
uvm_kvfree(new_reverse_maps);
return NV_OK;
}
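A minimal sketch of the split arithmetic above, with hypothetical sizes (a 2M mapping split into 64K subregions): subregion 0 keeps the original descriptor with a shrunk region, while each later subregion gets a freshly allocated descriptor whose slots replace the original one in the tree. Not driver code.

#include <stdio.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096ULL /* assumed 4K base page size */

int main(void)
{
    /* Hypothetical: a 2M mapping split into 64K subregions */
    uint64_t old_region_size = 0x200000ULL;
    uint64_t new_region_size = 0x10000ULL;

    uint64_t num_pages      = new_region_size / SKETCH_PAGE_SIZE;
    uint64_t old_num_pages  = old_region_size / SKETCH_PAGE_SIZE;
    uint64_t num_subregions = old_num_pages / num_pages;
    uint64_t subregion;

    /* Subregion 0 reuses the original descriptor (only its region is
     * shrunk); subregions 1..num_subregions-1 get new descriptors and
     * their radix-tree slots are re-pointed at them. */
    for (subregion = 0; subregion < num_subregions; ++subregion)
        printf("subregion %llu covers pages [%llu, %llu)\n",
               (unsigned long long)subregion,
               (unsigned long long)(subregion * num_pages),
               (unsigned long long)((subregion + 1) * num_pages));

    return 0;
}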
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size)
{
uvm_reverse_map_t *first_reverse_map;
uvm_page_index_t running_page_index;
NvU64 key;
const NvU64 base_key = dma_addr / PAGE_SIZE;
const size_t num_pages = new_region_size / PAGE_SIZE;
size_t num_mapping_pages;
UVM_ASSERT(IS_ALIGNED(dma_addr, new_region_size));
UVM_ASSERT(new_region_size <= UVM_VA_BLOCK_SIZE);
UVM_ASSERT(is_power_of_2(new_region_size));
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
// Find the first mapping in the region
first_reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
UVM_ASSERT(first_reverse_map);
num_mapping_pages = uvm_va_block_region_num_pages(first_reverse_map->region);
UVM_ASSERT(num_pages >= num_mapping_pages);
UVM_ASSERT(IS_ALIGNED(base_key, num_mapping_pages));
// If the region in the tree already matches the merged region's size, just return
if (num_pages == num_mapping_pages)
goto unlock_no_update;
// Otherwise, update the rest of the slots to point at the same reverse map
// descriptor
key = base_key + uvm_va_block_region_num_pages(first_reverse_map->region);
running_page_index = first_reverse_map->region.outer;
while (key < base_key + num_pages) {
uvm_reverse_map_t *reverse_map = NULL;
void **slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key);
size_t slot_index;
UVM_ASSERT(slot);
reverse_map = radix_tree_deref_slot(slot);
UVM_ASSERT(reverse_map);
UVM_ASSERT(reverse_map != first_reverse_map);
UVM_ASSERT(reverse_map->va_block == first_reverse_map->va_block);
UVM_ASSERT(uvm_id_equal(reverse_map->owner, first_reverse_map->owner));
UVM_ASSERT(reverse_map->region.first == running_page_index);
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, first_reverse_map);
num_mapping_pages = uvm_va_block_region_num_pages(reverse_map->region);
UVM_ASSERT(IS_ALIGNED(key, num_mapping_pages));
UVM_ASSERT(key + num_mapping_pages <= base_key + num_pages);
for (slot_index = 1; slot_index < num_mapping_pages; ++slot_index) {
slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key + slot_index);
UVM_ASSERT(slot);
UVM_ASSERT(reverse_map == radix_tree_deref_slot(slot));
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, first_reverse_map);
}
key += num_mapping_pages;
running_page_index = reverse_map->region.outer;
kmem_cache_free(g_reverse_page_map_cache, reverse_map);
}
// Grow the first mapping to cover the whole region
first_reverse_map->region.outer = first_reverse_map->region.first + num_pages;
unlock_no_update:
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
}
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 region_size,
uvm_reverse_map_t *out_mappings,
size_t max_out_mappings)
{
NvU64 key;
size_t num_mappings = 0;
const NvU64 base_key = dma_addr / PAGE_SIZE;
NvU32 num_pages = region_size / PAGE_SIZE;
UVM_ASSERT(region_size >= PAGE_SIZE);
UVM_ASSERT(PAGE_ALIGNED(region_size));
UVM_ASSERT(sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses);
UVM_ASSERT(max_out_mappings > 0);
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
key = base_key;
do {
uvm_reverse_map_t *reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, key);
if (reverse_map) {
size_t num_chunk_pages = uvm_va_block_region_num_pages(reverse_map->region);
NvU32 page_offset = key & (num_chunk_pages - 1);
NvU32 num_mapping_pages = min(num_pages, (NvU32)num_chunk_pages - page_offset);
// Sysmem mappings are removed during VA block destruction.
// Therefore, we can safely retain the VA blocks as long as they
// are in the reverse map and we hold the reverse map lock.
uvm_va_block_retain(reverse_map->va_block);
out_mappings[num_mappings] = *reverse_map;
out_mappings[num_mappings].region.first += page_offset;
out_mappings[num_mappings].region.outer = out_mappings[num_mappings].region.first + num_mapping_pages;
if (++num_mappings == max_out_mappings)
break;
num_pages -= num_mapping_pages;
key += num_mapping_pages;
}
else {
--num_pages;
++key;
}
}
while (num_pages > 0);
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
return num_mappings;
}
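A small stand-alone model of the offset computation above, with made-up numbers: because every mapping covers a power-of-two number of pages and its base key is size-aligned, key & (num_chunk_pages - 1) yields the page offset inside the mapping, and the translation returns at most the remaining pages of that mapping or of the queried range, whichever is smaller.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Hypothetical lookup that lands in the middle of a 64K (16-page)
     * chunk mapping whose base key is 16-page aligned. */
    uint64_t key             = 0x213ULL; /* looked-up page key */
    uint64_t num_chunk_pages = 16ULL;    /* pages covered by the mapping */
    uint64_t num_pages       = 8ULL;     /* pages left in the query range */

    /* Power-of-two size plus a size-aligned base key means the low bits
     * of the key are the page offset inside the chunk. */
    uint64_t page_offset       = key & (num_chunk_pages - 1);
    uint64_t num_mapping_pages = num_chunk_pages - page_offset;

    if (num_mapping_pages > num_pages)
        num_mapping_pages = num_pages;

    printf("offset %llu, returning %llu page(s) from this mapping\n",
           (unsigned long long)page_offset,
           (unsigned long long)num_mapping_pages);

    return 0;
}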
uvm_chunk_sizes_mask_t uvm_cpu_chunk_get_allocation_sizes(void)
{
return uvm_cpu_chunk_allocation_sizes & UVM_CPU_CHUNK_SIZES;
return uvm_cpu_chunk_allocation_sizes & UVM_CPU_CHUNK_SIZES;
}
static void uvm_cpu_chunk_set_size(uvm_cpu_chunk_t *chunk, uvm_chunk_size_t size)

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -30,96 +30,12 @@
#include "uvm_lock.h"
#include "uvm_pmm_gpu.h"
// Module to handle per-GPU user mappings to sysmem physical memory. Notably,
// this implements a reverse map of the DMA address to {va_block, virt_addr}.
// This is required by the GPU access counters feature since they may provide a
// physical address in the notification packet (GPA notifications). We use the
// table to obtain the VAs of the memory regions being accessed remotely. The
// reverse map is implemented by a radix tree, which is indexed using the
// DMA address. For now, only PAGE_SIZE translations are supported (i.e. no
// big/huge pages).
//
// TODO: Bug 1995015: add support for physically-contiguous mappings.
struct uvm_pmm_sysmem_mappings_struct
{
uvm_gpu_t *gpu;
struct radix_tree_root reverse_map_tree;
uvm_mutex_t reverse_map_lock;
};
// Global initialization/exit functions, that need to be called during driver
// initialization/tear-down. These are needed to allocate/free global internal
// data structures.
NV_STATUS uvm_pmm_sysmem_init(void);
void uvm_pmm_sysmem_exit(void);
// Initialize per-GPU sysmem mapping tracking
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// Destroy per-GPU sysmem mapping tracking. The caller must ensure that all the
// mappings have been removed before calling this function.
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// dma_addr -> {va_block, virt_addr} mapping is inserted in the reverse map.
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 virt_addr,
NvU64 region_size,
uvm_va_block_t *va_block,
uvm_processor_id_t owner);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// entries for the physical region starting at dma_addr are removed from the
// reverse map.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// Like uvm_pmm_sysmem_mappings_remove_gpu_mapping but it doesn't assert if the
// mapping doesn't exist. See uvm_va_block_evict_chunks for more information.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is updated with va_block.
// This is required on VA block split.
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
uvm_va_block_t *va_block);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is split into regions of
// new_region_size. new_region_size must be a power of two and smaller than the
// previously-registered size.
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// If the GPU used to initialize sysmem_mappings supports access counters, all
// the mappings within the region [dma_addr, dma_addr + new_region_size) are
// merged into a single mapping. new_region_size must be a power of two. The
// whole region must be previously populated with mappings and all of them must
// have the same VA block and processor owner.
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// Obtain the {va_block, virt_addr} information for the mappings in the given
// [dma_addr:dma_addr + region_size) range. dma_addr and region_size must be
// page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). max_out_mappings are written, at most. The caller is required to
// provide enough entries in out_mappings.
//
// The VA block in each returned translation entry is retained, and it's up to
// the caller to release it.
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 region_size,
uvm_reverse_map_t *out_mappings,
size_t max_out_mappings);
#define UVM_CPU_CHUNK_SIZES (UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | PAGE_SIZE)
typedef enum
@@ -425,9 +341,9 @@ void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_inde
bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
static NV_STATUS uvm_test_get_cpu_chunk_allocation_sizes(UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES_PARAMS *params,
struct file *filp)
struct file *filp)
{
params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
return NV_OK;
params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
return NV_OK;
}
#endif

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -35,544 +35,6 @@
#include "uvm_push.h"
#include "uvm_processors.h"
// Pre-allocated array used for dma-to-virt translations
static uvm_reverse_map_t g_sysmem_translations[PAGES_PER_UVM_VA_BLOCK];
// We use our own separate reverse map to easily specify contiguous DMA
// address ranges
static uvm_pmm_sysmem_mappings_t g_reverse_map;
// Check that the DMA addresses in the range defined by
// [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
// are registered in the reverse map, using one call per entry. The returned
// virtual addresses must belong to va_block. The function assumes a 1:1
// dma-to-virt mapping for the whole VA block
static NV_STATUS check_reverse_map_block_page(uvm_va_block_t *va_block,
NvU64 base_dma_addr,
const uvm_page_mask_t *page_mask)
{
uvm_page_index_t page_index;
for_each_va_block_page(page_index, va_block) {
size_t num_pages;
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
num_pages = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
base_dma_addr + page_index * PAGE_SIZE,
PAGE_SIZE,
g_sysmem_translations,
PAGES_PER_UVM_VA_BLOCK);
if (!page_mask || uvm_page_mask_test(page_mask, page_index)) {
TEST_CHECK_RET(num_pages == 1);
TEST_CHECK_RET(g_sysmem_translations[0].va_block == va_block);
TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
TEST_CHECK_RET(uvm_reverse_map_start(&g_sysmem_translations[0]) == uvm_va_block_cpu_page_address(va_block, page_index));
TEST_CHECK_RET(uvm_va_block_region_num_pages(g_sysmem_translations[0].region) == 1);
TEST_CHECK_RET(UVM_ID_IS_CPU(g_sysmem_translations[0].owner));
uvm_va_block_release(g_sysmem_translations[0].va_block);
}
else {
TEST_CHECK_RET(num_pages == 0);
}
}
return NV_OK;
}
// Check that the DMA addresses in the range defined by
// [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
// are registered in the reverse map, using a single translation call. The
// returned virtual addresses must belong to va_block. The function assumes a
// 1:1 dma-to-virt mapping for the whole VA block
static NV_STATUS check_reverse_map_block_batch(uvm_va_block_t *va_block,
NvU64 base_dma_addr,
const uvm_page_mask_t *page_mask)
{
size_t num_translations;
size_t num_pages;
size_t reverse_map_index;
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
base_dma_addr,
uvm_va_block_size(va_block),
g_sysmem_translations,
PAGES_PER_UVM_VA_BLOCK);
if (num_translations == 0 && page_mask)
TEST_CHECK_RET(uvm_page_mask_empty(page_mask));
num_pages = 0;
for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);
num_pages += num_reverse_map_pages;
TEST_CHECK_RET(reverse_map->va_block == va_block);
TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
uvm_va_block_release(reverse_map->va_block);
TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));
}
if (page_mask)
TEST_CHECK_RET(num_pages == uvm_page_mask_weight(page_mask));
else
TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block));
return NV_OK;
}
// Check that the DMA addresses for all the CPU pages of the two given VA blocks
// are registered in the reverse map, using a single translation call. The
// returned virtual addresses must belong to one of the blocks. The function
// assumes a 1:1 dma-to-virt mapping for each VA block and that va_block1 is
// mapped behind va_block0.
static NV_STATUS check_reverse_map_two_blocks_batch(NvU64 base_dma_addr,
uvm_va_block_t *va_block0,
uvm_va_block_t *va_block1)
{
size_t num_pages;
size_t num_translations;
size_t reverse_map_index;
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
base_dma_addr,
UVM_VA_BLOCK_SIZE,
g_sysmem_translations,
PAGES_PER_UVM_VA_BLOCK);
TEST_CHECK_RET(num_translations == 2);
num_pages = 0;
for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
uvm_va_block_t *block;
uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
NvU64 virt_addr = uvm_reverse_map_start(reverse_map);
size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);
if (reverse_map_index == 0)
block = va_block0;
else
block = va_block1;
TEST_CHECK_RET(reverse_map->va_block == block);
TEST_CHECK_RET(nv_kref_read(&block->kref) >= 2);
uvm_va_block_release(reverse_map->va_block);
TEST_CHECK_RET(num_reverse_map_pages == uvm_va_block_num_cpu_pages(block));
TEST_CHECK_RET(virt_addr == block->start);
TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));
num_pages += num_reverse_map_pages;
}
TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block0) + uvm_va_block_num_cpu_pages(va_block1));
return NV_OK;
}
static const NvU64 g_base_dma_addr = UVM_VA_BLOCK_SIZE;
// This function adds the mappings for all the subregions in va_block defined
// by page_mask. g_base_dma_addr is used as the base DMA address for the whole
// VA block.
static NV_STATUS test_pmm_sysmem_reverse_map_single(uvm_va_block_t *va_block,
uvm_page_mask_t *page_mask,
uvm_chunk_size_t split_size,
bool merge)
{
NV_STATUS status = NV_OK;
uvm_va_block_region_t subregion;
TEST_CHECK_RET(is_power_of_2(split_size));
TEST_CHECK_RET(split_size >= PAGE_SIZE);
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
TEST_CHECK_RET(is_power_of_2(uvm_va_block_region_size(subregion)));
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + subregion.first * PAGE_SIZE,
va_block->start + subregion.first * PAGE_SIZE,
uvm_va_block_region_size(subregion),
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
if (status != NV_OK)
return status;
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
if (split_size != UVM_CHUNK_SIZE_MAX) {
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
TEST_CHECK_RET(uvm_va_block_region_size(subregion) > split_size);
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&g_reverse_map,
g_base_dma_addr + subregion.first * PAGE_SIZE,
split_size);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
}
if (split_size != UVM_CHUNK_SIZE_MAX && merge) {
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
g_base_dma_addr + subregion.first * PAGE_SIZE,
uvm_va_block_region_size(subregion));
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
}
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
NvU64 subregion_dma_addr = g_base_dma_addr + subregion.first * PAGE_SIZE;
if (split_size == UVM_CHUNK_SIZE_MAX || merge) {
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr);
uvm_mutex_unlock(&va_block->lock);
}
else {
size_t chunk;
size_t num_chunks = uvm_va_block_region_size(subregion) / split_size;
TEST_CHECK_RET(num_chunks > 1);
uvm_mutex_lock(&va_block->lock);
for (chunk = 0; chunk < num_chunks; ++chunk)
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr + chunk * split_size);
uvm_mutex_unlock(&va_block->lock);
}
}
uvm_page_mask_zero(page_mask);
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
return status;
}
static uvm_page_mask_t g_page_mask;
static NV_STATUS test_pmm_sysmem_reverse_map_single_whole(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status;
uvm_va_block_t *va_block;
const bool merge_array[] = {false, true};
const uvm_chunk_size_t chunk_split_array[] = { UVM_CHUNK_SIZE_4K, UVM_CHUNK_SIZE_64K, UVM_CHUNK_SIZE_MAX };
unsigned merge_index;
unsigned chunk_split_index;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));
for (merge_index = 0; merge_index < ARRAY_SIZE(merge_array); ++merge_index) {
for (chunk_split_index = 0; chunk_split_index < ARRAY_SIZE(chunk_split_array); ++chunk_split_index) {
// The reverse map has PAGE_SIZE granularity
if (chunk_split_array[chunk_split_index] < PAGE_SIZE)
continue;
uvm_page_mask_region_fill(&g_page_mask, uvm_va_block_region_from_block(va_block));
TEST_CHECK_RET(test_pmm_sysmem_reverse_map_single(va_block,
&g_page_mask,
chunk_split_array[chunk_split_index],
merge_array[merge_index]) == NV_OK);
}
}
return status;
}
static NV_STATUS test_pmm_sysmem_reverse_map_single_pattern(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status;
uvm_va_block_t *va_block;
uvm_page_index_t page_index;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
uvm_page_mask_zero(&g_page_mask);
for_each_va_block_page(page_index, va_block) {
if (page_index % 2 == 0)
uvm_page_mask_set(&g_page_mask, page_index);
}
return test_pmm_sysmem_reverse_map_single(va_block, &g_page_mask, UVM_CHUNK_SIZE_MAX, false);
}
// This function assumes that addr points at a VA range containing four VA
// blocks, each of size UVM_VA_BLOCK_SIZE / 4.
static NV_STATUS test_pmm_sysmem_reverse_map_many_blocks(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status;
uvm_va_block_t *va_block0;
uvm_va_block_t *va_block1;
NvU64 base_dma_addr0;
NvU64 base_dma_addr1;
status = uvm_va_block_find(va_space, addr + UVM_VA_BLOCK_SIZE / 4, &va_block0);
if (status != NV_OK)
return status;
status = uvm_va_block_find(va_space, addr + 3 * UVM_VA_BLOCK_SIZE / 4, &va_block1);
if (status != NV_OK)
return status;
TEST_CHECK_RET(va_block0 != va_block1);
base_dma_addr0 = g_base_dma_addr + uvm_va_block_size(va_block0);
base_dma_addr1 = base_dma_addr0 + uvm_va_block_size(va_block0);
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block0)));
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block1)));
uvm_mutex_lock(&va_block0->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
base_dma_addr0,
va_block0->start,
uvm_va_block_size(va_block0),
va_block0,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block0->lock);
TEST_CHECK_RET(status == NV_OK);
uvm_mutex_lock(&va_block1->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
base_dma_addr1,
va_block1->start,
uvm_va_block_size(va_block1),
va_block1,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block1->lock);
// Check each VA block individually
if (status == NV_OK) {
TEST_CHECK_GOTO(check_reverse_map_block_page(va_block0, base_dma_addr0, NULL) == NV_OK, error);
TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block0, base_dma_addr0, NULL) == NV_OK, error);
TEST_CHECK_GOTO(check_reverse_map_block_page(va_block1, base_dma_addr1, NULL) == NV_OK, error);
TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block1, base_dma_addr1, NULL) == NV_OK, error);
// Check both VA blocks at the same time
TEST_CHECK_GOTO(check_reverse_map_two_blocks_batch(g_base_dma_addr, va_block0, va_block1) == NV_OK, error);
error:
uvm_mutex_lock(&va_block1->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr1);
uvm_mutex_unlock(&va_block1->lock);
}
uvm_mutex_lock(&va_block0->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr0);
uvm_mutex_unlock(&va_block0->lock);
return status;
}
// This function registers a non-uniform distribution of chunks (a mix of 4K
// and 64K chunks) and then merges them back to verify that the merge logic works.
static NV_STATUS test_pmm_sysmem_reverse_map_merge(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status = NV_OK;
uvm_va_block_t *va_block;
const unsigned chunks_64k_pos[] =
{
16,
64,
96,
192,
208,
224,
288,
320,
384,
480
};
uvm_page_index_t page_index;
unsigned i;
if (PAGE_SIZE != UVM_PAGE_SIZE_4K)
return NV_OK;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(uvm_va_block_size(va_block) == UVM_VA_BLOCK_SIZE);
page_index = 0;
for (i = 0; i < ARRAY_SIZE(chunks_64k_pos); ++i) {
// Fill with 4K mappings until the next 64K mapping
while (page_index < chunks_64k_pos[i]) {
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + page_index * PAGE_SIZE,
uvm_va_block_cpu_page_address(va_block, page_index),
PAGE_SIZE,
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
++page_index;
}
// Register the 64K mapping
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + page_index * PAGE_SIZE,
uvm_va_block_cpu_page_address(va_block, page_index),
UVM_CHUNK_SIZE_64K,
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
page_index += UVM_PAGE_SIZE_64K / PAGE_SIZE;
}
// Fill the tail with 4K mappings, too
while (page_index < PAGES_PER_UVM_VA_BLOCK) {
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + page_index * PAGE_SIZE,
uvm_va_block_cpu_page_address(va_block, page_index),
PAGE_SIZE,
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
++page_index;
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
g_base_dma_addr,
uvm_va_block_size(va_block));
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
uvm_mutex_unlock(&va_block->lock);
return status;
}
static NV_STATUS test_pmm_sysmem_reverse_map_remove_on_eviction(uvm_va_space_t *va_space, NvU64 addr)
{
uvm_va_block_t *va_block;
NV_STATUS status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr,
addr,
uvm_va_block_size(va_block),
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);
uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);
return NV_OK;
}
static NV_STATUS test_pmm_sysmem_reverse_map(uvm_va_space_t *va_space, NvU64 addr1, NvU64 addr2)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *volta_gpu = NULL;
uvm_gpu_t *gpu;
// Find a GPU that supports access counters with physical address
// notifications, since that is required to add entries to, or remove them
// from, the reverse map.
for_each_va_space_gpu(gpu, va_space) {
if (gpu->parent->access_counters_can_use_physical_addresses) {
// Initialize the reverse map.
status = uvm_pmm_sysmem_mappings_init(gpu, &g_reverse_map);
if (status != NV_OK)
return status;
volta_gpu = gpu;
break;
}
}
if (!volta_gpu)
return NV_ERR_INVALID_DEVICE;
status = test_pmm_sysmem_reverse_map_single_whole(va_space, addr1);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_single_pattern(va_space, addr1);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_many_blocks(va_space, addr2);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_merge(va_space, addr1);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_remove_on_eviction(va_space, addr1);
uvm_pmm_sysmem_mappings_deinit(&g_reverse_map);
return status;
}
NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space;
va_space = uvm_va_space_get(filp);
// Take the global lock to avoid interference between different instances of
// the test, since we use a number of global variables
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_write(va_space);
status = test_pmm_sysmem_reverse_map(va_space, params->range_address1, params->range_address2);
uvm_va_space_up_write(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS cpu_chunk_map_on_cpu(uvm_cpu_chunk_t *chunk, void **cpu_addr)
{
struct page **pages;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -144,6 +144,9 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
}
if (gpu->parent->replayable_faults_supported) {
UVM_ASSERT(gpu->parent->isr.access_counters);
UVM_ASSERT(gpu->parent->access_counter_buffer);
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.replayable_faults,
gpu->parent->closest_cpu_numa_node);
@@ -161,10 +164,11 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
}
if (gpu->parent->access_counters_supported) {
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.access_counters,
// We only need to test one notification buffer, so we pick index 0.
uvm_access_counters_isr_lock(&gpu->parent->access_counter_buffer[0]);
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.access_counters[0],
gpu->parent->closest_cpu_numa_node);
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_access_counters_isr_unlock(&gpu->parent->access_counter_buffer[0]);
}
}
@@ -311,7 +315,6 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_DISABLE_NVLINK_PEER_ACCESS, uvm_test_disable_nvlink_peer_access);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_GET_PAGE_THRASHING_POLICY, uvm_test_get_page_thrashing_policy);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SET_PAGE_THRASHING_POLICY, uvm_test_set_page_thrashing_policy);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_SYSMEM, uvm_test_pmm_sysmem);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_REVERSE_MAP, uvm_test_pmm_reverse_map);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_SPACE_MM_RETAIN, uvm_test_va_space_mm_retain);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE, uvm_test_pmm_chunk_with_elevated_page);
@@ -350,6 +353,7 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_INJECT_TOOLS_EVENT_V2, uvm_test_inject_tools_event_v2);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SET_P2P_SUSPENDED, uvm_test_set_p2p_suspended);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_INJECT_NVLINK_ERROR, uvm_test_inject_nvlink_error);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_QUERY_ACCESS_COUNTERS, uvm_test_query_access_counters);
}
return -EINVAL;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -28,8 +28,7 @@
#include "uvm_common.h"
#include "uvm_test_ioctl.h"
// Unlike UVM_INFO_PRINT, this prints on release builds
#define UVM_TEST_PRINT(fmt, ...) UVM_PRINT_FUNC(pr_info, " " fmt, ##__VA_ARGS__)
#define UVM_TEST_PRINT UVM_ERR_PRINT_ALWAYS
// WARNING: This macro will return out of the current scope
#define TEST_CHECK_RET(cond) \
@@ -160,30 +159,35 @@ NV_STATUS uvm_test_range_group_tree(UVM_TEST_RANGE_GROUP_TREE_PARAMS *params, st
NV_STATUS uvm_test_range_group_range_info(UVM_TEST_RANGE_GROUP_RANGE_INFO_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_range_group_range_count(UVM_TEST_RANGE_GROUP_RANGE_COUNT_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_get_prefetch_faults_reenable_lapse(UVM_TEST_GET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_prefetch_faults_reenable_lapse(UVM_TEST_SET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_get_prefetch_faults_reenable_lapse(UVM_TEST_GET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_set_prefetch_faults_reenable_lapse(UVM_TEST_SET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_check_channel_va_space(UVM_TEST_CHECK_CHANNEL_VA_SPACE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_reverse_map(UVM_TEST_PMM_REVERSE_MAP_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_chunk_with_elevated_page(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_chunk_with_elevated_page(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_va_space_inject_error(UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_get_gpu_time(UVM_TEST_GET_GPU_TIME_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_release_free_root_chunks(UVM_TEST_PMM_RELEASE_FREE_ROOT_CHUNKS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_release_free_root_chunks(UVM_TEST_PMM_RELEASE_FREE_ROOT_CHUNKS_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_space_add_dummy_thread_contexts(UVM_TEST_VA_SPACE_ADD_DUMMY_THREAD_CONTEXTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_space_remove_dummy_thread_contexts(UVM_TEST_VA_SPACE_REMOVE_DUMMY_THREAD_CONTEXTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_space_add_dummy_thread_contexts(UVM_TEST_VA_SPACE_ADD_DUMMY_THREAD_CONTEXTS_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_va_space_remove_dummy_thread_contexts(UVM_TEST_VA_SPACE_REMOVE_DUMMY_THREAD_CONTEXTS_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_thread_context_sanity(UVM_TEST_THREAD_CONTEXT_SANITY_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_thread_context_perf(UVM_TEST_THREAD_CONTEXT_PERF_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_rb_tree_directed(UVM_TEST_RB_TREE_DIRECTED_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_rb_tree_random(UVM_TEST_RB_TREE_RANDOM_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_sec2_sanity(UVM_TEST_SEC2_SANITY_PARAMS *params, struct file *filp);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVidia Corporation
Copyright (c) 2015-2025 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -926,31 +926,38 @@ typedef struct
// Change configuration of access counters. This call will disable access
// counters and reenable them using the new configuration. All previous
// notifications will be lost
// notifications will be lost.
//
// The reconfiguration affects all VA spaces that rely on the access
// counters information for the same GPU. To avoid conflicting configurations,
// only one VA space is allowed to reconfigure the GPU at a time.
//
// When the reconfiguration VA space is destroyed, the bottom-half control
// settings are reset.
//
// Error returns:
// NV_ERR_INVALID_STATE
// - The GPU has already been reconfigured in a different VA space
// - The GPU has already been reconfigured in a different VA space.
#define UVM_TEST_RECONFIGURE_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(56)
typedef struct
{
NvProcessorUuid gpu_uuid; // In
// Type UVM_ACCESS_COUNTER_GRANULARITY from nv_uvm_types.h
NvU32 mimc_granularity; // In
NvU32 momc_granularity; // In
// Type UVM_ACCESS_COUNTER_USE_LIMIT from nv_uvm_types.h
NvU32 mimc_use_limit; // In
NvU32 momc_use_limit; // In
NvU32 granularity; // In
NvU32 threshold; // In
NvBool enable_mimc_migrations; // In
NvBool enable_momc_migrations; // In
NvBool enable_migrations; // In
// Settings to control how notifications are serviced by the access counters
// bottom-half. These settings help tests exercise races in the driver, e.g.,
// unregistering a GPU while (valid) pending notifications remain in the
// notification buffer.
//
// A max_batch_size of 0 leaves the driver's behavior unchanged.
NvU32 max_batch_size; // In
NvBool one_iteration_per_batch; // In
NvU32 sleep_per_iteration_us; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS;
@@ -962,13 +969,6 @@ typedef enum
UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX
} UVM_TEST_ACCESS_COUNTER_RESET_MODE;
typedef enum
{
UVM_TEST_ACCESS_COUNTER_TYPE_MIMC = 0,
UVM_TEST_ACCESS_COUNTER_TYPE_MOMC,
UVM_TEST_ACCESS_COUNTER_TYPE_MAX
} UVM_TEST_ACCESS_COUNTER_TYPE;
// Clear the contents of the access counters. This call supports different
// modes for targeted/global resets.
#define UVM_TEST_RESET_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(57)
@@ -979,9 +979,6 @@ typedef struct
// Type UVM_TEST_ACCESS_COUNTER_RESET_MODE
NvU32 mode; // In
// Type UVM_TEST_ACCESS_COUNTER_TYPE
NvU32 counter_type; // In
NvU32 bank; // In
NvU32 tag; // In
NV_STATUS rmStatus; // Out
@@ -1061,14 +1058,6 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_SET_PAGE_THRASHING_POLICY_PARAMS;
#define UVM_TEST_PMM_SYSMEM UVM_TEST_IOCTL_BASE(64)
typedef struct
{
NvU64 range_address1 NV_ALIGN_BYTES(8); // In
NvU64 range_address2 NV_ALIGN_BYTES(8); // In
NV_STATUS rmStatus; // Out
} UVM_TEST_PMM_SYSMEM_PARAMS;
#define UVM_TEST_PMM_REVERSE_MAP UVM_TEST_IOCTL_BASE(65)
typedef struct
{
@@ -1142,18 +1131,46 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS;
// Inject an error into the VA space
// Inject an error into the VA space or into a to-be registered GPU.
//
// If migrate_vma_allocation_fail_nth is greater than 0, the nth page
// allocation within migrate_vma will fail.
//
// If va_block_allocation_fail_nth is greater than 0, the nth call to
// uvm_va_block_find_create() will fail with NV_ERR_NO_MEMORY.
//
// If gpu_access_counters_alloc_buffer is set, the parent_gpu's access counters
// buffer allocation will fail with NV_ERR_NO_MEMORY.
//
// If gpu_access_counters_alloc_block_context is set, the access counters
// buffer's block_context allocation will fail with NV_ERR_NO_MEMORY.
//
// If gpu_isr_access_counters_alloc is set, the ISR access counters allocation
// will fail with NV_ERR_NO_MEMORY.
//
// If gpu_isr_access_counters_alloc_stats_cpu is set, the ISR access counters
// buffer's stats_cpu allocation will fail with NV_ERR_NO_MEMORY.
//
// If access_counters_batch_context_notifications is set, the access counters
// batch_context's notifications allocation will fail with NV_ERR_NO_MEMORY.
//
// If access_counters_batch_context_notification_cache is set, the access
// counters batch_context's notification cache allocation will fail with
// NV_ERR_NO_MEMORY.
//
// Note that only one of the gpu_* or access_counters_* settings can be
// selected at a time.
#define UVM_TEST_VA_SPACE_INJECT_ERROR UVM_TEST_IOCTL_BASE(72)
typedef struct
{
NvU32 migrate_vma_allocation_fail_nth; // In
NvU32 va_block_allocation_fail_nth; // In
NvBool gpu_access_counters_alloc_buffer; // In
NvBool gpu_access_counters_alloc_block_context; // In
NvBool gpu_isr_access_counters_alloc; // In
NvBool gpu_isr_access_counters_alloc_stats_cpu; // In
NvBool access_counters_batch_context_notifications; // In
NvBool access_counters_batch_context_notification_cache; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS;
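A hypothetical user-space sketch of driving this ioctl, assuming the driver was loaded with built-in tests enabled, that /dev/nvidia-uvm is the device node, and that the usual UVM test calling convention (the params struct passed by pointer) applies; it is illustrative only and depends on the driver's own headers.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "uvm_test_ioctl.h" /* assumption: built against the driver's headers */

int main(void)
{
    UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS params;
    int fd = open("/dev/nvidia-uvm", O_RDWR); /* assumed device node */

    if (fd < 0) {
        perror("open");
        return 1;
    }

    memset(&params, 0, sizeof(params));

    /* Per the comment above, select only one injection point at a time:
     * here, make the access counters buffer allocation fail on the next
     * GPU registration. */
    params.gpu_access_counters_alloc_buffer = 1;

    if (ioctl(fd, UVM_TEST_VA_SPACE_INJECT_ERROR, &params) != 0)
        perror("ioctl");
    else
        printf("rmStatus = 0x%x\n", (unsigned)params.rmStatus);

    close(fd);
    return 0;
}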
@@ -1505,6 +1522,16 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_INJECT_NVLINK_ERROR_PARAMS;
#define UVM_TEST_QUERY_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(109)
typedef struct
{
NvProcessorUuid gpu_uuid; // In
NvU8 num_notification_buffers; // Out
NvU32 num_notification_entries; // Out
NV_STATUS rmStatus; // Out
} UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS;
#ifdef __cplusplus
}
#endif

View File

@@ -1305,8 +1305,7 @@ void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_c
void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
uvm_gpu_id_t gpu_id,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys)
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
uvm_down_read(&va_space->tools.lock);
@@ -1318,18 +1317,10 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
info->eventType = UvmEventTypeTestAccessCounter;
info->srcIndex = uvm_parent_id_value_from_processor_id(gpu_id);
info->address = buffer_entry->address.address;
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
if (buffer_entry->address.is_virtual) {
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
info->veId = buffer_entry->virtual_info.ve_id;
}
else {
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
}
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
info->physOnManaged = on_managed_phys? 1 : 0;
info->address = buffer_entry->address;
info->instancePtr = buffer_entry->instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->instance_ptr.aperture];
info->veId = buffer_entry->ve_id;
info->value = buffer_entry->counter_value;
info->subGranularity = buffer_entry->sub_granularity;
info->bank = buffer_entry->bank;
@@ -1345,18 +1336,10 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
info->eventType = UvmEventTypeTestAccessCounter;
info->srcIndex = uvm_id_value(gpu_id);
info->address = buffer_entry->address.address;
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
if (buffer_entry->address.is_virtual) {
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
info->veId = buffer_entry->virtual_info.ve_id;
}
else {
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
}
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
info->physOnManaged = on_managed_phys? 1 : 0;
info->address = buffer_entry->address;
info->instancePtr = buffer_entry->instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->instance_ptr.aperture];
info->veId = buffer_entry->ve_id;
info->value = buffer_entry->counter_value;
info->subGranularity = buffer_entry->sub_granularity;
info->bank = buffer_entry->bank;
@@ -1368,18 +1351,13 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
uvm_up_read(&va_space->tools.lock);
}
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys)
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu, const uvm_access_counter_buffer_entry_t *buffer_entry)
{
uvm_va_space_t *va_space;
uvm_down_read(&g_tools_va_space_list_lock);
list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
uvm_tools_record_access_counter(va_space,
gpu->id,
buffer_entry,
on_managed_phys);
uvm_tools_record_access_counter(va_space, gpu->id, buffer_entry);
}
uvm_up_read(&g_tools_va_space_list_lock);
}

View File

@@ -111,14 +111,11 @@ void uvm_tools_broadcast_replay(uvm_gpu_t *gpu, uvm_push_t *push, NvU32 batch_id
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type);
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys);
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu, const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
uvm_gpu_id_t gpu_id,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys);
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -37,10 +37,10 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_turing_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Turing covers 128 TB and that's the minimum
@@ -79,8 +79,6 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -25,42 +25,174 @@
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "clc365.h"
static void clear_access_counter_notifications_interrupt(uvm_parent_gpu_t *parent_gpu)
typedef struct {
NvU8 bufferEntry[NVC365_NOTIFY_BUF_SIZE];
} access_counter_buffer_entry_c365_t;
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntr;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
reg = access_counters->rm_info.pHubIntrEnSet;
mask = access_counters->rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
static void clear_access_counter_notifications_interrupt(uvm_access_counter_buffer_t *access_counters)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnClear;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
reg = access_counters->rm_info.pHubIntr;
mask = access_counters->rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters)
{
volatile NvU32 *reg;
NvU32 mask;
reg = access_counters->rm_info.pHubIntrEnClear;
mask = access_counters->rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
wmb();
// See the comment in uvm_hal_turing_disable_replayable_faults
clear_access_counter_notifications_interrupt(parent_gpu);
clear_access_counter_notifications_interrupt(access_counters);
}
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get)
void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get)
{
clear_access_counter_notifications_interrupt(parent_gpu);
clear_access_counter_notifications_interrupt(access_counters);
wmb();
// Write GET to force the re-evaluation of the interrupt condition after the
// interrupt bit has been cleared.
UVM_GPU_WRITE_ONCE(*parent_gpu->access_counter_buffer_info.rm_info.pAccessCntrBufferGet, get);
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
}
NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
{
return NVC365_NOTIFY_BUF_SIZE;
}
static uvm_aperture_t get_access_counter_inst_aperture(NvU32 *access_counter_entry)
{
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_APERTURE);
switch (hw_aperture_value) {
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
return UVM_APERTURE_VID;
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
return UVM_APERTURE_SYS;
}
UVM_ASSERT_MSG(false, "Invalid inst aperture value: %d\n", hw_aperture_value);
return UVM_APERTURE_MAX;
}
static NvU64 get_address(uvm_parent_gpu_t *parent_gpu, NvU32 *access_counter_entry)
{
NvU64 address;
NvU64 addr_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_HI);
NvU64 addr_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_LO);
NvU32 addr_type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_TYPE);
UVM_ASSERT(addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA);
address = addr_lo + (addr_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO));
address = uvm_parent_gpu_canonical_address(parent_gpu, address);
return address;
}
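A stand-alone sketch of the address assembly performed by get_address() above, using an assumed 32-bit ADDR_LO field width and an assumed 49-bit virtual address space for the sign-extension that uvm_parent_gpu_canonical_address() performs on the real GPU. The instance pointer parsed a few functions below is assembled the same way from INST_LO/INST_HI and then shifted left by 12, since the hardware reports a 4K page number.

#include <stdio.h>
#include <stdint.h>

/* Assumed widths for illustration only; the real values come from clc365.h
 * (HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO)) and from the GPU's VA width. */
#define SKETCH_ADDR_LO_BITS 32
#define SKETCH_VA_BITS      49

static uint64_t sketch_canonical(uint64_t addr)
{
    /* Sign-extend from the top implemented VA bit (assumes the usual
     * two's-complement arithmetic right shift). */
    unsigned shift = 64 - SKETCH_VA_BITS;
    return (uint64_t)(((int64_t)(addr << shift)) >> shift);
}

int main(void)
{
    /* Hypothetical raw fields read out of a notification buffer entry */
    uint64_t addr_hi = 0x0001FFFFULL;
    uint64_t addr_lo = 0xFEEDB000ULL;

    uint64_t address = addr_lo + (addr_hi << SKETCH_ADDR_LO_BITS);

    printf("assembled 0x%016llx -> canonical 0x%016llx\n",
           (unsigned long long)address,
           (unsigned long long)sketch_canonical(address));

    return 0;
}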
static NvU32 *get_access_counter_buffer_entry(uvm_access_counter_buffer_t *access_counters, NvU32 index)
{
access_counter_buffer_entry_c365_t *buffer_start;
NvU32 *access_counter_entry;
UVM_ASSERT(index < access_counters->max_notifications);
buffer_start = (access_counter_buffer_entry_c365_t *)access_counters->rm_info.bufferAddress;
access_counter_entry = (NvU32 *)&buffer_start[index];
return access_counter_entry;
}
bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index)
{
NvU32 *access_counter_entry;
bool is_valid;
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
is_valid = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID);
return is_valid;
}
void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index)
{
NvU32 *access_counter_entry;
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
WRITE_HWCONST_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID, FALSE);
}
void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
NvU32 *access_counter_entry;
NvU64 inst_hi, inst_lo;
// Valid bit must be set before this function is called
UVM_ASSERT(uvm_hal_turing_access_counter_buffer_entry_is_valid(access_counters, index));
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
UVM_ASSERT(READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, TYPE) != NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU);
buffer_entry->address = get_address(access_counters->parent_gpu, access_counter_entry);
inst_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_HI);
inst_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_LO);
buffer_entry->instance_ptr.address = inst_lo + (inst_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, INST_LO));
// HW value contains the 4K page number. Shift to build the full address
buffer_entry->instance_ptr.address <<= 12;
buffer_entry->instance_ptr.aperture = get_access_counter_inst_aperture(access_counter_entry);
buffer_entry->mmu_engine_id = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, MMU_ENGINE_ID);
buffer_entry->mmu_engine_type = UVM_MMU_ENGINE_TYPE_GRAPHICS;
// MMU engine id aligns with the fault buffer packets. Therefore, we reuse
// the helper to compute the VE ID from the fault buffer class.
buffer_entry->ve_id = access_counters->parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->mmu_engine_id,
buffer_entry->mmu_engine_type);
buffer_entry->counter_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, COUNTER_VAL);
buffer_entry->sub_granularity = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, SUB_GRANULARITY);
buffer_entry->bank = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, BANK);
buffer_entry->tag = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, NOTIFY_TAG);
// Automatically clear valid bit for the entry in the access counter buffer
uvm_hal_turing_access_counter_buffer_entry_clear_valid(access_counters, index);
}
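A hedged sketch of the consumption pattern these helpers support; the real polling loop lives in common code and is simplified here, and the function name below is illustrative.
/* Illustration only: poll one slot, parse it (parsing clears the valid bit,
 * per the comment above), and leave the decoded entry ready for servicing. */
static void example_drain_one(uvm_access_counter_buffer_t *access_counters,
                              NvU32 index,
                              uvm_access_counter_buffer_entry_t *entry)
{
    if (!uvm_hal_turing_access_counter_buffer_entry_is_valid(access_counters, index))
        return;
    uvm_hal_turing_access_counter_buffer_parse_entry(access_counters, index, entry);
    /* entry->address, entry->counter_value, entry->sub_granularity, etc. are
     * now populated for the servicing path. */
}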

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -32,8 +32,8 @@ static void clear_replayable_faults_interrupt(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntr;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntr;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
@@ -54,8 +54,8 @@ void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);

View File

@@ -361,3 +361,24 @@ void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
if (params->membar == UvmInvalidateTlbMemBarLocal)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push)
{
NV_PUSH_4U(C46F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, ALL));
}
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
NV_PUSH_4U(C46F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, HWVALUE(C46F, MEM_OP_C, ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG, buffer_entry->tag),
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, TARGETED) |
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, MIMC) |
HWVALUE(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank));
}
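For context, a hedged usage sketch (not part of the change): clear_all wipes every counter, while a targeted clear reuses the tag and bank captured when the notification was parsed. The wrapper name below is illustrative.
/* Illustration only: push a targeted clear for a serviced notification. */
static void example_clear_serviced(uvm_push_t *push,
                                   const uvm_access_counter_buffer_entry_t *entry)
{
    uvm_hal_turing_access_counter_clear_targeted(push, entry);
}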

View File

@@ -1323,14 +1323,11 @@ typedef struct
NvU8 aperture;
NvU8 instancePtrAperture;
NvU8 isVirtual;
NvU8 isFromCpu;
NvU8 padding8bits;
NvU8 veId;
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
NvU16 padding16bits;
NvU32 value;
NvU32 subGranularity;
@@ -1348,26 +1345,21 @@ typedef struct
// data in a queue.
//
NvU8 eventType;
// See uvm_access_counter_buffer_entry_t for details
NvU8 aperture;
NvU8 instancePtrAperture;
NvU8 isVirtual;
NvU8 isFromCpu;
NvU8 veId;
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU16 srcIndex; // index of the gpu that received the access counter
// notification
NvU16 padding16bits;
NvU32 value;
NvU32 subGranularity;
NvU32 tag;
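The manual-padding note above is the kind of invariant that can be checked at build time. A minimal sketch under assumed names follows; this is not the actual tools struct, only an illustration of why explicit padding pins the offsets.
#include <stddef.h>
#include <stdint.h>
/* Every member sits on its natural boundary, so pragma pack or -malign-double
 * cannot move it; the offsets are fixed by construction. */
struct example_event {
    uint8_t  event_type;    /* offset 0 */
    uint8_t  aperture;      /* offset 1 */
    uint8_t  ve_id;         /* offset 2 */
    uint8_t  padding8bits;  /* offset 3, brings src_index to a 2-byte boundary */
    uint16_t src_index;     /* offset 4 */
    uint16_t padding16bits; /* offset 6, brings value to a 4-byte boundary */
    uint32_t value;         /* offset 8 */
};
_Static_assert(offsetof(struct example_event, value) == 8, "unexpected layout");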

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -664,10 +664,11 @@ static void uvm_va_block_cpu_clear_resident_region(uvm_va_block_t *va_block, int
block_update_cpu_resident_mask(va_block);
}
// Clear residency bits from any/all processors that might have had pages resident.
// Note that both the destination processor and any CPU NUMA nodes where pages are
// migrating to need to be skipped as the block logic sets the new page residency
// before clearing the old ones (see uvm_va_block_make_resident_finish()).
// Clear residency bits from any/all processors that might have had pages
// resident. Note that both the destination processor and any CPU NUMA nodes
// where pages are migrating to need to be skipped as the block logic sets the
// new page residency before clearing the old ones
// (see uvm_va_block_make_resident_finish()).
static void uvm_va_block_cpu_clear_resident_all_chunks(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_mask_t *page_mask)
@@ -1328,40 +1329,18 @@ static void cpu_chunk_remove_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_
if (gpu_mapping_addr == 0)
return;
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
}
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk,
uvm_va_block_t *block,
uvm_page_index_t page_index,
uvm_gpu_t *gpu)
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_chunk_size_t chunk_size;
// When the Confidential Computing feature is enabled the transfers don't
// use the DMA mapping of CPU chunks (since it's protected memory), but
// the DMA address of the unprotected dma buffer.
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
status = uvm_cpu_chunk_map_gpu(chunk, gpu);
if (status != NV_OK)
return status;
chunk_size = uvm_cpu_chunk_get_size(chunk);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu),
uvm_va_block_cpu_page_address(block, page_index),
chunk_size,
block,
UVM_ID_CPU);
if (status != NV_OK)
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
return status;
return uvm_cpu_chunk_map_gpu(chunk, gpu);
}
static void block_gpu_unmap_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu_t *gpu)
@@ -1393,7 +1372,7 @@ static NV_STATUS block_gpu_map_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu
uvm_id_value(gpu->id),
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu));
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, page_index, gpu);
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, gpu);
if (status != NV_OK)
goto error;
}
@@ -1468,14 +1447,10 @@ void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block,
}
}
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index)
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block, uvm_cpu_chunk_t *chunk)
{
NV_STATUS status;
uvm_gpu_id_t id;
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
uvm_va_block_region_t chunk_region = uvm_va_block_chunk_region(block, chunk_size, page_index);
// We can't iterate over va_space->registered_gpus because we might be
// on the eviction path, which does not have the VA space lock held. We have
@@ -1489,7 +1464,7 @@ NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
continue;
gpu = uvm_gpu_get(id);
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, chunk_region.first, gpu);
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, gpu);
if (status != NV_OK)
goto error;
}
@@ -1756,7 +1731,7 @@ static NV_STATUS block_populate_overlapping_cpu_chunks(uvm_va_block_t *block,
// before mapping.
chunk_ptr = split_chunks[i];
split_chunks[i] = NULL;
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr, running_page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr);
if (status != NV_OK)
goto done;
}
@@ -1793,7 +1768,7 @@ static NV_STATUS block_populate_overlapping_cpu_chunks(uvm_va_block_t *block,
// before mapping.
chunk_ptr = small_chunks[j];
small_chunks[j] = NULL;
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr, running_page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr);
if (status != NV_OK)
goto done;
}
@@ -1860,7 +1835,7 @@ static NV_STATUS block_add_cpu_chunk(uvm_va_block_t *block,
if (status != NV_OK)
goto out;
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk, page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk);
if (status != NV_OK) {
uvm_cpu_chunk_remove_from_block(block, uvm_cpu_chunk_get_numa_node(chunk), page_index);
goto out;
@@ -3155,8 +3130,8 @@ static NV_STATUS block_populate_pages(uvm_va_block_t *block,
uvm_page_mask_or(pages_staged, pages_staged, scratch_page_mask);
}
// 2. Remove any pages in pages_staged that are on any resident processor
// dest_id can copy from.
// 2. Remove any pages in pages_staged that are on any resident
// processor dest_id can copy from.
if (uvm_processor_mask_and(tmp_processor_mask, can_copy_from_processors, &block->resident)) {
for_each_id_in_mask(id, tmp_processor_mask) {
id_resident_mask = uvm_va_block_resident_mask_get(block, id, NUMA_NO_NODE);
@@ -3210,14 +3185,21 @@ static uvm_gpu_chunk_t *block_phys_page_chunk(uvm_va_block_t *block, block_phys_
return chunk;
}
typedef enum {
REMOTE_EGM_ALLOWED = 0,
REMOTE_EGM_NOT_ALLOWED = 1,
} remote_egm_mode_t;
// Get the physical GPU address of a block's page from the POV of the specified
// GPU. This is the address that should be used for making PTEs for the
// specified GPU.
static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
block_phys_page_t block_page,
uvm_gpu_t *gpu)
uvm_gpu_t *gpu,
remote_egm_mode_t egm_mode)
{
uvm_va_block_gpu_state_t *accessing_gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
bool allow_remote_egm = egm_mode == REMOTE_EGM_ALLOWED;
size_t chunk_offset;
uvm_gpu_chunk_t *chunk;
@@ -3231,7 +3213,7 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
uvm_parent_gpu_t *routing_gpu = uvm_va_space_get_egm_routing_gpu(va_space, gpu, block_page.nid);
if (routing_gpu) {
if (routing_gpu && (allow_remote_egm || routing_gpu == gpu->parent)) {
struct page *page = uvm_cpu_chunk_get_cpu_page(block, chunk, block_page.page_index);
phys_addr = page_to_phys(page);
@@ -3296,9 +3278,14 @@ static uvm_gpu_address_t block_phys_page_copy_address(uvm_va_block_t *block,
// CPU and local GPU accesses can rely on block_phys_page_address, but the
// resulting physical address may need to be converted into virtual.
if (UVM_ID_IS_CPU(block_page.processor) || uvm_id_equal(block_page.processor, gpu->id)) {
uvm_gpu_phys_address_t phys_addr = block_phys_page_address(block, block_page, gpu);
// Do not use remote EGM addresses internally until
// NVLINK STO handling is updated to handle EGM.
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
uvm_gpu_phys_address_t phys_addr = block_phys_page_address(block, block_page, gpu, REMOTE_EGM_NOT_ALLOWED);
// EGM mappings use physical addresses with a PEER aperture.
if (uvm_aperture_is_peer(phys_addr.aperture)) {
UVM_ASSERT(block_check_egm_peer(uvm_va_block_get_va_space(block), gpu, block_page.nid, phys_addr));
return uvm_gpu_address_from_phys(phys_addr);
@@ -3334,7 +3321,7 @@ uvm_gpu_phys_address_t uvm_va_block_res_phys_page_address(uvm_va_block_t *va_blo
UVM_ASSERT(nid != NUMA_NO_NODE);
}
return block_phys_page_address(va_block, block_phys_page(residency, nid, page_index), gpu);
return block_phys_page_address(va_block, block_phys_page(residency, nid, page_index), gpu, REMOTE_EGM_ALLOWED);
}
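A hedged summary of how the new remote_egm_mode_t flag is used: internal copy paths pass REMOTE_EGM_NOT_ALLOWED until STO handling covers EGM, while PTE-writing and reporting paths pass REMOTE_EGM_ALLOWED. The helper below is illustrative only and mirrors the check inside block_phys_page_address().
/* Illustration only: decide whether an EGM routing GPU may be used. */
static bool example_can_use_egm_route(remote_egm_mode_t egm_mode,
                                      const uvm_parent_gpu_t *routing_gpu,
                                      const uvm_parent_gpu_t *local_parent)
{
    bool allow_remote_egm = (egm_mode == REMOTE_EGM_ALLOWED);
    return routing_gpu && (allow_remote_egm || routing_gpu == local_parent);
}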
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
@@ -3949,9 +3936,9 @@ static NV_STATUS block_copy_pages(uvm_va_block_t *va_block,
UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) >= uvm_va_block_region_size(region));
UVM_ASSERT(uvm_va_block_region_size(region) <= uvm_cpu_chunk_get_size(dst_chunk));
// CPU-to-CPU copies using memcpy() don't have any inherent ordering with
// copies using GPU CEs. So, we have to make sure that all previously
// submitted work is complete.
// CPU-to-CPU copies using memcpy() don't have any inherent ordering
// with copies using GPU CEs. So, we have to make sure that all
// previously submitted work is complete.
status = uvm_tracker_wait(&va_block->tracker);
if (status != NV_OK)
return status;
@@ -4204,9 +4191,9 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
uvm_processor_mask_set(&block_context->make_resident.all_involved_processors, copying_gpu->id);
// This function is called just once per VA block and needs to
// receive the "main" cause for the migration (it mainly checks if
// we are in the eviction path). Therefore, we pass cause instead
// of contig_cause
// receive the "main" cause for the migration (it mainly checks
// if we are in the eviction path). Therefore, we pass cause
// instead of contig_cause.
uvm_tools_record_block_migration_begin(block,
&push,
dst_id,
@@ -4233,8 +4220,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
contig_cause = page_cause;
if (block_copy_should_use_push(block, &copy_state)) {
// When CC is enabled, transfers between GPU and CPU don't rely on
// any GPU mapping of CPU chunks, physical or virtual.
// When CC is enabled, transfers between GPU and CPU don't rely
// on any GPU mapping of CPU chunks, physical or virtual.
if (UVM_ID_IS_CPU(src_id) && g_uvm_global.conf_computing_enabled)
can_cache_src_phys_addr = false;
@@ -4244,8 +4231,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
// Computing the physical address is a non-trivial operation and
// seems to be a performance limiter on systems with 2 or more
// NVLINK links. Therefore, for physically-contiguous block
// storage, we cache the start address and compute the page address
// using the page index.
// storage, we cache the start address and compute the page
// address using the page index.
if (can_cache_src_phys_addr) {
copy_state.src.gpu_address = block_phys_page_copy_address(block,
block_phys_page(src_id,
@@ -5187,12 +5174,13 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
if (!scratch_residency_mask)
return NV_ERR_NO_MEMORY;
// We cannot read-duplicate on different CPU NUMA nodes since there is only one
// CPU page table. So, the page has to migrate from the source NUMA node to the
// destination one.
// We cannot read-duplicate on different CPU NUMA nodes since there is only
// one CPU page table. So, the page has to migrate from the source NUMA node
// to the destination one.
// In order to correctly map pages on the destination NUMA node, all pages
// resident on other NUMA nodes have to be unmapped. Otherwise, their WRITE
// permission will be revoked but they'll remain mapped on the source NUMA node.
// permission will be revoked but they'll remain mapped on the source NUMA
// node.
if (uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) &&
UVM_ID_IS_CPU(va_block_context->make_resident.dest_id)) {
uvm_page_mask_t *dest_nid_resident = uvm_va_block_resident_mask_get(va_block,
@@ -5623,7 +5611,8 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
}
// atomic mappings from GPUs with disabled system-wide atomics are treated
// as write mappings. Therefore, we remove them from the atomic mappings mask
// as write mappings. Therefore, we remove them from the atomic mappings
// mask
uvm_processor_mask_and(atomic_mappings, atomic_mappings, &va_space->system_wide_atomics_enabled_processors);
if (!uvm_processor_mask_empty(read_mappings)) {
@@ -5696,7 +5685,8 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
*residency_has_native_atomics->bitmap,
*va_space->system_wide_atomics_enabled_processors.bitmap);
// Only one processor outside of the native group can have atomics enabled
// Only one processor outside of the native group can have atomics
// enabled
UVM_ASSERT_MSG(uvm_processor_mask_get_count(atomic_mappings) == 1,
"Too many atomics mappings to %s from processors with non-native atomics\n"
"Resident: 0x%lx - Mappings R: 0x%lx W: 0x%lx A: 0x%lx -"
@@ -5714,9 +5704,9 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
non_native_atomics = &mapping_masks->non_native_atomics;
// One or more processors within the native group have atomics enabled.
// All processors outside of that group may have write but not atomic
// permissions.
// One or more processors within the native group have atomics
// enabled. All processors outside of that group may have write but
// not atomic permissions.
uvm_processor_mask_andnot(non_native_atomics, atomic_mappings, residency_has_native_atomics);
UVM_ASSERT_MSG(uvm_processor_mask_empty(non_native_atomics),
@@ -6143,7 +6133,10 @@ static void block_gpu_pte_write_4k(uvm_va_block_t *block,
if (page_index >= contig_region.outer || nid != contig_nid) {
contig_region = block_phys_contig_region(block, page_index, resident_id, nid);
contig_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, contig_region.first), gpu);
contig_addr = block_phys_page_address(block,
block_phys_page(resident_id, nid, contig_region.first),
gpu,
REMOTE_EGM_ALLOWED);
page_addr = contig_addr;
contig_nid = nid;
}
@@ -6368,7 +6361,10 @@ static void block_gpu_pte_write_big(uvm_va_block_t *block,
if (big_region.first >= contig_region.outer || nid != contig_nid) {
contig_region = block_phys_contig_region(block, big_region.first, resident_id, nid);
contig_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, contig_region.first), gpu);
contig_addr = block_phys_page_address(block,
block_phys_page(resident_id, nid, contig_region.first),
gpu,
REMOTE_EGM_ALLOWED);
page_addr = contig_addr;
contig_nid = nid;
}
@@ -6520,7 +6516,7 @@ static void block_gpu_pte_write_2m(uvm_va_block_t *block,
block_mark_cpu_page_dirty(block, 0, nid);
}
page_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, 0), gpu);
page_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, 0), gpu, REMOTE_EGM_ALLOWED);
pte_val = tree->hal->make_pte(page_addr.aperture, page_addr.address, new_prot, pte_flags);
uvm_pte_batch_write_pte(pte_batch, pte_addr, pte_val, pte_size);
@@ -10037,16 +10033,8 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
uvm_chunk_size_t new_size;
uvm_gpu_t *gpu;
NvU64 gpu_mapping_addr;
uvm_processor_mask_t *gpu_split_mask;
uvm_gpu_id_t id;
NV_STATUS status;
gpu_split_mask = uvm_processor_mask_cache_alloc();
if (!gpu_split_mask)
return NV_ERR_NO_MEMORY;
if (chunk_size == UVM_CHUNK_SIZE_2M)
new_size = UVM_CHUNK_SIZE_64K;
else
@@ -10054,45 +10042,11 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
UVM_ASSERT(IS_ALIGNED(chunk_size, new_size));
uvm_processor_mask_zero(gpu_split_mask);
for_each_gpu_id(id) {
if (!uvm_va_block_gpu_state_get(block, id))
continue;
gpu = uvm_gpu_get(id);
// If the parent chunk has not been mapped, there is nothing to split.
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
if (gpu_mapping_addr == 0)
continue;
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
new_size);
if (status != NV_OK)
goto merge;
uvm_processor_mask_set(gpu_split_mask, id);
}
if (new_size == UVM_CHUNK_SIZE_64K)
status = block_split_cpu_chunk_to_64k(block, nid);
else
status = block_split_cpu_chunk_to_4k(block, page_index, nid);
if (status != NV_OK) {
merge:
for_each_gpu_id_in_mask(id, gpu_split_mask) {
gpu = uvm_gpu_get(id);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
chunk_size);
}
}
uvm_processor_mask_cache_free(gpu_split_mask);
return status;
}
@@ -10109,8 +10063,8 @@ static NV_STATUS block_prealloc_cpu_chunk_storage(uvm_va_block_t *existing, uvm_
UVM_ASSERT(uvm_cpu_storage_get_type(node_state) == UVM_CPU_CHUNK_STORAGE_MIXED);
existing_mixed = uvm_cpu_storage_get_ptr(node_state);
// Pre-allocate chunk storage for the new block. By definition, the new block
// will contain either 64K and/or 4K chunks.
// Pre-allocate chunk storage for the new block. By definition, the new
// block will contain either 64K and/or 4K chunks.
//
// We do this here so there are no failures in block_split_cpu().
new_mixed = uvm_kvmalloc_zero(sizeof(*new_mixed));
@@ -10182,8 +10136,8 @@ static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_bloc
for_each_possible_uvm_node(nid) {
splitting_chunk = uvm_cpu_chunk_get_chunk_for_page(existing, nid, page_index);
// If the page covering the split point has not been populated, there is no
// need to split.
// If the page covering the split point has not been populated, there is
// no need to split.
if (!splitting_chunk)
continue;
@@ -10247,7 +10201,6 @@ static void block_merge_cpu_chunks_to_2m(uvm_va_block_t *block, uvm_page_index_t
static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t page_index, int nid)
{
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
uvm_gpu_id_t id;
if (!chunk)
return;
@@ -10259,25 +10212,6 @@ static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t p
UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_64K);
block_merge_cpu_chunks_to_2m(block, page_index, nid);
}
chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
for_each_gpu_id(id) {
NvU64 gpu_mapping_addr;
uvm_gpu_t *gpu;
if (!uvm_va_block_gpu_state_get(block, id))
continue;
gpu = uvm_gpu_get(id);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
if (gpu_mapping_addr == 0)
continue;
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
uvm_cpu_chunk_get_size(chunk));
}
}
static void block_merge_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
@@ -10695,9 +10629,6 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
size_t new_pages = uvm_va_block_num_cpu_pages(new);
size_t existing_pages, existing_pages_4k, existing_pages_big, new_pages_big;
uvm_pte_bits_gpu_t pte_bit;
uvm_cpu_chunk_t *cpu_chunk;
uvm_page_index_t page_index;
int nid;
if (!existing_gpu_state)
return;
@@ -10711,14 +10642,6 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
UVM_ASSERT(PAGE_ALIGNED(existing->start));
existing_pages = (new->start - existing->start) / PAGE_SIZE;
for_each_possible_uvm_node(nid) {
for_each_cpu_chunk_in_block(cpu_chunk, page_index, new, nid) {
uvm_pmm_sysmem_mappings_reparent_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_gpu_phys_addr(cpu_chunk, gpu),
new);
}
}
block_copy_split_gpu_chunks(existing, new, gpu);
block_split_page_mask(&existing_gpu_state->resident,
@@ -10727,8 +10650,10 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
new_pages);
for (pte_bit = 0; pte_bit < UVM_PTE_BITS_GPU_MAX; pte_bit++) {
block_split_page_mask(&existing_gpu_state->pte_bits[pte_bit], existing_pages,
&new_gpu_state->pte_bits[pte_bit], new_pages);
block_split_page_mask(&existing_gpu_state->pte_bits[pte_bit],
existing_pages,
&new_gpu_state->pte_bits[pte_bit],
new_pages);
}
// Adjust page table ranges.
@@ -11113,7 +11038,8 @@ static NV_STATUS do_block_add_mappings_after_migration(uvm_va_block_t *va_block,
bool map_processor_has_enabled_system_wide_atomics =
uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, map_processor_id);
// Write mappings from processors with disabled system-wide atomics are treated like atomics
// Write mappings from processors with disabled system-wide atomics are
// treated like atomics
if (new_map_prot == UVM_PROT_READ_WRITE && !map_processor_has_enabled_system_wide_atomics)
final_map_prot = UVM_PROT_READ_WRITE_ATOMIC;
else
@@ -11346,14 +11272,17 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
block_page_authorized_processors(va_block, page_index, UVM_PROT_READ_WRITE_ATOMIC, atomic_mappings);
// Exclude processors with system-wide atomics disabled from atomic_mappings
// Exclude processors with system-wide atomics disabled from
// atomic_mappings
uvm_processor_mask_and(atomic_mappings, atomic_mappings, &va_space->system_wide_atomics_enabled_processors);
// Exclude the processor for which the mapping protections are being computed
// Exclude the processor for which the mapping protections are being
// computed
uvm_processor_mask_clear(atomic_mappings, processor_id);
// If there is any processor with atomic mapping, check if it has native atomics to the processor
// with the resident copy. If it does not, we can only map READ ONLY
// If there is any processor with atomic mapping, check if it has native
// atomics to the processor with the resident copy. If it does not, we
// can only map READ ONLY
atomic_id = uvm_processor_mask_find_first_id(atomic_mappings);
if (UVM_ID_IS_VALID(atomic_id) &&
!uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(residency)], atomic_id)) {
@@ -11364,7 +11293,8 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
block_page_authorized_processors(va_block, page_index, UVM_PROT_READ_WRITE, write_mappings);
// Exclude the processor for which the mapping protections are being computed
// Exclude the processor for which the mapping protections are being
// computed
uvm_processor_mask_clear(write_mappings, processor_id);
// At this point, any processor with atomic mappings either has native
@@ -11639,31 +11569,32 @@ static uvm_processor_id_t block_select_processor_residency(uvm_va_block_t *va_bl
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(preferred_location)], processor_id))
return preferred_location;
// Check if we should map the closest resident processor remotely on remote CPU fault
// Check if we should map the closest resident processor remotely on remote
// CPU fault
//
// When faulting on CPU, there's a linux process on behalf of it, which is associated
// with a unique VM pointed by current->mm. A block of memory residing on GPU is also
// associated with VM, pointed by va_block_context->mm. If they match, it's a regular
// (local) fault, and we may want to migrate a page from GPU to CPU.
// If it's a 'remote' fault, i.e. linux process differs from one associated with block
// VM, we might preserve residence.
// When faulting on CPU, there's a linux process on behalf of it, which is
// associated with a unique VM pointed by current->mm. A block of memory
// residing on GPU is also associated with VM, pointed by
// va_block_context->mm. If they match, it's a regular (local) fault, and we
// may want to migrate a page from GPU to CPU. If it's a 'remote' fault,
// i.e., linux process differs from one associated with block VM, we might
// preserve residence.
//
// Establishing a remote fault without access counters means the memory could stay in
// the wrong spot for a long time, which is why we prefer to avoid creating remote
// mappings. However when NIC accesses a memory residing on GPU, it's worth to keep it
// in place for NIC accesses.
// Establishing a remote fault without access counters means the memory
// could stay in the wrong spot for a long time, which is why we prefer to
// avoid creating remote mappings. However, when a NIC accesses memory
// residing on the GPU, it's worth keeping it in place for NIC accesses.
//
// The logic that's used to detect remote faulting also keeps memory in place for
// ptrace accesses. We would prefer to control those policies separately, but the
// NIC case takes priority.
// If the accessing processor is CPU, we're either handling a fault
// from other than owning process, or we're handling an MOMC
// notification. Only prevent migration for the former.
// The logic that's used to detect remote faulting also keeps memory in
// place for ptrace accesses. We would prefer to control those policies
// separately, but the NIC case takes priority. If the accessing processor
// is the CPU and we're handling a fault from a process other than the
// owning one, we want to prevent a migration.
if (UVM_ID_IS_CPU(processor_id) &&
operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS &&
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(closest_resident_processor)], processor_id) &&
va_block_context->mm != current->mm) {
UVM_ASSERT(va_block_context->mm != NULL);
UVM_ASSERT(operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS);
return closest_resident_processor;
}
@@ -11693,7 +11624,8 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
// For HMM allocations UVM doesn't always control allocation of the
// destination page as the kernel may have already allocated one. Therefore
// we can't respect the preferred node ID for HMM pages.
// TODO: Bug 4453874: [UVM-HMM] Respect the preferred CPU NUMA Node ID when making a HMM page resident
// TODO: Bug 4453874: [UVM-HMM] Respect the preferred CPU NUMA Node ID when
// making a HMM page resident
if (uvm_va_block_is_hmm(va_block))
return NUMA_NO_NODE;
@@ -11867,9 +11799,12 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
break;
case UVM_SERVICE_OPERATION_ACCESS_COUNTERS:
cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
service_context->block_context->make_resident.access_counters_buffer_index =
service_context->access_counters_buffer_index;
break;
default:
UVM_ASSERT_MSG(false, "Invalid operation value %d\n", service_context->operation);
// Set cause to silence compiler warning that it may be unused.
cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
break;
@@ -11955,16 +11890,21 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
if (status != NV_OK)
return status;
// TODO: Bug 5069427: [uvm] Fix the migration STO error checks.
// Same as above for nvlink errors. Check the source GPU as well
// as all its peers.
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
for_each_gpu_in_mask(peer_gpu, &gpu->peer_info.peer_gpu_mask) {
status = uvm_gpu_check_nvlink_error_no_rm(peer_gpu);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
uvm_processor_mask_set(&service_context->gpus_to_check_for_nvlink_errors, peer_gpu->id);
if (status != NV_OK)
if (status != NV_OK) {
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return status;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
status = uvm_gpu_check_nvlink_error_no_rm(gpu);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
@@ -13542,7 +13482,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
}
else {
params->resident_physical_address[count] =
block_phys_page_address(block, block_page, uvm_gpu_get(id)).address;
block_phys_page_address(block, block_page, uvm_gpu_get(id), REMOTE_EGM_ALLOWED).address;
}
++count;
@@ -13572,7 +13512,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
block_page = block_phys_page(processor_to_map, nid, page_index);
if (!UVM_ID_IS_CPU(id)) {
uvm_gpu_t *gpu = uvm_gpu_get(id);
uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, gpu);
uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, gpu, REMOTE_EGM_ALLOWED);
NvU64 phys_addr = gpu_phys_addr.address;
if (UVM_ID_IS_CPU(block_page.processor)) {

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -205,12 +205,12 @@ typedef struct
//
// The indices represent the corresponding big PTEs in the block's interior.
// For example, a block with alignment and size of one 4k page on either
// side of a big page will only use bit 0. Use uvm_va_block_big_page_index to look
// the big_ptes index of a page.
// side of a big page will only use bit 0. Use uvm_va_block_big_page_index
// to look up the big_ptes index of a page.
//
// The block might not be able to fit any big PTEs, in which case this
// bitmap is always zero. Use uvm_va_block_gpu_num_big_pages to find the number of
// valid bits in this mask.
// bitmap is always zero. Use uvm_va_block_gpu_num_big_pages to find the
// number of valid bits in this mask.
DECLARE_BITMAP(big_ptes, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
// See the comments for uvm_va_block_mmap_t::cpu.pte_bits.
@@ -565,8 +565,8 @@ struct uvm_va_block_wrapper_struct
// testing only.
bool inject_eviction_error;
// Force the next successful chunk allocation to then fail. Used for testing
// only to simulate driver metadata allocation failure.
// Force the next successful chunk allocation to then fail. Used for
// testing only to simulate driver metadata allocation failure.
bool inject_populate_error;
// Force the next split on this block to fail.
@@ -1250,8 +1250,8 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// context.
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint is set by this function.
//
@@ -1282,8 +1282,8 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
// pages to new_residency.
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint should be set before calling this function.
//
@@ -1311,8 +1311,8 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
// to the new residency (which may be remote).
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must be initialized by calling uvm_va_block_service_copy()
// before calling this function.
@@ -1499,8 +1499,8 @@ uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu
//
// service_context and service_context->block_context must not be NULL and
// policy for the region must match. See the comments for
// uvm_va_block_check_policy_is_valid(). If va_block is a HMM block,
// service->block_context->hmm.vma must be valid. See the comments for
// uvm_va_block_check_policy_is_valid(). If va_block is a HMM block,
// service->block_context->hmm.vma must be valid. See the comments for
// uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
//
// LOCKING: The caller must hold the va_block lock. If
@@ -1550,7 +1550,8 @@ void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);
// Frees all the remaining free chunks and unpins all the used chunks.
void uvm_va_block_retry_deinit(uvm_va_block_retry_t *uvm_va_block_retry, uvm_va_block_t *va_block);
// Evict all chunks from the block that are subchunks of the passed in root_chunk.
// Evict all chunks from the block that are subchunks of the passed in
// root_chunk.
//
// Add all the work tracking the eviction to the tracker.
//
@@ -2139,16 +2140,12 @@ struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_
struct page *uvm_va_block_get_cpu_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);
// Physically map a CPU chunk so it is DMA'able from all registered GPUs.
// nid cannot be NUMA_NO_NODE.
// Locking: The va_block lock must be held.
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index);
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);
// Physically unmap a CPU chunk from all registered GPUs.
// Locking: The va_block lock must be held.
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk);
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);
// Remove any CPU chunks in the given region.
// Locking: The va_block lock must be held.
@@ -2163,8 +2160,7 @@ NvU64 uvm_va_block_get_physical_size(uvm_va_block_t *block,
uvm_page_index_t page_index);
// Get CPU page size or 0 if it is not mapped
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
uvm_page_index_t page_index);
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index);
// Get GPU page size or 0 if it is not mapped on the given GPU
NvU64 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
@@ -2262,8 +2258,8 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block,
// otherwise it will be initialized and deinitialized by the macro.
//
// The macro also locks and unlocks the block's lock internally as it's expected
// that the block's lock has been unlocked and relocked whenever the function call
// returns NV_ERR_MORE_PROCESSING_REQUIRED and this makes it clear that the
// that the block's lock has been unlocked and relocked whenever the function
// call returns NV_ERR_MORE_PROCESSING_REQUIRED and this makes it clear that the
// block's state is not locked across these calls.
#define UVM_VA_BLOCK_LOCK_RETRY(va_block, block_retry, call) ({ \
NV_STATUS __status; \

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -235,6 +235,10 @@ typedef struct
// Event that triggered the call
uvm_make_resident_cause_t cause;
// Access counters notification buffer index. Only valid when cause is
// UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER.
NvU32 access_counters_buffer_index;
} make_resident;
// State used by the mapping APIs (unmap, map, revoke). This could be used

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -558,7 +558,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
if (gpu->parent->access_counters_supported)
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_access_counters_disable(gpu, va_space);
}
@@ -576,7 +576,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
uvm_deferred_free_object_list(&deferred_free_list);
// Normally we'd expect this to happen as part of uvm_mm_release()
// Normally we'd expect this to happen as part of uvm_release_mm()
// but if userspace never initialized uvm_mm_fd that won't happen.
// We don't have to take the va_space_mm spinlock and update state
// here because we know no other thread can be in or subsequently
@@ -760,7 +760,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
bool gpu_can_access_sysmem = true;
uvm_processor_mask_t *peers_to_release = NULL;
status = uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &gpu);
status = uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &va_space->test.parent_gpu_error, &gpu);
if (status != NV_OK)
return status;
@@ -936,7 +936,7 @@ done:
// registered GPU: the enablement step would have failed before even
// discovering that the GPU is already registered.
if (uvm_parent_gpu_access_counters_required(gpu->parent))
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_access_counters_disable(gpu, va_space);
uvm_gpu_release(gpu);
}
@@ -1011,7 +1011,7 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
// acquires the VA space lock after the unregistration does. Both outcomes
// result on valid states.
if (disable_access_counters)
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_access_counters_disable(gpu, va_space);
// mmap_lock is needed to establish CPU mappings to any pages evicted from
// the GPU if accessed by CPU is set for them.
@@ -2207,6 +2207,17 @@ NV_STATUS uvm_test_va_space_inject_error(UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS *
atomic_set(&va_space->test.migrate_vma_allocation_fail_nth, params->migrate_vma_allocation_fail_nth);
atomic_set(&va_space->test.va_block_allocation_fail_nth, params->va_block_allocation_fail_nth);
va_space->test.parent_gpu_error.access_counters_alloc_buffer = params->gpu_access_counters_alloc_buffer;
va_space->test.parent_gpu_error.access_counters_alloc_block_context =
params->gpu_access_counters_alloc_block_context;
va_space->test.parent_gpu_error.access_counters_batch_context_notifications =
params->access_counters_batch_context_notifications;
va_space->test.parent_gpu_error.access_counters_batch_context_notification_cache =
params->access_counters_batch_context_notification_cache;
va_space->test.parent_gpu_error.isr_access_counters_alloc = params->gpu_isr_access_counters_alloc;
va_space->test.parent_gpu_error.isr_access_counters_alloc_stats_cpu =
params->gpu_isr_access_counters_alloc_stats_cpu;
return NV_OK;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -424,6 +424,8 @@ struct uvm_va_space_struct
bool force_cpu_to_cpu_copy_with_ce;
bool allow_allocation_from_movable;
uvm_test_parent_gpu_inject_error_t parent_gpu_error;
} test;
// Queue item for deferred f_ops->release() handling

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -37,10 +37,10 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_volta_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Volta covers 128 TB and that's the minimum
@@ -82,9 +82,9 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = true;
// Although access counters are supported in HW, it only notifies memory
// accesses using physical addresses, which is not supported in SW.
parent_gpu->access_counters_supported = false;
parent_gpu->fault_cancel_va_supported = true;

View File

@@ -1,228 +0,0 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "clc365.h"
#include "uvm_volta_fault_buffer.h"
typedef struct {
NvU8 bufferEntry[NVC365_NOTIFY_BUF_SIZE];
} access_counter_buffer_entry_c365_t;
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnSet;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnClear;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
// No-op, this function is only used by pulse-based interrupt GPUs.
}
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
{
return NVC365_NOTIFY_BUF_SIZE;
}
static uvm_aperture_t get_access_counter_inst_aperture(NvU32 *access_counter_entry)
{
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_APERTURE);
switch (hw_aperture_value) {
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
return UVM_APERTURE_VID;
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
return UVM_APERTURE_SYS;
}
UVM_ASSERT_MSG(false, "Invalid inst aperture value: %d\n", hw_aperture_value);
return UVM_APERTURE_MAX;
}
static uvm_aperture_t get_access_counter_aperture(NvU32 *access_counter_entry)
{
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, APERTURE);
NvU32 peer_id = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, PEER_ID);
switch (hw_aperture_value) {
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
return UVM_APERTURE_VID;
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_PEER_MEM:
return UVM_APERTURE_PEER(peer_id);
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
return UVM_APERTURE_SYS;
}
UVM_ASSERT_MSG(false, "Invalid aperture value: %d\n", hw_aperture_value);
return UVM_APERTURE_MAX;
}
static uvm_gpu_address_t get_address(uvm_parent_gpu_t *parent_gpu, NvU32 *access_counter_entry)
{
NvU64 address;
bool is_virtual;
NvU64 addr_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_HI);
NvU64 addr_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_LO);
NvU32 addr_type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_TYPE);
address = addr_lo + (addr_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO));
is_virtual = (addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA);
if (is_virtual) {
address = uvm_parent_gpu_canonical_address(parent_gpu, address);
return uvm_gpu_address_virtual(address);
}
else {
uvm_aperture_t aperture = get_access_counter_aperture(access_counter_entry);
UVM_ASSERT(parent_gpu->access_counters_can_use_physical_addresses);
UVM_ASSERT_MSG(addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GPA,
"Invalid address type%u\n", addr_type_value);
return uvm_gpu_address_physical(aperture, address);
}
}
static uvm_access_counter_type_t get_access_counter_type(NvU32 *access_counter_entry)
{
NvU32 type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, TYPE);
if (type_value == NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU)
return UVM_ACCESS_COUNTER_TYPE_MOMC;
else
return UVM_ACCESS_COUNTER_TYPE_MIMC;
}
static NvU32 *get_access_counter_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
access_counter_buffer_entry_c365_t *buffer_start;
NvU32 *access_counter_entry;
UVM_ASSERT(index < parent_gpu->access_counter_buffer_info.max_notifications);
buffer_start = (access_counter_buffer_entry_c365_t *)parent_gpu->access_counter_buffer_info.rm_info.bufferAddress;
access_counter_entry = (NvU32 *)&buffer_start[index];
return access_counter_entry;
}
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
NvU32 *access_counter_entry;
bool is_valid;
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
is_valid = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID);
return is_valid;
}
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
NvU32 *access_counter_entry;
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
WRITE_HWCONST_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID, FALSE);
}
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
NvU32 *access_counter_entry;
// Valid bit must be set before this function is called
UVM_ASSERT(uvm_hal_volta_access_counter_buffer_entry_is_valid(parent_gpu, index));
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
buffer_entry->counter_type = get_access_counter_type(access_counter_entry);
buffer_entry->address = get_address(parent_gpu, access_counter_entry);
if (buffer_entry->address.is_virtual) {
NvU64 inst_hi, inst_lo;
inst_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_HI);
inst_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_LO);
buffer_entry->virtual_info.instance_ptr.address =
inst_lo + (inst_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, INST_LO));
// HW value contains the 4K page number. Shift to build the full address
buffer_entry->virtual_info.instance_ptr.address <<= 12;
buffer_entry->virtual_info.instance_ptr.aperture = get_access_counter_inst_aperture(access_counter_entry);
buffer_entry->virtual_info.mmu_engine_id =
READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, MMU_ENGINE_ID);
buffer_entry->virtual_info.mmu_engine_type = UVM_MMU_ENGINE_TYPE_GRAPHICS;
// MMU engine id aligns with the fault buffer packets. Therefore, we
// reuse the helper to compute the VE ID from the fault buffer class.
buffer_entry->virtual_info.ve_id =
parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->virtual_info.mmu_engine_id,
buffer_entry->virtual_info.mmu_engine_type);
}
else if (buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC) {
// Ignore any set bit beyond 47 since it is the maximum physical address
// supported by the GPU. See the definition of
// uvm_gpu_t::dma_addressable_start for why higher bits might be set.
const NvU64 mask_46_0 = (0x1UL << 47) - 1;
buffer_entry->address.address &= mask_46_0;
}
buffer_entry->counter_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, COUNTER_VAL);
buffer_entry->sub_granularity = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, SUB_GRANULARITY);
buffer_entry->bank = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, BANK);
buffer_entry->tag = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, NOTIFY_TAG);
// Automatically clear valid bit for the entry in the access counter buffer
uvm_hal_volta_access_counter_buffer_entry_clear_valid(parent_gpu, index);
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -38,7 +38,7 @@ typedef struct {
NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
{
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferPut);
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferPut);
NvU32 index = READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, PTR);
UVM_ASSERT(READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, GETPTR_CORRUPTED) ==
NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED_NO);
@@ -48,8 +48,8 @@ NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu)
{
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer_info.replayable.max_faults);
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer.replayable.max_faults);
return READ_HWVALUE(get, _PFB_PRI_MMU, FAULT_BUFFER_GET, PTR);
}
@@ -58,7 +58,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
{
NvU32 get = HWVALUE(_PFB_PRI_MMU, FAULT_BUFFER_GET, PTR, index);
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
// If HW has detected an overflow condition (PUT == GET - 1 and a fault has
// arrived, which is dropped due to no more space in the fault buffer), it
@@ -70,7 +70,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
// resulting in the overflow condition being instantly reasserted. However,
// if the index is updated first and then the OVERFLOW bit is cleared such
// a collision will not cause a reassertion of the overflow condition.
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, get);
// Clearing GETPTR_CORRUPTED and OVERFLOW is not needed when GSP-RM owns
// the HW replayable fault buffer, because UVM does not write to the actual
@@ -82,7 +82,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
// Clear the GETPTR_CORRUPTED and OVERFLOW bits.
get |= HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, GETPTR_CORRUPTED, CLEAR) |
HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, OVERFLOW, CLEAR);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, get);
}
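
The ordering argument in the comment above can be shown with a hedged sketch (not UVM code): publish the new GET index first, then clear the sticky error bits in a second write, so a racing fault cannot observe the stale index and immediately reassert OVERFLOW. The bit positions and the register stand-in below are made-up assumptions.

#include <stdint.h>
#include <stdio.h>

#define DEMO_GET_PTR_MASK      0x000FFFFFu  /* assumed index field */
#define DEMO_GETPTR_CORRUPTED  (1u << 30)   /* assumed write-1-to-clear bit */
#define DEMO_OVERFLOW          (1u << 31)   /* assumed write-1-to-clear bit */

static volatile uint32_t demo_get_reg;      /* stand-in for the HW register */

static void demo_write_get(uint32_t index)
{
    /* Step 1: publish the new index so the producer sees free space. */
    demo_get_reg = index & DEMO_GET_PTR_MASK;

    /* Step 2: clear the sticky error bits in a separate write. */
    demo_get_reg = (index & DEMO_GET_PTR_MASK) | DEMO_GETPTR_CORRUPTED | DEMO_OVERFLOW;
}

int main(void)
{
    demo_write_get(42u);
    printf("get register = 0x%08x\n", (unsigned)demo_get_reg);
    return 0;
}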
// TODO: Bug 1835884: [uvm] Query the maximum number of subcontexts from RM
@@ -234,9 +234,9 @@ static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
fault_buffer_entry_c369_t *buffer_start;
NvU32 *fault_entry;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
buffer_start = (fault_buffer_entry_c369_t *)parent_gpu->fault_buffer_info.rm_info.replayable.bufferAddress;
buffer_start = (fault_buffer_entry_c369_t *)parent_gpu->fault_buffer.rm_info.replayable.bufferAddress;
fault_entry = (NvU32 *)&buffer_start[index];
return fault_entry;
@@ -247,10 +247,10 @@ static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t
{
UvmFaultMetadataPacket *fault_entry_metadata;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
fault_entry_metadata = parent_gpu->fault_buffer.rm_info.replayable.bufferMetadata;
UVM_ASSERT(fault_entry_metadata != NULL);
return fault_entry_metadata + index;
@@ -359,7 +359,7 @@ static void parse_fault_entry_common(uvm_parent_gpu_t *parent_gpu,
UVM_ASSERT(gpc_utlb_id < parent_gpu->utlb_per_gpc_count);
utlb_id = buffer_entry->fault_source.gpc_id * parent_gpu->utlb_per_gpc_count + gpc_utlb_id;
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer_info.replayable.utlb_count);
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer.replayable.utlb_count);
buffer_entry->fault_source.utlb_id = utlb_id;
}
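
A hedged sketch (not UVM code) of the index flattening used above: a (GPC id, per-GPC uTLB id) pair collapses into one global uTLB index so a single array can track per-uTLB fault state. The counts in the example are illustrative.

#include <assert.h>
#include <stdio.h>

static unsigned demo_flatten_utlb_id(unsigned gpc_id, unsigned gpc_utlb_id,
                                     unsigned utlb_per_gpc_count)
{
    assert(gpc_utlb_id < utlb_per_gpc_count);
    return gpc_id * utlb_per_gpc_count + gpc_utlb_id;
}

int main(void)
{
    /* GPC 3, uTLB 1 of 2 per GPC -> global index 7. */
    printf("%u\n", demo_flatten_utlb_id(3u, 1u, 2u));
    return 0;
}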

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -136,64 +136,6 @@ void uvm_hal_volta_host_clear_faulted_channel_method(uvm_push_t *push,
clear_type_value);
}
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push)
{
NV_PUSH_4U(C36F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, ALL));
}
static NvU32 get_access_counter_type_value(uvm_access_counter_type_t type)
{
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC;
else if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC;
else
UVM_ASSERT_MSG(false, "Invalid access counter type %u\n", type);
return 0;
}
static NvU32 get_access_counter_targeted_type_value(uvm_access_counter_type_t type)
{
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC;
else if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC;
else
UVM_ASSERT_MSG(false, "Invalid access counter type %u\n", type);
return 0;
}
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type)
{
NvU32 type_value = get_access_counter_type_value(type);
NV_PUSH_4U(C36F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, type_value));
}
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
NvU32 targeted_type_value = get_access_counter_targeted_type_value(buffer_entry->counter_type);
NV_PUSH_4U(C36F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, HWVALUE(C36F, MEM_OP_C, ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG, buffer_entry->tag),
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, TARGETED) |
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, targeted_type_value) |
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank));
}
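
As a hedged illustration of how payload words such as MEM_OP_D are built in the removed helpers above, the sketch below ORs fields shifted into assumed bit ranges. The macros and positions are invented for the example; the real encodings come from the class headers (for example the C36F definitions), not from this sketch.

#include <stdint.h>
#include <stdio.h>

/* Place a value into an assumed (shift, width) field of a 32-bit word. */
#define DEMO_FIELD(value, shift, width) \
    (((uint32_t)(value) & ((1u << (width)) - 1u)) << (shift))

#define DEMO_OP_ACCESS_COUNTER_CLR  DEMO_FIELD(0x6u, 27, 5)  /* assumed opcode   */
#define DEMO_CLR_TYPE_TARGETED      DEMO_FIELD(0x3u, 20, 2)  /* assumed clr type */

static uint32_t demo_clear_targeted_payload(uint32_t targeted_type, uint32_t bank)
{
    return DEMO_OP_ACCESS_COUNTER_CLR |
           DEMO_CLR_TYPE_TARGETED |
           DEMO_FIELD(targeted_type, 18, 2) |   /* MIMC vs. MOMC, assumed field  */
           DEMO_FIELD(bank, 12, 4);             /* notification bank, assumed    */
}

int main(void)
{
    printf("0x%08x\n", demo_clear_targeted_payload(0u, 2u));
    return 0;
}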
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,