Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git, synced 2026-02-02 06:29:47 +00:00
550.120
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016 NVIDIA Corporation
+    Copyright (c) 2016-2024 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -81,7 +81,7 @@
 #define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS)
 
 // This exists in order to have a function to place a breakpoint on:
-void on_nvq_assert(void)
+static void on_nvq_assert(void)
 {
     (void)NULL;
 }
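The breakpoint-anchor pattern above (an intentionally empty function that every failing assertion calls, so one debugger breakpoint catches them all) is easy to reproduce in plain C. A minimal userspace sketch follows; the MY_ASSERT macro and its printf reporting are illustrative stand-ins, not the module's actual assertion plumbing:

#include <stdio.h>

// Deliberately empty: exists only so a single debugger breakpoint
// ("break on_assert") stops on every failed assertion below.
static void on_assert(void)
{
    (void)NULL;
}

// Hypothetical assertion macro modeled on the pattern in the diff.
#define MY_ASSERT(cond)                                        \
    do {                                                       \
        if (!(cond)) {                                         \
            printf("assertion failed: %s (%s:%d)\n",           \
                   #cond, __FILE__, __LINE__);                 \
            on_assert();                                       \
        }                                                      \
    } while (0)

int main(void)
{
    MY_ASSERT(1 + 1 == 2);  // passes silently
    MY_ASSERT(2 + 2 == 5);  // reports and calls on_assert()
    return 0;
}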
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -176,7 +176,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
 {
 
     unsigned i, j;
-    const static unsigned attempts = 3;
+    static const unsigned attempts = 3;
     struct task_struct *thread[3];
 
     for (i = 0;; i++) {
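The `const static` to `static const` change is a specifier-ordering cleanup: C11 6.11.5 marks any placement of a storage-class specifier other than first in a declaration as obsolescent, and GCC reports the old form with -Wold-style-declaration (enabled by -Wextra). A small illustration:

#include <stdio.h>

/* Obsolescent placement (C11 6.11.5); GCC warns with -Wold-style-declaration:
 *     const static unsigned attempts = 3;
 * Preferred form, with the storage-class specifier first:
 */
static const unsigned attempts = 3;

int main(void)
{
    printf("attempts = %u\n", attempts);
    return 0;
}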
@@ -158,6 +158,12 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
 
     NvU64 completed_value = uvm_channel_update_completed_value(channel);
 
+    // LCIC channels don't use gpfifo entries after the static schedule is up.
+    // They can only have one entry active at a time so use the state of the
+    // tracking semaphore to represent progress.
+    if (uvm_channel_is_lcic(channel) && uvm_channel_manager_is_wlc_ready(channel->pool->manager))
+        return uvm_gpu_tracking_semaphore_is_completed(&channel->tracking_sem) ? 0 : 1;
+
     channel_pool_lock(channel->pool);
 
     // Completed value should never exceed the queued value
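The early return added above relies on LCIC channels having at most one entry in flight, so the pending-entry count degenerates to a 0-or-1 value derived from the tracking semaphore. A simplified userspace model of that calculation; the struct and helpers are illustrative, not the driver's types:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Toy stand-in for a tracking semaphore: work is queued by bumping
// queued_value and the GPU reports completion by bumping completed_value.
typedef struct {
    uint64_t queued_value;
    uint64_t completed_value;
} toy_tracking_sem_t;

static bool toy_sem_is_completed(const toy_tracking_sem_t *sem)
{
    return sem->completed_value >= sem->queued_value;
}

// For a single-slot channel, pending work is either 0 or 1.
static uint32_t toy_single_slot_pending(const toy_tracking_sem_t *sem)
{
    return toy_sem_is_completed(sem) ? 0 : 1;
}

int main(void)
{
    toy_tracking_sem_t sem = { .queued_value = 4, .completed_value = 3 };
    printf("pending = %u\n", toy_single_slot_pending(&sem));  // 1
    sem.completed_value = 4;
    printf("pending = %u\n", toy_single_slot_pending(&sem));  // 0
    return 0;
}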
@@ -397,18 +403,15 @@ static NV_STATUS channel_pool_rotate_key_locked(uvm_channel_pool_t *pool)
     uvm_assert_mutex_locked(&pool->conf_computing.key_rotation.mutex);
 
     uvm_for_each_channel_in_pool(channel, pool) {
-        NV_STATUS status = uvm_channel_wait(channel);
+        // WLC channels share CE with LCIC pushes and LCIC waits for
+        // WLC work to complete using WFI, so it's enough to wait
+        // for the latter one.
+        uvm_channel_t *wait_channel = uvm_channel_is_wlc(channel) ? uvm_channel_wlc_get_paired_lcic(channel) : channel;
+
+        NV_STATUS status = uvm_channel_wait(wait_channel);
         if (status != NV_OK)
             return status;
 
-        if (uvm_channel_pool_is_wlc(pool)) {
-            uvm_spin_loop_t spin;
-            uvm_channel_t *lcic_channel = uvm_channel_wlc_get_paired_lcic(channel);
-
-            // LCIC pushes don't exist as such. Rely on the tracking semaphore
-            // to determine completion, instead of uvm_channel_wait
-            UVM_SPIN_WHILE(!uvm_gpu_tracking_semaphore_is_completed(&lcic_channel->tracking_sem), &spin);
-        }
     }
 
     return uvm_conf_computing_rotate_pool_key(pool);
@@ -1051,13 +1054,21 @@ static void internal_channel_submit_work_wlc(uvm_push_t *push)
     UvmCslIv *iv_cpu_addr = lcic_semaphore->conf_computing.ivs;
     uvm_gpu_semaphore_notifier_t *last_pushed_notifier;
     NvU32 iv_index;
     uvm_spin_loop_t spin;
+    NV_STATUS status;
     void* auth_tag_cpu = get_channel_unprotected_sysmem_cpu(wlc_channel) + WLC_SYSMEM_PUSHBUFFER_AUTH_TAG_OFFSET;
 
     // Wait for the WLC/LCIC to be primed. This means that PUT == GET + 2
     // and a WLC doorbell ring is enough to start work.
     UVM_SPIN_WHILE(!uvm_gpu_tracking_semaphore_is_completed(&lcic_channel->tracking_sem), &spin);
+    status = uvm_channel_wait(lcic_channel);
+    if (status != NV_OK) {
+        UVM_ASSERT(uvm_global_get_status() != NV_OK);
+
+        // If there's a global fatal error we can't communicate with the GPU
+        // and the below launch sequence doesn't work.
+        UVM_ERR_PRINT_NV_STATUS("Failed to wait for LCIC channel (%s) completion.", status, lcic_channel->name);
+        return;
+    }
 
     // Executing WLC adds an extra job to LCIC
     ++lcic_channel->tracking_sem.queued_value;
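The "primed" condition mentioned in the comment (PUT == GET + 2) describes the GPFIFO ring of a WLC/LCIC pair under the static schedule: two entries stay staged ahead of the hardware, so a single doorbell ring launches pre-built work. A toy ring-index model of that check; the names and the modulo arithmetic are illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Toy GPFIFO indices: PUT is where software writes the next entry,
// GET is the next entry the hardware will consume.
typedef struct {
    uint32_t put;
    uint32_t get;
    uint32_t num_entries;   // ring size
} toy_gpfifo_t;

// "Primed" in the sense used above: exactly two entries are staged
// ahead of the hardware.
static bool toy_gpfifo_is_primed(const toy_gpfifo_t *fifo)
{
    return ((fifo->get + 2) % fifo->num_entries) == fifo->put;
}

int main(void)
{
    toy_gpfifo_t fifo = { .put = 5, .get = 3, .num_entries = 32 };
    printf("primed: %s\n", toy_gpfifo_is_primed(&fifo) ? "yes" : "no");  // yes
    fifo.get = 4;
    printf("primed: %s\n", toy_gpfifo_is_primed(&fifo) ? "yes" : "no");  // no
    return 0;
}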
@@ -1852,14 +1863,14 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
 NV_STATUS uvm_channel_get_status(uvm_channel_t *channel)
 {
     uvm_gpu_t *gpu;
-    NvNotification *errorNotifier;
+    NvNotification *error_notifier;
 
     if (uvm_channel_is_proxy(channel))
-        errorNotifier = channel->proxy.channel_info.shadowErrorNotifier;
+        error_notifier = channel->proxy.channel_info.shadowErrorNotifier;
     else
-        errorNotifier = channel->channel_info.errorNotifier;
+        error_notifier = channel->channel_info.errorNotifier;
 
-    if (errorNotifier->status == 0)
+    if (error_notifier->status == 0)
         return NV_OK;
 
     // In case we hit a channel error, check the ECC error notifier as well so
@@ -2986,16 +2997,18 @@ out:
 
 // Return the pool corresponding to the given CE index
 //
-// This function cannot be used to access the proxy pool in SR-IOV heavy.
+// Used to retrieve pools of type UVM_CHANNEL_POOL_TYPE_CE only.
 static uvm_channel_pool_t *channel_manager_ce_pool(uvm_channel_manager_t *manager, NvU32 ce)
 {
-    uvm_channel_pool_t *pool;
+    uvm_channel_pool_t *pool = uvm_channel_pool_first(manager, UVM_CHANNEL_POOL_TYPE_CE);
 
+    UVM_ASSERT(pool != NULL);
     UVM_ASSERT(test_bit(ce, manager->ce_mask));
 
-    // The index of the pool associated with 'ce' is the number of usable CEs
-    // in [0, ce)
-    pool = manager->channel_pools + bitmap_weight(manager->ce_mask, ce);
+    // Pools of type UVM_CHANNEL_POOL_TYPE_CE are stored contiguously. The
+    // offset of the pool associated with 'ce' is the number of usable CEs in
+    // [0, ce).
+    pool += bitmap_weight(manager->ce_mask, ce);
 
     UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
     UVM_ASSERT(pool->engine_index == ce);
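The rewritten lookup depends on CE pools being stored contiguously in ascending CE order, so the pool offset for CE `ce` equals the number of usable CEs below it, which is what bitmap_weight(manager->ce_mask, ce) returns. A userspace sketch of the same indexing, using a 64-bit mask and a popcount builtin as stand-ins for the kernel bitmap API:

#include <stdint.h>
#include <stdio.h>

// Number of set bits in 'mask' strictly below bit index 'ce'
// (what bitmap_weight(mask, ce) returns for a mask that fits in 64 bits).
static unsigned weight_below(uint64_t mask, unsigned ce)
{
    uint64_t below = (ce >= 64) ? mask : (mask & ((1ULL << ce) - 1));
    return (unsigned)__builtin_popcountll(below);
}

int main(void)
{
    // Usable CEs: 1, 2, 4 and 7. Pools are stored contiguously in that order.
    uint64_t ce_mask = (1ULL << 1) | (1ULL << 2) | (1ULL << 4) | (1ULL << 7);
    const char *pools[] = { "pool[0] (CE1)", "pool[1] (CE2)",
                            "pool[2] (CE4)", "pool[3] (CE7)" };
    unsigned ces[] = { 1, 2, 4, 7 };

    for (unsigned i = 0; i < 4; i++)
        printf("CE %u -> %s\n", ces[i], pools[weight_below(ce_mask, ces[i])]);
    return 0;
}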
@@ -3009,6 +3022,8 @@ void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *p
 
     UVM_ASSERT(manager->gpu != peer);
     UVM_ASSERT(optimal_ce < UVM_COPY_ENGINE_COUNT_MAX);
+    UVM_ASSERT(manager->gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_UNSUPPORTED);
+    UVM_ASSERT(peer->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_UNSUPPORTED);
 
     manager->pool_to_use.gpu_to_gpu[peer_gpu_index] = channel_manager_ce_pool(manager, optimal_ce);
 }
@@ -3213,6 +3228,7 @@ static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
 static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
 {
     unsigned ce;
+    unsigned type;
 
     // A pool is created for each usable CE, even if it has not been selected as
     // the preferred CE for any type, because as more information is discovered
@@ -3222,18 +3238,20 @@ static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager,
     // usable.
     for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
         NV_STATUS status;
-        unsigned type;
         uvm_channel_pool_t *pool = NULL;
 
         status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
         if (status != NV_OK)
             return status;
+    }
 
-        for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
-            // Set pool type if it hasn't been set before.
-            if (preferred_ce[type] == ce && manager->pool_to_use.default_for_type[type] == NULL)
-                manager->pool_to_use.default_for_type[type] = pool;
-        }
+    for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
+        // Avoid overwriting previously set defaults.
+        if (manager->pool_to_use.default_for_type[type] != NULL)
+            continue;
+
+        ce = preferred_ce[type];
+        manager->pool_to_use.default_for_type[type] = channel_manager_ce_pool(manager, ce);
     }
 
     return NV_OK;
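The restructuring separates pool creation from default selection: every usable CE gets a pool first, then each channel type resolves its preferred CE through channel_manager_ce_pool, skipping types whose default was already set. A simplified model of the two passes; the arrays and counts are illustrative:

#include <stdio.h>

#define NUM_CES   4   // usable copy engines in this toy example
#define NUM_TYPES 3   // channel types that need a default pool

int main(void)
{
    // Pass 1: create one pool per usable CE (represented by its CE index).
    int pools[NUM_CES];
    for (int ce = 0; ce < NUM_CES; ce++)
        pools[ce] = ce;

    // Preferred CE per channel type, discovered earlier.
    int preferred_ce[NUM_TYPES] = { 2, 0, 3 };

    // Pretend type 1 was already bound to some special pool elsewhere.
    int default_for_type[NUM_TYPES] = { -1, 99, -1 };

    // Pass 2: resolve the remaining defaults without overwriting existing ones.
    for (int type = 0; type < NUM_TYPES; type++) {
        if (default_for_type[type] != -1)
            continue;
        default_for_type[type] = pools[preferred_ce[type]];
    }

    for (int type = 0; type < NUM_TYPES; type++)
        printf("type %d -> %d\n", type, default_for_type[type]);  // 2, 99, 3
    return 0;
}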
@@ -3739,11 +3757,15 @@ static void channel_manager_stop_wlc(uvm_channel_manager_t *manager)
     NV_STATUS status;
 
     uvm_for_each_channel_in_pool(channel, lcic_pool) {
-        uvm_spin_loop_t spin;
-
         // Wait for the WLC/LCIC to be primed. This means that PUT == GET + 2
         // and a WLC doorbell ring is enough to start work.
-        UVM_SPIN_WHILE(!uvm_gpu_tracking_semaphore_is_completed(&channel->tracking_sem), &spin);
+        status = uvm_channel_wait(channel);
+        if (status != NV_OK)
+            UVM_ERR_PRINT_NV_STATUS("Failed to wait for LCIC channel (%s) completion", status, channel->name);
+
+        // Continue on error and attempt to stop WLC below. This can lead to
+        // channel destruction with mismatched GET and PUT pointers. RM will
+        // print errors if that's the case, but channel destruction succeeeds.
    }
 
     status = uvm_push_begin(manager, UVM_CHANNEL_TYPE_SEC2, &push, "Stop WLC channels");
@@ -2256,7 +2256,10 @@ static void set_optimal_p2p_write_ces(const UvmGpuP2PCapsParams *p2p_caps_params
     bool sorted;
     NvU32 ce0, ce1;
 
-    if (peer_caps->link_type < UVM_GPU_LINK_NVLINK_1)
+    UVM_ASSERT(peer_caps->ref_count);
+    UVM_ASSERT(gpu0->parent->peer_copy_mode == gpu1->parent->peer_copy_mode);
+
+    if (gpu0->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_UNSUPPORTED)
         return;
 
     sorted = uvm_id_value(gpu0->id) < uvm_id_value(gpu1->id);
@@ -2282,7 +2285,7 @@ static void set_optimal_p2p_write_ces(const UvmGpuP2PCapsParams *p2p_caps_params
 static int nv_procfs_read_gpu_peer_caps(struct seq_file *s, void *v)
 {
     if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
-            return -EAGAIN;
+        return -EAGAIN;
 
     gpu_peer_caps_print((uvm_gpu_t **)s->private, s);
 
@@ -962,6 +962,8 @@ struct uvm_parent_gpu_struct
     // Whether CE supports physical addressing mode for writes to vidmem
     bool ce_phys_vidmem_write_supported;
 
+    // Addressing mode(s) supported for CE transfers between this GPU and its
+    // peers: none, physical only, physical and virtual, etc.
+    uvm_gpu_peer_copy_mode_t peer_copy_mode;
+
     // Virtualization mode of the GPU.
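The new field records which addressing modes the Copy Engine may use for transfers to peers. Only UVM_GPU_PEER_COPY_MODE_UNSUPPORTED is visible elsewhere in this diff, so the remaining enumerator names in the sketch below are assumptions based on the comment text ("none, physical only, physical and virtual"):

// Hypothetical sketch of uvm_gpu_peer_copy_mode_t. Only
// UVM_GPU_PEER_COPY_MODE_UNSUPPORTED appears elsewhere in this diff; the
// other enumerator names are assumptions drawn from the field's comment.
typedef enum {
    UVM_GPU_PEER_COPY_MODE_UNSUPPORTED = 0,      // no CE peer copies at all
    UVM_GPU_PEER_COPY_MODE_PHYSICAL,             // physical addressing only
    UVM_GPU_PEER_COPY_MODE_PHYSICAL_AND_VIRTUAL, // physical and virtual
} uvm_gpu_peer_copy_mode_t;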
@@ -684,7 +684,10 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
 
     while (get != put) {
         // Wait until valid bit is set
-        UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
+        UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
+            if (uvm_global_get_status() != NV_OK)
+                goto done;
+        }
 
         parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
         ++get;
@@ -692,6 +695,7 @@
             get = 0;
     }
 
+done:
     write_get(parent_gpu, get);
 }
 
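The UVM_SPIN_WHILE changes in this commit all have the same shape: the spin loop gains a body that checks for a fatal error and bails out, so a wedged GPU cannot hang the flush path indefinitely. A userspace sketch of a spin-while macro that accepts a body; the macro and globals below are illustrative, not the driver's UVM_SPIN_WHILE:

#include <stdbool.h>
#include <stdio.h>

// Illustrative spin-while: expands to a plain while loop, so callers can
// attach a body that runs on every polling iteration.
#define TOY_SPIN_WHILE(cond) while (cond)

static bool entry_valid;
static int  global_status;   // 0 == OK, nonzero == fatal error

static int flush_one_entry(void)
{
    TOY_SPIN_WHILE(!entry_valid) {
        // Checked on every iteration: after a fatal error the entry may
        // never become valid, so give up instead of spinning forever.
        if (global_status != 0)
            return global_status;
    }
    return 0;   // entry became valid and can be consumed
}

int main(void)
{
    global_status = -1;                           // simulate a wedged device
    printf("wedged:  %d\n", flush_one_entry());   // -1, bailed out

    global_status = 0;
    entry_valid = true;                           // entry shows up normally
    printf("healthy: %d\n", flush_one_entry());   // 0
    return 0;
}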
@@ -817,12 +821,18 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
            (fetch_mode == NOTIFICATION_FETCH_MODE_ALL || notification_index < access_counters->max_batch_size)) {
         uvm_access_counter_buffer_entry_t *current_entry = &notification_cache[notification_index];
 
-        // We cannot just wait for the last entry (the one pointed by put) to become valid, we have to do it
-        // individually since entries can be written out of order
+        // We cannot just wait for the last entry (the one pointed by put) to
+        // become valid, we have to do it individually since entries can be
+        // written out of order
         UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
             // We have some entry to work on. Let's do the rest later.
             if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
                 goto done;
+
+            // There's no entry to work on and something has gone wrong. Ignore
+            // the rest.
+            if (uvm_global_get_status() != NV_OK)
+                goto done;
         }
 
         // Prevent later accesses being moved above the read of the valid bit
@@ -631,7 +631,15 @@ static NV_STATUS fault_buffer_flush_locked(uvm_gpu_t *gpu,
 
     while (get != put) {
         // Wait until valid bit is set
-        UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
+        UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
+            // Channels might be idle (e.g. in teardown) so check for errors
+            // actively.
+            status = uvm_channel_manager_check_errors(gpu->channel_manager);
+            if (status != NV_OK) {
+                write_get(parent_gpu, get);
+                return status;
+            }
+        }
 
         fault_buffer_skip_replayable_entry(parent_gpu, get);
         ++get;
@@ -864,6 +872,10 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
             // We have some entry to work on. Let's do the rest later.
             if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
                 goto done;
+
+            status = uvm_global_get_status();
+            if (status != NV_OK)
+                goto done;
         }
 
         // Prevent later accesses being moved above the read of the valid bit
@@ -50,18 +50,18 @@
 // because that type is normally associated with the LCE mapped to the most
 // PCEs. The higher bandwidth is beneficial when doing bulk operations such as
 // clearing PTEs, or initializing a page directory/table.
-#define page_tree_begin_acquire(tree, tracker, push, format, ...) ({ \
-    NV_STATUS status; \
-    uvm_channel_manager_t *manager = (tree)->gpu->channel_manager; \
-    \
-    if (manager == NULL) \
-        status = uvm_push_begin_fake((tree)->gpu, (push)); \
-    else if (uvm_parent_gpu_is_virt_mode_sriov_heavy((tree)->gpu->parent)) \
-        status = uvm_push_begin_acquire(manager, UVM_CHANNEL_TYPE_MEMOPS, (tracker), (push), (format), ##__VA_ARGS__); \
-    else \
-        status = uvm_push_begin_acquire(manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, (tracker), (push), (format), ##__VA_ARGS__);\
-    \
-    status; \
+#define page_tree_begin_acquire(tree, tracker, push, format, ...) ({ \
+    NV_STATUS __status; \
+    uvm_channel_manager_t *__manager = (tree)->gpu->channel_manager; \
+    \
+    if (__manager == NULL) \
+        __status = uvm_push_begin_fake((tree)->gpu, (push)); \
+    else if (uvm_parent_gpu_is_virt_mode_sriov_heavy((tree)->gpu->parent)) \
+        __status = uvm_push_begin_acquire(__manager, UVM_CHANNEL_TYPE_MEMOPS, (tracker), (push), (format), ##__VA_ARGS__); \
+    else \
+        __status = uvm_push_begin_acquire(__manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, (tracker), (push), (format), ##__VA_ARGS__);\
+    \
+    __status; \
 })
 
 // Default location of page table allocations
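The only change in this hunk is the double-underscore prefix on the macro's locals. Because page_tree_begin_acquire is a GNU statement expression, a local named status or manager shadows any caller variable of the same name that appears in the macro arguments, which can silently misdirect reads and writes. A minimal, self-contained reproduction of that hazard using toy macros (not the driver's); requires the GNU statement-expression extension (GCC/Clang):

#include <stdio.h>

// Toy statement-expression macros: declare a local, run the caller-supplied
// expression, and yield the local as the result.
#define EVAL_BUGGY(expr) ({ int status = 0; status = (expr); status; })
#define EVAL_FIXED(expr) ({ int __status = 0; __status = (expr); __status; })

// The caller-supplied expression writes through a pointer the caller spells
// as '&status' -- exactly the collision the '__' prefix avoids.
static int record(int *status_out, int value)
{
    *status_out = value;
    return value * 2;
}

int main(void)
{
    int status = -1;

    int r1 = EVAL_BUGGY(record(&status, 7));
    // '&status' bound to the macro's own local; the caller's copy is untouched.
    printf("buggy: result=%d caller status=%d\n", r1, status);   // 14, -1

    status = -1;
    int r2 = EVAL_FIXED(record(&status, 7));
    printf("fixed: result=%d caller status=%d\n", r2, status);   // 14, 7

    return 0;
}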
@@ -1127,7 +1127,6 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
     // incrementally. Therefore, the reverse translations will show them in
     // order.
     uvm_for_each_va_range_in(va_range, va_space, addr, addr + size - 1) {
-        uvm_va_block_t *va_block;
 
         for_each_va_block_in_va_range(va_range, va_block) {
             NvU32 num_va_block_pages = 0;
@@ -149,7 +149,7 @@ done:
 static NV_STATUS test_tracker_basic(uvm_va_space_t *va_space)
 {
     uvm_gpu_t *gpu;
-    uvm_channel_t *channel;
+    uvm_channel_t *any_channel;
     uvm_tracker_t tracker;
     uvm_tracker_entry_t entry;
     NvU32 count = 0;
@@ -159,15 +159,15 @@ static NV_STATUS test_tracker_basic(uvm_va_space_t *va_space)
     if (gpu == NULL)
         return NV_ERR_INVALID_STATE;
 
-    channel = uvm_channel_any(gpu->channel_manager);
-    if (channel == NULL)
+    any_channel = uvm_channel_any(gpu->channel_manager);
+    if (any_channel == NULL)
         return NV_ERR_INVALID_STATE;
 
     uvm_tracker_init(&tracker);
     TEST_CHECK_GOTO(assert_tracker_is_completed(&tracker) == NV_OK, done);
 
     // Some channel
-    entry.channel = channel;
+    entry.channel = any_channel;
     entry.value = 1;
 
     status = uvm_tracker_add_entry(&tracker, &entry);
@@ -258,7 +258,7 @@ done:
 static NV_STATUS test_tracker_overwrite(uvm_va_space_t *va_space)
 {
     uvm_gpu_t *gpu;
-    uvm_channel_t *channel;
+    uvm_channel_t *any_channel;
     uvm_tracker_t tracker, dup_tracker;
     uvm_tracker_entry_t entry;
     uvm_tracker_entry_t *entry_iter, *dup_entry_iter;
@@ -270,15 +270,15 @@ static NV_STATUS test_tracker_overwrite(uvm_va_space_t *va_space)
     if (gpu == NULL)
         return NV_ERR_INVALID_STATE;
 
-    channel = uvm_channel_any(gpu->channel_manager);
-    if (channel == NULL)
+    any_channel = uvm_channel_any(gpu->channel_manager);
+    if (any_channel == NULL)
         return NV_ERR_INVALID_STATE;
 
     uvm_tracker_init(&tracker);
     TEST_CHECK_GOTO(assert_tracker_is_completed(&tracker) == NV_OK, done);
 
     // Some channel
-    entry.channel = channel;
+    entry.channel = any_channel;
     entry.value = 1;
 
     status = uvm_tracker_add_entry(&tracker, &entry);
@@ -351,7 +351,7 @@ done:
 static NV_STATUS test_tracker_add_tracker(uvm_va_space_t *va_space)
 {
     uvm_gpu_t *gpu;
-    uvm_channel_t *channel;
+    uvm_channel_t *any_channel;
     uvm_tracker_t tracker, dup_tracker;
     uvm_tracker_entry_t entry;
     uvm_tracker_entry_t *entry_iter, *dup_entry_iter;
@@ -362,8 +362,8 @@ static NV_STATUS test_tracker_add_tracker(uvm_va_space_t *va_space)
     if (gpu == NULL)
         return NV_ERR_INVALID_STATE;
 
-    channel = uvm_channel_any(gpu->channel_manager);
-    if (channel == NULL)
+    any_channel = uvm_channel_any(gpu->channel_manager);
+    if (any_channel == NULL)
         return NV_ERR_INVALID_STATE;
 
     uvm_tracker_init(&tracker);
@@ -371,7 +371,7 @@ static NV_STATUS test_tracker_add_tracker(uvm_va_space_t *va_space)
     TEST_CHECK_GOTO(assert_tracker_is_completed(&tracker) == NV_OK, done);
 
     // Some channel
-    entry.channel = channel;
+    entry.channel = any_channel;
     entry.value = 1;
 
     status = uvm_tracker_add_entry(&tracker, &entry);
@@ -3493,8 +3493,6 @@ static NV_STATUS block_copy_begin_push(uvm_va_block_t *va_block,
     }
 
     if (UVM_ID_IS_CPU(src_id) && UVM_ID_IS_CPU(dst_id)) {
-        uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
-
         gpu = uvm_va_space_find_first_gpu_attached_to_cpu_node(va_space, copy_state->src.nid);
         if (!gpu)
             gpu = uvm_va_space_find_first_gpu(va_space);
@@ -4486,8 +4484,6 @@ static NV_STATUS block_copy_resident_pages_mask(uvm_va_block_t *block,
     uvm_processor_mask_copy(search_mask, src_processor_mask);
 
     for_each_closest_id(src_id, search_mask, dst_id, va_space) {
-        NV_STATUS status;
-
         if (UVM_ID_IS_CPU(src_id)) {
             int nid;
 
@@ -8939,13 +8935,13 @@ NV_STATUS uvm_va_block_revoke_prot(uvm_va_block_t *va_block,
     uvm_processor_mask_copy(resident_procs, &va_block->resident);
 
     for_each_closest_id(resident_id, resident_procs, gpu->id, va_space) {
-        NV_STATUS status = block_revoke_prot_gpu_to(va_block,
-                                                    va_block_context,
-                                                    gpu,
-                                                    resident_id,
-                                                    running_page_mask,
-                                                    prot_to_revoke,
-                                                    out_tracker);
+        status = block_revoke_prot_gpu_to(va_block,
+                                          va_block_context,
+                                          gpu,
+                                          resident_id,
+                                          running_page_mask,
+                                          prot_to_revoke,
+                                          out_tracker);
         if (status != NV_OK)
             break;
 
@@ -12208,16 +12204,16 @@ NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
 
     // Map pages that are thrashing
     if (service_context->thrashing_pin_count > 0) {
-        uvm_page_index_t page_index;
+        uvm_page_index_t pinned_page_index;
 
-        for_each_va_block_page_in_region_mask(page_index,
+        for_each_va_block_page_in_region_mask(pinned_page_index,
                                               &service_context->thrashing_pin_mask,
                                               service_context->region) {
             uvm_processor_mask_t *map_thrashing_processors = NULL;
-            NvU64 page_addr = uvm_va_block_cpu_page_address(va_block, page_index);
+            NvU64 page_addr = uvm_va_block_cpu_page_address(va_block, pinned_page_index);
 
             // Check protection type
-            if (!uvm_page_mask_test(caller_page_mask, page_index))
+            if (!uvm_page_mask_test(caller_page_mask, pinned_page_index))
                 continue;
 
             map_thrashing_processors = uvm_perf_thrashing_get_thrashing_processors(va_block, page_addr);
@@ -12226,7 +12222,7 @@ NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
                                           service_context->block_context,
                                           new_residency,
                                           processor_id,
-                                          uvm_va_block_region_for_page(page_index),
+                                          uvm_va_block_region_for_page(pinned_page_index),
                                           caller_page_mask,
                                           new_prot,
                                           map_thrashing_processors);
@@ -2274,7 +2274,7 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block,
 // returns NV_ERR_MORE_PROCESSING_REQUIRED and this makes it clear that the
 // block's state is not locked across these calls.
 #define UVM_VA_BLOCK_LOCK_RETRY(va_block, block_retry, call) ({ \
-    NV_STATUS status; \
+    NV_STATUS __status; \
     uvm_va_block_t *__block = (va_block); \
     uvm_va_block_retry_t *__retry = (block_retry); \
     \
@@ -2283,14 +2283,14 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block,
     uvm_mutex_lock(&__block->lock); \
     \
     do { \
-        status = (call); \
-    } while (status == NV_ERR_MORE_PROCESSING_REQUIRED); \
+        __status = (call); \
+    } while (__status == NV_ERR_MORE_PROCESSING_REQUIRED); \
     \
     uvm_mutex_unlock(&__block->lock); \
     \
     uvm_va_block_retry_deinit(__retry, __block); \
     \
-    status; \
+    __status; \
 })
 
 // A helper macro for handling allocation-retry
@@ -2305,7 +2305,7 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block,
 // to be already taken. Notably the block's lock might be unlocked and relocked
 // as part of the call.
 #define UVM_VA_BLOCK_RETRY_LOCKED(va_block, block_retry, call) ({ \
-    NV_STATUS status; \
+    NV_STATUS __status; \
     uvm_va_block_t *__block = (va_block); \
     uvm_va_block_retry_t *__retry = (block_retry); \
     \
@@ -2314,12 +2314,12 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block,
     uvm_assert_mutex_locked(&__block->lock); \
     \
     do { \
-        status = (call); \
-    } while (status == NV_ERR_MORE_PROCESSING_REQUIRED); \
+        __status = (call); \
+    } while (__status == NV_ERR_MORE_PROCESSING_REQUIRED); \
     \
     uvm_va_block_retry_deinit(__retry, __block); \
     \
-    status; \
+    __status; \
 })
 
 #endif // __UVM_VA_BLOCK_H__
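Both va_block retry macros wrap their call in the same loop: re-invoke it while it returns NV_ERR_MORE_PROCESSING_REQUIRED (the block lock may be dropped and retaken in between) and yield the final status as the value of the statement expression. A toy model of that control flow for GCC/Clang; the status codes and the mock operation are illustrative:

#include <stdio.h>

// Toy status codes standing in for NV_OK / NV_ERR_MORE_PROCESSING_REQUIRED.
enum { TOY_OK = 0, TOY_MORE_PROCESSING_REQUIRED = 1 };

// Same retry shape as UVM_VA_BLOCK_RETRY_LOCKED, minus the locking and
// retry-state bookkeeping: re-run the call until it stops asking for another
// pass, and yield the final status (GNU statement expression).
#define TOY_RETRY(call) ({                                    \
    int __status;                                             \
    do {                                                      \
        __status = (call);                                    \
    } while (__status == TOY_MORE_PROCESSING_REQUIRED);       \
    __status;                                                 \
})

static int passes;

// Mock operation: pretend the first two passes had to release the lock to
// allocate memory, so they ask the caller to retry.
static int toy_block_map(void)
{
    return (++passes < 3) ? TOY_MORE_PROCESSING_REQUIRED : TOY_OK;
}

int main(void)
{
    int status = TOY_RETRY(toy_block_map());
    printf("final status %d after %d passes\n", status, passes);  // 0, 3
    return 0;
}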