535.86.05
@@ -108,5 +108,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node

NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg

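The two new conftest probes above feed the NV_MEMPOLICY_HAS_UNIFIED_NODES and NV_MEMPOLICY_HAS_HOME_NODE guards used later in this change. As a rough, hypothetical sketch (not part of the diff) of how such a probe result is typically consumed:

/* Hedged sketch only: the conftest machinery defines NV_MEMPOLICY_HAS_UNIFIED_NODES
 * when struct mempolicy exposes the unified 'nodes' nodemask. Callers then guard
 * their accesses and fall back gracefully on older kernels. */
#include <linux/mempolicy.h>
#include <linux/nodemask.h>

static int sketch_first_preferred_node(struct mempolicy *pol)
{
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
    /* Newer kernels: MPOL_BIND/MPOL_PREFERRED_MANY share one nodemask. */
    return first_node(pol->nodes);
#else
    /* Older kernels: treat the policy as expressing no usable preference. */
    return NUMA_NO_NODE;
#endif
}
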
@@ -24,6 +24,7 @@
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_ats_faults.h"
|
||||
#include "uvm_migrate_pageable.h"
|
||||
#include <linux/mempolicy.h>
|
||||
|
||||
// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
|
||||
// these experimental parameters. These are intended to help guide that policy.
|
||||
@@ -79,7 +80,7 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 start,
|
||||
size_t length,
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_fault_client_type_t client_type)
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
@@ -95,17 +96,18 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// 2) guest physical -> host physical
|
||||
//
|
||||
// The overall ATS translation will fault if either of those translations is
|
||||
// invalid. The get_user_pages() call above handles translation #1, but not
|
||||
// #2. We don't know if we're running as a guest, but in case we are we can
|
||||
// force that translation to be valid by touching the guest physical address
|
||||
// from the CPU. If the translation is not valid then the access will cause
|
||||
// a hypervisor fault. Note that dma_map_page() can't establish mappings
|
||||
// used by GPU ATS SVA translations. GPU accesses to host physical addresses
|
||||
// obtained as a result of the address translation request use the CPU
|
||||
// address space instead of the IOMMU address space since the translated
|
||||
// host physical address isn't necessarily an IOMMU address. The only way to
|
||||
// establish guest physical to host physical mapping in the CPU address
|
||||
// space is to touch the page from the CPU.
|
||||
// invalid. The pin_user_pages() call within uvm_migrate_pageable() call
|
||||
// below handles translation #1, but not #2. We don't know if we're running
|
||||
// as a guest, but in case we are we can force that translation to be valid
|
||||
// by touching the guest physical address from the CPU. If the translation
|
||||
// is not valid then the access will cause a hypervisor fault. Note that
|
||||
// dma_map_page() can't establish mappings used by GPU ATS SVA translations.
|
||||
// GPU accesses to host physical addresses obtained as a result of the
|
||||
// address translation request use the CPU address space instead of the
|
||||
// IOMMU address space since the translated host physical address isn't
|
||||
// necessarily an IOMMU address. The only way to establish guest physical to
|
||||
// host physical mapping in the CPU address space is to touch the page from
|
||||
// the CPU.
|
||||
//
|
||||
// We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
|
||||
// VM_WRITE, meaning that the mappings are all granted write access on any
|
||||
@@ -116,20 +118,26 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
uvm_migrate_args_t uvm_migrate_args =
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.dst_id = gpu_va_space->gpu->parent->id,
|
||||
.dst_node_id = -1,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.dst_id = ats_context->residency_id,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
.populate_on_cpu_alloc_failures = true,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
};
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
|
||||
expand_fault_region(vma, start, length, client_type, &uvm_migrate_args.start, &uvm_migrate_args.length);
|
||||
expand_fault_region(vma,
|
||||
start,
|
||||
length,
|
||||
ats_context->client_type,
|
||||
&uvm_migrate_args.start,
|
||||
&uvm_migrate_args.length);
|
||||
|
||||
// We are trying to use migrate_vma API in the kernel (if it exists) to
|
||||
// populate and map the faulting region on the GPU. We want to do this only
|
||||
@@ -165,6 +173,58 @@ static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
|
||||
}
|
||||
|
||||
static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
int residency = uvm_gpu_numa_node(gpu);
|
||||
|
||||
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
|
||||
struct mempolicy *vma_policy = vma_policy(vma);
|
||||
unsigned short mode;
|
||||
|
||||
if (!vma_policy)
|
||||
goto done;
|
||||
|
||||
mode = vma_policy->mode;
|
||||
|
||||
if ((mode == MPOL_BIND) || (mode == MPOL_PREFERRED_MANY) || (mode == MPOL_PREFERRED)) {
|
||||
int home_node = NUMA_NO_NODE;
|
||||
|
||||
#if defined(NV_MEMPOLICY_HAS_HOME_NODE)
|
||||
if ((mode != MPOL_PREFERRED) && (vma_policy->home_node != NUMA_NO_NODE))
|
||||
home_node = vma_policy->home_node;
|
||||
#endif
|
||||
|
||||
// Prefer home_node if set. Otherwise, prefer the faulting GPU if it's
|
||||
// in the list of preferred nodes, else prefer the closest_cpu_numa_node
|
||||
// to the GPU if closest_cpu_numa_node is in the list of preferred
|
||||
// nodes. Fallback to the faulting GPU if all else fails.
|
||||
if (home_node != NUMA_NO_NODE) {
|
||||
residency = home_node;
|
||||
}
|
||||
else if (!node_isset(residency, vma_policy->nodes)) {
|
||||
int closest_cpu_numa_node = gpu->parent->closest_cpu_numa_node;
|
||||
|
||||
if ((closest_cpu_numa_node != NUMA_NO_NODE) && node_isset(closest_cpu_numa_node, vma_policy->nodes))
|
||||
residency = gpu->parent->closest_cpu_numa_node;
|
||||
else
|
||||
residency = first_node(vma_policy->nodes);
|
||||
}
|
||||
}
|
||||
|
||||
// Update gpu if residency is not the faulting gpu.
|
||||
if (residency != uvm_gpu_numa_node(gpu))
|
||||
gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
|
||||
|
||||
done:
|
||||
#endif
|
||||
|
||||
ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
|
||||
ats_context->residency_node = residency;
|
||||
}
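Condensed, illustrative restatement of the selection order implemented above (the helper name and parameters are invented for this sketch; the driver code above is authoritative):

// Sketch: preference order used when a VMA policy restricts placement.
// 1) explicit home node, 2) the faulting GPU's memory node if allowed,
// 3) the CPU node closest to the GPU if allowed, 4) first allowed node.
static int sketch_pick_residency_node(int home_node,
                                      int gpu_node,
                                      int closest_cpu_node,
                                      const nodemask_t *allowed)
{
    if (home_node != NUMA_NO_NODE)
        return home_node;
    if (node_isset(gpu_node, *allowed))
        return gpu_node;
    if ((closest_cpu_node != NUMA_NO_NODE) && node_isset(closest_cpu_node, *allowed))
        return closest_cpu_node;
    return first_node(*allowed);
}
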
|
||||
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
@@ -205,6 +265,8 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_mask_zero(write_fault_mask);
|
||||
}
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
@@ -215,7 +277,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, client_type);
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@@ -244,11 +306,12 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, UVM_FAULT_ACCESS_TYPE_READ, client_type);
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
|
||||
@@ -152,7 +152,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
break;
|
||||
|
||||
if (entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
|
||||
uvm_pushbuffer_mark_completed(channel->pool->manager->pushbuffer, entry);
|
||||
uvm_pushbuffer_mark_completed(channel, entry);
|
||||
list_add_tail(&entry->push_info->available_list_node, &channel->available_push_infos);
|
||||
}
|
||||
|
||||
@@ -1035,6 +1035,57 @@ static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
|
||||
return status;
|
||||
}
|
||||
|
||||
// When the Confidential Computing feature is enabled, the CPU is unable to
// access and read the pushbuffer. This is because it is located in the CPR of
// vidmem in this configuration. This function allows UVM to retrieve the
// content of the pushbuffer in an encrypted form for later decryption, hence
// simulating the original access pattern, e.g., reading timestamp semaphores.
// See also: decrypt_push().
static void encrypt_push(uvm_push_t *push)
|
||||
{
|
||||
NvU64 push_protected_gpu_va;
|
||||
NvU64 push_unprotected_gpu_va;
|
||||
uvm_gpu_address_t auth_tag_gpu_va;
|
||||
uvm_channel_t *channel = push->channel;
|
||||
uvm_push_crypto_bundle_t *crypto_bundle;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
NvU32 push_size = uvm_push_get_size(push);
|
||||
uvm_push_info_t *push_info = uvm_push_info_from_push(push);
|
||||
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
|
||||
unsigned auth_tag_offset = UVM_CONF_COMPUTING_AUTH_TAG_SIZE * push->push_info_index;
|
||||
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
return;
|
||||
|
||||
if (!push_info->on_complete)
|
||||
return;
|
||||
|
||||
if (!uvm_channel_is_ce(channel))
|
||||
return;
|
||||
|
||||
if (push_size == 0)
|
||||
return;
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_wlc(channel));
|
||||
UVM_ASSERT(!uvm_channel_is_lcic(channel));
|
||||
UVM_ASSERT(channel->conf_computing.push_crypto_bundles != NULL);
|
||||
|
||||
crypto_bundle = channel->conf_computing.push_crypto_bundles + push->push_info_index;
|
||||
auth_tag_gpu_va = uvm_rm_mem_get_gpu_va(channel->conf_computing.push_crypto_bundle_auth_tags, gpu, false);
|
||||
auth_tag_gpu_va.address += auth_tag_offset;
|
||||
|
||||
crypto_bundle->push_size = push_size;
|
||||
push_protected_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
|
||||
push_unprotected_gpu_va = uvm_pushbuffer_get_unprotected_gpu_va_for_push(pushbuffer, push);
|
||||
|
||||
uvm_conf_computing_log_gpu_encryption(channel, &crypto_bundle->iv);
|
||||
gpu->parent->ce_hal->encrypt(push,
|
||||
uvm_gpu_address_virtual_unprotected(push_unprotected_gpu_va),
|
||||
uvm_gpu_address_virtual(push_protected_gpu_va),
|
||||
push_size,
|
||||
auth_tag_gpu_va);
|
||||
}
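To keep the two halves of the scheme together, a rough outline of the round trip (illustrative only; decrypt_push() itself appears in the uvm_pushbuffer.c hunk later in this commit):

// uvm_channel_end_push(push)
//     encrypt_push(push)               // CE copies the push from protected vidmem
//                                      // to the unprotected sysmem alias, encrypted,
//                                      // and logs the IV in the push's crypto bundle
// ... GPU executes and completes the push ...
// uvm_pushbuffer_mark_completed(channel, gpfifo)
//     decrypt_push(channel, gpfifo)    // CPU decrypts the sysmem copy with the bundle
//                                      // IV and auth tag before on_complete() runs
//     push_info->on_complete(...)      // callback now reads plaintext push contents
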
|
||||
|
||||
void uvm_channel_end_push(uvm_push_t *push)
|
||||
{
|
||||
uvm_channel_t *channel = push->channel;
|
||||
@@ -1051,6 +1102,8 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
encrypt_push(push);
|
||||
|
||||
new_tracking_value = ++channel->tracking_sem.queued_value;
|
||||
new_payload = (NvU32)new_tracking_value;
|
||||
|
||||
@@ -1561,11 +1614,15 @@ static void free_conf_computing_buffers(uvm_channel_t *channel)
|
||||
uvm_rm_mem_free(channel->conf_computing.static_pb_protected_vidmem);
|
||||
uvm_rm_mem_free(channel->conf_computing.static_pb_unprotected_sysmem);
|
||||
uvm_rm_mem_free(channel->conf_computing.static_notifier_unprotected_sysmem);
|
||||
uvm_rm_mem_free(channel->conf_computing.push_crypto_bundle_auth_tags);
|
||||
uvm_kvfree(channel->conf_computing.static_pb_protected_sysmem);
|
||||
uvm_kvfree(channel->conf_computing.push_crypto_bundles);
|
||||
channel->conf_computing.static_pb_protected_vidmem = NULL;
|
||||
channel->conf_computing.static_pb_unprotected_sysmem = NULL;
|
||||
channel->conf_computing.static_notifier_unprotected_sysmem = NULL;
|
||||
channel->conf_computing.push_crypto_bundle_auth_tags = NULL;
|
||||
channel->conf_computing.static_pb_protected_sysmem = NULL;
|
||||
channel->conf_computing.push_crypto_bundles = NULL;
|
||||
|
||||
uvm_rm_mem_free(channel->tracking_sem.semaphore.conf_computing.encrypted_payload);
|
||||
uvm_rm_mem_free(channel->tracking_sem.semaphore.conf_computing.notifier);
|
||||
@@ -1702,14 +1759,34 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
status = alloc_conf_computing_buffers_semaphore(channel);
|
||||
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
|
||||
|
||||
status = alloc_conf_computing_buffers_semaphore(channel);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (uvm_channel_is_wlc(channel))
|
||||
if (uvm_channel_is_wlc(channel)) {
|
||||
status = alloc_conf_computing_buffers_wlc(channel);
|
||||
else if (uvm_channel_is_lcic(channel))
|
||||
}
|
||||
else if (uvm_channel_is_lcic(channel)) {
|
||||
status = alloc_conf_computing_buffers_lcic(channel);
|
||||
}
|
||||
else {
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
void *push_crypto_bundles = uvm_kvmalloc_zero(sizeof(*channel->conf_computing.push_crypto_bundles) *
|
||||
channel->num_gpfifo_entries);
|
||||
|
||||
if (push_crypto_bundles == NULL)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
channel->conf_computing.push_crypto_bundles = push_crypto_bundles;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
channel->num_gpfifo_entries * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&channel->conf_computing.push_crypto_bundle_auth_tags);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -355,6 +355,13 @@ struct uvm_channel_struct
|
||||
// Encryption auth tags have to be located in unprotected sysmem.
|
||||
void *launch_auth_tag_cpu;
|
||||
NvU64 launch_auth_tag_gpu_va;
|
||||
|
||||
// Used to decrypt the push back to protected sysmem.
|
||||
// This happens when profilers register callbacks for migration data.
|
||||
uvm_push_crypto_bundle_t *push_crypto_bundles;
|
||||
|
||||
// Accompanying authentication tags for the crypto bundles
|
||||
uvm_rm_mem_t *push_crypto_bundle_auth_tags;
|
||||
} conf_computing;
|
||||
|
||||
// RM channel information
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "uvm_conf_computing.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_processors.h"
|
||||
#include "uvm_tracker.h"
|
||||
@@ -60,8 +61,7 @@ NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent)
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
// TODO: Bug 2844714.
|
||||
// Since we have no routine to traverse parent gpus,
|
||||
// TODO: Bug 2844714: since we have no routine to traverse parent GPUs,
|
||||
// find first child GPU and get its parent.
|
||||
first = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
|
||||
if (!first)
|
||||
@@ -448,3 +448,51 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
void *dst_plain,
|
||||
const void *src_cipher,
|
||||
const void *auth_tag_buffer,
|
||||
NvU8 valid)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
// There is no dedicated lock for the CSL context associated with replayable
|
||||
// faults. The mutual exclusion required by the RM CSL API is enforced by
|
||||
// relying on the GPU replayable service lock (ISR lock), since fault
|
||||
// decryption is invoked as part of fault servicing.
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
|
||||
UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));
|
||||
|
||||
status = nvUvmInterfaceCslDecrypt(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
|
||||
parent_gpu->fault_buffer_hal->entry_size(parent_gpu),
|
||||
(const NvU8 *) src_cipher,
|
||||
NULL,
|
||||
(NvU8 *) dst_plain,
|
||||
&valid,
|
||||
sizeof(valid),
|
||||
(const NvU8 *) auth_tag_buffer);
|
||||
|
||||
if (status != NV_OK)
|
||||
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
// See comment in uvm_conf_computing_fault_decrypt
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
|
||||
UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));
|
||||
|
||||
status = nvUvmInterfaceCslIncrementIv(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
|
||||
UVM_CSL_OPERATION_DECRYPT,
|
||||
increment,
|
||||
NULL);
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
@@ -177,4 +177,28 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
const UvmCslIv *src_iv,
|
||||
size_t size,
|
||||
const void *auth_tag_buffer);
|
||||
|
||||
// CPU decryption of a single replayable fault, encrypted by GSP-RM.
//
// Replayable fault decryption depends not only on the encrypted fault contents
// and the authentication tag, but also on the plaintext valid bit associated
// with the fault.
//
// When decrypting data previously encrypted by the Copy Engine, use
// uvm_conf_computing_cpu_decrypt instead.
//
// Locking: this function must be invoked while holding the replayable ISR lock.
NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
void *dst_plain,
|
||||
const void *src_cipher,
|
||||
const void *auth_tag_buffer,
|
||||
NvU8 valid);
|
||||
|
||||
// Increment the CPU-side decrypt IV of the CSL context associated with
|
||||
// replayable faults. The function is a no-op if the given increment is zero.
|
||||
//
|
||||
// The IV associated with a fault CSL context is a 64-bit counter.
|
||||
//
|
||||
// Locking: this function must be invoked while holding the replayable ISR lock.
|
||||
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment);
|
||||
#endif // __UVM_CONF_COMPUTING_H__
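For orientation, a hedged sketch of the intended caller pattern (the real callers added by this commit are uvm_hal_volta_fault_buffer_parse_replayable_entry() and fault_buffer_skip_replayable_entry(); names such as 'metadata', 'plain_entry' and 'will_parse_entry' are placeholders):

// Sketch only: both paths run under the replayable fault ISR lock.
if (!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu)) {
    if (will_parse_entry) {
        // Service path: decrypt the GSP-RM-encrypted entry before parsing it.
        status = uvm_conf_computing_fault_decrypt(parent_gpu,
                                                  &plain_entry,
                                                  encrypted_entry,
                                                  metadata->authTag,
                                                  metadata->valid);
    }
    else {
        // Flush path: the entry is skipped without decryption, so advance the
        // decrypt IV by one to stay in sync with GSP-RM's encrypt IV.
        uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu, 1);
    }
}
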
|
||||
|
||||
@@ -50,6 +50,7 @@ typedef struct uvm_channel_struct uvm_channel_t;
|
||||
typedef struct uvm_user_channel_struct uvm_user_channel_t;
|
||||
typedef struct uvm_push_struct uvm_push_t;
|
||||
typedef struct uvm_push_info_struct uvm_push_info_t;
|
||||
typedef struct uvm_push_crypto_bundle_struct uvm_push_crypto_bundle_t;
|
||||
typedef struct uvm_push_acquire_info_struct uvm_push_acquire_info_t;
|
||||
typedef struct uvm_pushbuffer_struct uvm_pushbuffer_t;
|
||||
typedef struct uvm_gpfifo_entry_struct uvm_gpfifo_entry_t;
|
||||
|
||||
@@ -198,6 +198,12 @@ typedef struct
|
||||
|
||||
// Client type of the service requestor.
|
||||
uvm_fault_client_type_t client_type;
|
||||
|
||||
// New residency ID of the faulting region.
|
||||
uvm_processor_id_t residency_id;
|
||||
|
||||
// New residency NUMA node ID of the faulting region.
|
||||
int residency_node;
|
||||
} uvm_ats_fault_context_t;
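A minimal sketch of how these two fields are consumed; this simply mirrors the service_ats_faults() hunk earlier in the commit rather than introducing anything new:

// Sketch: the batch-selected residency becomes the pageable-migration target.
uvm_migrate_args_t uvm_migrate_args = {
    .dst_id      = ats_context->residency_id,    // processor id (GPU or UVM_ID_CPU)
    .dst_node_id = ats_context->residency_node,  // NUMA node backing that residency
    // ... remaining fields as in service_ats_faults() ...
};
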
|
||||
|
||||
struct uvm_fault_service_batch_context_struct
|
||||
|
||||
@@ -177,31 +177,34 @@ bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
return has_pending_faults == NV_TRUE;
|
||||
}
|
||||
|
||||
static NvU32 fetch_non_replayable_fault_buffer_entries(uvm_gpu_t *gpu)
|
||||
static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu, NvU32 *cached_faults)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 i = 0;
|
||||
NvU32 cached_faults = 0;
|
||||
uvm_fault_buffer_entry_t *fault_cache;
|
||||
NvU32 entry_size = gpu->parent->fault_buffer_hal->entry_size(gpu->parent);
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
NvU32 i;
|
||||
NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
|
||||
uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;
|
||||
|
||||
fault_cache = non_replayable_faults->fault_cache;
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
|
||||
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.non_replayable_faults.service_lock));
|
||||
UVM_ASSERT(gpu->parent->non_replayable_faults_supported);
|
||||
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
|
||||
current_hw_entry,
|
||||
cached_faults);
|
||||
|
||||
status = nvUvmInterfaceGetNonReplayableFaults(&gpu->parent->fault_buffer_info.rm_info,
|
||||
non_replayable_faults->shadow_buffer_copy,
|
||||
&cached_faults);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("nvUvmInterfaceGetNonReplayableFaults() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
parent_gpu->name);
|
||||
|
||||
uvm_global_set_fatal_error(status);
|
||||
return status;
|
||||
}
|
||||
|
||||
// Parse all faults
|
||||
for (i = 0; i < cached_faults; ++i) {
|
||||
uvm_fault_buffer_entry_t *fault_entry = &non_replayable_faults->fault_cache[i];
|
||||
|
||||
gpu->parent->fault_buffer_hal->parse_non_replayable_entry(gpu->parent, current_hw_entry, fault_entry);
|
||||
for (i = 0; i < *cached_faults; ++i) {
|
||||
parent_gpu->fault_buffer_hal->parse_non_replayable_entry(parent_gpu, current_hw_entry, fault_entry);
|
||||
|
||||
// The GPU aligns the fault addresses to 4k, but all of our tracking is
|
||||
// done in PAGE_SIZE chunks which might be larger.
|
||||
@@ -226,9 +229,10 @@ static NvU32 fetch_non_replayable_fault_buffer_entries(uvm_gpu_t *gpu)
|
||||
}
|
||||
|
||||
current_hw_entry += entry_size;
|
||||
fault_entry++;
|
||||
}
|
||||
|
||||
return cached_faults;
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// In SRIOV, the UVM (guest) driver does not have access to the privileged
|
||||
@@ -705,21 +709,28 @@ exit_no_channel:
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
|
||||
if (status != NV_OK)
|
||||
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 cached_faults;
|
||||
|
||||
// If this handler is modified to handle fewer than all of the outstanding
|
||||
// faults, then special handling will need to be added to uvm_suspend()
|
||||
// to guarantee that fault processing has completed before control is
|
||||
// returned to the RM.
|
||||
while ((cached_faults = fetch_non_replayable_fault_buffer_entries(gpu)) > 0) {
|
||||
do {
|
||||
NV_STATUS status;
|
||||
NvU32 i;
|
||||
|
||||
status = fetch_non_replayable_fault_buffer_entries(gpu->parent, &cached_faults);
|
||||
if (status != NV_OK)
|
||||
return;
|
||||
|
||||
// Unlike replayable faults, we do not batch up and preprocess
// non-replayable faults, since getting multiple faults on the same
// memory region is not very likely.
|
||||
@@ -728,10 +739,7 @@ void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
|
||||
for (i = 0; i < cached_faults; ++i) {
|
||||
status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
|
||||
} while (cached_faults > 0);
|
||||
}
|
||||
|
||||
@@ -486,7 +486,9 @@ static NV_STATUS cancel_fault_precise_va(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu, uvm_fault_replay_type_t type, uvm_fault_service_batch_context_t *batch_context)
|
||||
static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
|
||||
uvm_fault_replay_type_t type,
|
||||
uvm_fault_service_batch_context_t *batch_context)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
@@ -572,6 +574,19 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu)
|
||||
return status;
|
||||
}
|
||||
|
||||
static void fault_buffer_skip_replayable_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, index));
|
||||
|
||||
// Flushed faults are never decrypted, but the decryption IV associated with
|
||||
// replayable faults still requires manual adjustment so it is kept in sync
|
||||
// with the encryption IV on the GSP-RM's side.
|
||||
if (!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu))
|
||||
uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu, 1);
|
||||
|
||||
parent_gpu->fault_buffer_hal->entry_clear_valid(parent_gpu, index);
|
||||
}
|
||||
|
||||
static NV_STATUS fault_buffer_flush_locked(uvm_gpu_t *gpu,
|
||||
uvm_gpu_buffer_flush_mode_t flush_mode,
|
||||
uvm_fault_replay_type_t fault_replay,
|
||||
@@ -610,7 +625,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_gpu_t *gpu,
|
||||
// Wait until valid bit is set
|
||||
UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
|
||||
|
||||
parent_gpu->fault_buffer_hal->entry_clear_valid(parent_gpu, get);
|
||||
fault_buffer_skip_replayable_entry(parent_gpu, get);
|
||||
++get;
|
||||
if (get == replayable_faults->max_faults)
|
||||
get = 0;
|
||||
@@ -785,9 +800,9 @@ static bool fetch_fault_buffer_try_merge_entry(uvm_fault_buffer_entry_t *current
|
||||
// This optimization cannot be performed during fault cancel on Pascal GPUs
|
||||
// (fetch_mode == FAULT_FETCH_MODE_ALL) since we need accurate tracking of all
|
||||
// the faults in each uTLB in order to guarantee precise fault attribution.
|
||||
static void fetch_fault_buffer_entries(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
fault_fetch_mode_t fetch_mode)
|
||||
static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
fault_fetch_mode_t fetch_mode)
|
||||
{
|
||||
NvU32 get;
|
||||
NvU32 put;
|
||||
@@ -796,6 +811,7 @@ static void fetch_fault_buffer_entries(uvm_gpu_t *gpu,
|
||||
NvU32 utlb_id;
|
||||
uvm_fault_buffer_entry_t *fault_cache;
|
||||
uvm_spin_loop_t spin;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
const bool in_pascal_cancel_path = (!gpu->parent->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
|
||||
const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;
|
||||
@@ -851,7 +867,9 @@ static void fetch_fault_buffer_entries(uvm_gpu_t *gpu,
|
||||
smp_mb__after_atomic();
|
||||
|
||||
// Got valid bit set. Let's cache.
|
||||
gpu->parent->fault_buffer_hal->parse_entry(gpu->parent, get, current_entry);
|
||||
status = gpu->parent->fault_buffer_hal->parse_replayable_entry(gpu->parent, get, current_entry);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// The GPU aligns the fault addresses to 4k, but all of our tracking is
|
||||
// done in PAGE_SIZE chunks which might be larger.
|
||||
@@ -918,6 +936,8 @@ done:
|
||||
|
||||
batch_context->num_cached_faults = fault_index;
|
||||
batch_context->num_coalesced_faults = num_coalesced_faults;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Sort comparator for pointers to fault buffer entries that sorts by
|
||||
@@ -2475,7 +2495,10 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
|
||||
batch_context->has_throttled_faults = false;
|
||||
|
||||
// 5) Fetch all faults from buffer
|
||||
fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_ALL);
|
||||
status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_ALL);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
++batch_context->batch_id;
|
||||
|
||||
UVM_ASSERT(batch_context->num_cached_faults == batch_context->num_coalesced_faults);
|
||||
@@ -2612,7 +2635,10 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
|
||||
batch_context->has_fatal_faults = false;
|
||||
batch_context->has_throttled_faults = false;
|
||||
|
||||
fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_BATCH_READY);
|
||||
status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_BATCH_READY);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
if (batch_context->num_cached_faults == 0)
|
||||
break;
|
||||
|
||||
|
||||
@@ -373,7 +373,7 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.read_get = uvm_hal_maxwell_fault_buffer_read_get_unsupported,
|
||||
.write_get = uvm_hal_maxwell_fault_buffer_write_get_unsupported,
|
||||
.get_ve_id = uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported,
|
||||
.parse_entry = uvm_hal_maxwell_fault_buffer_parse_entry_unsupported,
|
||||
.parse_replayable_entry = uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported,
|
||||
.entry_is_valid = uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported,
|
||||
.entry_clear_valid = uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported,
|
||||
.entry_size = uvm_hal_maxwell_fault_buffer_entry_size_unsupported,
|
||||
@@ -396,7 +396,7 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.read_put = uvm_hal_pascal_fault_buffer_read_put,
|
||||
.read_get = uvm_hal_pascal_fault_buffer_read_get,
|
||||
.write_get = uvm_hal_pascal_fault_buffer_write_get,
|
||||
.parse_entry = uvm_hal_pascal_fault_buffer_parse_entry,
|
||||
.parse_replayable_entry = uvm_hal_pascal_fault_buffer_parse_replayable_entry,
|
||||
.entry_is_valid = uvm_hal_pascal_fault_buffer_entry_is_valid,
|
||||
.entry_clear_valid = uvm_hal_pascal_fault_buffer_entry_clear_valid,
|
||||
.entry_size = uvm_hal_pascal_fault_buffer_entry_size,
|
||||
@@ -411,7 +411,7 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.read_get = uvm_hal_volta_fault_buffer_read_get,
|
||||
.write_get = uvm_hal_volta_fault_buffer_write_get,
|
||||
.get_ve_id = uvm_hal_volta_fault_buffer_get_ve_id,
|
||||
.parse_entry = uvm_hal_volta_fault_buffer_parse_entry,
|
||||
.parse_replayable_entry = uvm_hal_volta_fault_buffer_parse_replayable_entry,
|
||||
.parse_non_replayable_entry = uvm_hal_volta_fault_buffer_parse_non_replayable_entry,
|
||||
.get_fault_type = uvm_hal_volta_fault_buffer_get_fault_type,
|
||||
}
|
||||
|
||||
@@ -485,11 +485,24 @@ typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
|
||||
// Parse the entry on the given buffer index. This also clears the valid bit of
|
||||
// the entry in the buffer.
|
||||
typedef void (*uvm_hal_fault_buffer_parse_entry_t)(uvm_parent_gpu_t *gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
// Parse the replayable entry at the given buffer index. This also clears the
|
||||
// valid bit of the entry in the buffer.
|
||||
typedef NV_STATUS (*uvm_hal_fault_buffer_parse_replayable_entry_t)(uvm_parent_gpu_t *gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
NV_STATUS uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
NV_STATUS uvm_hal_volta_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
typedef bool (*uvm_hal_fault_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
typedef void (*uvm_hal_fault_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
typedef NvU32 (*uvm_hal_fault_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -508,9 +521,6 @@ NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent
|
||||
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry);
|
||||
|
||||
void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -519,18 +529,14 @@ void uvm_hal_pascal_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32
|
||||
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
|
||||
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
void uvm_hal_pascal_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
uvm_fault_type_t uvm_hal_pascal_fault_buffer_get_fault_type(const NvU32 *fault_entry);
|
||||
|
||||
NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
|
||||
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
|
||||
void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry);
|
||||
|
||||
uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry);
|
||||
|
||||
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
@@ -772,7 +778,7 @@ struct uvm_fault_buffer_hal_struct
|
||||
uvm_hal_fault_buffer_read_get_t read_get;
|
||||
uvm_hal_fault_buffer_write_get_t write_get;
|
||||
uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
|
||||
uvm_hal_fault_buffer_parse_entry_t parse_entry;
|
||||
uvm_hal_fault_buffer_parse_replayable_entry_t parse_replayable_entry;
|
||||
uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
|
||||
uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;
|
||||
uvm_hal_fault_buffer_entry_size_t entry_size;
|
||||
|
||||
@@ -128,6 +128,13 @@ static uvm_gpu_address_t uvm_gpu_address_virtual(NvU64 va)
|
||||
return address;
|
||||
}
|
||||
|
||||
static uvm_gpu_address_t uvm_gpu_address_virtual_unprotected(NvU64 va)
|
||||
{
|
||||
uvm_gpu_address_t address = uvm_gpu_address_virtual(va);
|
||||
address.is_unprotected = true;
|
||||
return address;
|
||||
}
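A one-line usage sketch (hypothetical variable names): encrypt_push() in this commit builds its destination with this helper so the CE writes ciphertext to the unprotected pushbuffer alias.

// Sketch: mark the sysmem alias of the push as an unprotected virtual address.
uvm_gpu_address_t dst = uvm_gpu_address_virtual_unprotected(push_unprotected_gpu_va);
UVM_ASSERT(dst.is_unprotected);
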
|
||||
|
||||
// Create a physical GPU address
|
||||
static uvm_gpu_address_t uvm_gpu_address_physical(uvm_aperture_t aperture, NvU64 pa)
|
||||
{
|
||||
|
||||
@@ -153,6 +153,10 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define VM_MIXEDMAP 0x00000000
#endif

#if !defined(MPOL_PREFERRED_MANY)
#define MPOL_PREFERRED_MANY 5
#endif

//
|
||||
// printk.h already defined pr_fmt, so we have to redefine it so the pr_*
|
||||
// routines pick up our version
|
||||
|
||||
@@ -68,11 +68,12 @@ uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const N
|
||||
return UVM_FAULT_TYPE_COUNT;
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
NV_STATUS uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
|
||||
@@ -944,17 +944,18 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
if (type == UVM_API_RANGE_TYPE_ATS) {
|
||||
uvm_migrate_args_t uvm_migrate_args =
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.start = params->base,
|
||||
.length = params->length,
|
||||
.dst_id = (dest_gpu ? dest_gpu->id : UVM_ID_CPU),
|
||||
.dst_node_id = (int)params->cpuNumaNode,
|
||||
.populate_permissions = UVM_POPULATE_PERMISSIONS_INHERIT,
|
||||
.touch = false,
|
||||
.skip_mapped = false,
|
||||
.user_space_start = ¶ms->userSpaceStart,
|
||||
.user_space_length = ¶ms->userSpaceLength,
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.start = params->base,
|
||||
.length = params->length,
|
||||
.dst_id = (dest_gpu ? dest_gpu->id : UVM_ID_CPU),
|
||||
.dst_node_id = (int)params->cpuNumaNode,
|
||||
.populate_permissions = UVM_POPULATE_PERMISSIONS_INHERIT,
|
||||
.touch = false,
|
||||
.skip_mapped = false,
|
||||
.populate_on_cpu_alloc_failures = false,
|
||||
.user_space_start = ¶ms->userSpaceStart,
|
||||
.user_space_length = ¶ms->userSpaceLength,
|
||||
};
|
||||
|
||||
status = uvm_migrate_pageable(&uvm_migrate_args);
|
||||
|
||||
@@ -507,6 +507,22 @@ static NV_STATUS migrate_vma_copy_pages(struct vm_area_struct *vma,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void migrate_vma_cleanup_pages(unsigned long *dst, unsigned long npages)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < npages; i++) {
|
||||
struct page *dst_page = migrate_pfn_to_page(dst[i]);
|
||||
|
||||
if (!dst_page)
|
||||
continue;
|
||||
|
||||
unlock_page(dst_page);
|
||||
__free_page(dst_page);
|
||||
dst[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_t *state)
|
||||
{
|
||||
struct vm_area_struct *vma = args->vma;
|
||||
@@ -531,6 +547,10 @@ void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_
|
||||
|
||||
if (state->status == NV_OK)
|
||||
state->status = tracker_status;
|
||||
|
||||
// Mark all pages as not migrating if we're failing
|
||||
if (state->status != NV_OK)
|
||||
migrate_vma_cleanup_pages(args->dst, state->num_pages);
|
||||
}
|
||||
|
||||
void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
|
||||
@@ -802,7 +822,7 @@ static NV_STATUS migrate_pageable_vma_region(struct vm_area_struct *vma,
|
||||
// If the destination is the CPU, signal user-space to retry with a
|
||||
// different node. Otherwise, just try to populate anywhere in the
|
||||
// system
|
||||
if (UVM_ID_IS_CPU(uvm_migrate_args->dst_id)) {
|
||||
if (UVM_ID_IS_CPU(uvm_migrate_args->dst_id) && !uvm_migrate_args->populate_on_cpu_alloc_failures) {
|
||||
*next_addr = start + find_first_bit(state->scratch2_mask, num_pages) * PAGE_SIZE;
|
||||
return NV_ERR_MORE_PROCESSING_REQUIRED;
|
||||
}
|
||||
@@ -961,13 +981,10 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
|
||||
// We only check that dst_node_id is a valid node in the system and it
|
||||
// doesn't correspond to a GPU node. This is fine because
|
||||
// alloc_pages_node will clamp the allocation to
|
||||
// cpuset_current_mems_allowed, and uvm_migrate_pageable is only called
|
||||
// from process context (uvm_migrate) when dst_id is CPU. UVM bottom
|
||||
// half never calls uvm_migrate_pageable when dst_id is CPU. So, assert
|
||||
// that we're in a user thread. However, this would need to change if we
|
||||
// wanted to call this function from a bottom half with CPU dst_id.
|
||||
UVM_ASSERT(!(current->flags & PF_KTHREAD));
|
||||
|
||||
// cpuset_current_mems_allowed when uvm_migrate_pageable is called from
|
||||
// process context (uvm_migrate) when dst_id is CPU. UVM bottom half
|
||||
// calls uvm_migrate_pageable with CPU dst_id only when the VMA memory
|
||||
// policy is set to dst_node_id and dst_node_id is not NUMA_NO_NODE.
|
||||
if (!nv_numa_node_has_memory(dst_node_id) ||
|
||||
uvm_va_space_find_gpu_with_memory_node_id(va_space, dst_node_id) != NULL)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
@@ -43,6 +43,7 @@ typedef struct
uvm_populate_permissions_t populate_permissions;
bool touch : 1;
bool skip_mapped : 1;
bool populate_on_cpu_alloc_failures : 1;
NvU64 *user_space_start;
NvU64 *user_space_length;
} uvm_migrate_args_t;
|
||||
|
||||
@@ -214,9 +214,9 @@ static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t
|
||||
return fault_entry_metadata + index;
|
||||
}
|
||||
|
||||
void uvm_hal_pascal_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
NvU32 *fault_entry;
|
||||
NvU64 addr_hi, addr_lo;
|
||||
@@ -280,6 +280,8 @@ void uvm_hal_pascal_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
// Automatically clear valid bit for the entry in the fault buffer
|
||||
uvm_hal_pascal_fault_buffer_entry_clear_valid(parent_gpu, index);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
bool uvm_hal_pascal_fault_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
|
||||
@@ -1455,7 +1455,18 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
|
||||
hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;
|
||||
|
||||
closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, requester);
|
||||
UVM_ASSERT(UVM_ID_IS_VALID(closest_resident_id));
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
// HMM pages always start out resident on the CPU but may not be
|
||||
// recorded in the va_block state because hmm_range_fault() or
|
||||
// similar functions haven't been called to get an accurate snapshot
|
||||
// of the Linux state. We can assume pages are CPU resident for the
|
||||
// purpose of deciding where to migrate to reduce thrashing.
|
||||
if (UVM_ID_IS_INVALID(closest_resident_id))
|
||||
closest_resident_id = UVM_ID_CPU;
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(UVM_ID_IS_VALID(closest_resident_id));
|
||||
}
|
||||
|
||||
if (thrashing_processors_can_access(va_space, page_thrashing, preferred_location)) {
|
||||
// The logic in uvm_va_block_select_residency chooses the preferred
|
||||
|
||||
@@ -64,6 +64,14 @@ typedef enum
|
||||
UVM_PUSH_FLAG_COUNT,
|
||||
} uvm_push_flag_t;
|
||||
|
||||
struct uvm_push_crypto_bundle_struct {
|
||||
// Initialization vector used to decrypt the push
|
||||
UvmCslIv iv;
|
||||
|
||||
// Size of the pushbuffer that is encrypted/decrypted
|
||||
NvU32 push_size;
|
||||
};
|
||||
|
||||
struct uvm_push_struct
|
||||
{
|
||||
// Location of the first method of the push
|
||||
|
||||
@@ -776,15 +776,6 @@ static NV_STATUS test_timestamp_on_gpu(uvm_gpu_t *gpu)
|
||||
NvU32 i;
|
||||
NvU64 last_stamp = 0;
|
||||
|
||||
// TODO: Bug 3988992: [UVM][HCC] RFE - Support encrypted semaphore for secure CE channels
// This test is waived when Confidential Computing is enabled because it
// assumes that the CPU can directly read the result of a semaphore timestamp
// operation. Instead, the operation needs to be followed up by an encrypt-
// decrypt trip to be accessible to the CPU. This will be cleaner and simpler
// once encrypted semaphores are available.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
for (i = 0; i < 10; ++i) {
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Releasing a timestamp");
|
||||
if (status != NV_OK)
|
||||
|
||||
@@ -449,21 +449,68 @@ static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm
|
||||
return chunk;
|
||||
}
|
||||
|
||||
void uvm_pushbuffer_mark_completed(uvm_pushbuffer_t *pushbuffer, uvm_gpfifo_entry_t *gpfifo)
|
||||
static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 auth_tag_offset;
|
||||
void *auth_tag_cpu_va;
|
||||
void *push_protected_cpu_va;
|
||||
void *push_unprotected_cpu_va;
|
||||
NvU32 pushbuffer_offset = gpfifo->pushbuffer_offset;
|
||||
NvU32 push_info_index = gpfifo->push_info - channel->push_infos;
|
||||
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
|
||||
uvm_push_crypto_bundle_t *crypto_bundle = channel->conf_computing.push_crypto_bundles + push_info_index;
|
||||
|
||||
if (channel->conf_computing.push_crypto_bundles == NULL)
|
||||
return;
|
||||
|
||||
// When the crypto bundle is used, the push size cannot be zero
|
||||
if (crypto_bundle->push_size == 0)
|
||||
return;
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_wlc(channel));
|
||||
UVM_ASSERT(!uvm_channel_is_lcic(channel));
|
||||
|
||||
push_protected_cpu_va = (char *)get_base_cpu_va(pushbuffer) + pushbuffer_offset;
|
||||
push_unprotected_cpu_va = (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem) + pushbuffer_offset;
|
||||
auth_tag_offset = push_info_index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
|
||||
auth_tag_cpu_va = (char *)uvm_rm_mem_get_cpu_va(channel->conf_computing.push_crypto_bundle_auth_tags) +
|
||||
auth_tag_offset;
|
||||
|
||||
status = uvm_conf_computing_cpu_decrypt(channel,
|
||||
push_protected_cpu_va,
|
||||
push_unprotected_cpu_va,
|
||||
&crypto_bundle->iv,
|
||||
crypto_bundle->push_size,
|
||||
auth_tag_cpu_va);
|
||||
|
||||
// A decryption failure here is not fatal because it does not
|
||||
// prevent UVM from running fine in the future and cannot be used
|
||||
// maliciously to leak information or otherwise derail UVM from its
|
||||
// regular duties.
|
||||
UVM_ASSERT_MSG_RELEASE(status == NV_OK, "Pushbuffer decryption failure: %s\n", nvstatusToString(status));
|
||||
|
||||
// Avoid reusing the bundle across multiple pushes
|
||||
crypto_bundle->push_size = 0;
|
||||
}
|
||||
|
||||
void uvm_pushbuffer_mark_completed(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
|
||||
{
|
||||
uvm_pushbuffer_chunk_t *chunk;
|
||||
uvm_push_info_t *push_info = gpfifo->push_info;
|
||||
bool need_to_update_chunk = false;
|
||||
uvm_push_info_t *push_info = gpfifo->push_info;
|
||||
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
|
||||
|
||||
UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);
|
||||
|
||||
chunk = gpfifo_to_chunk(pushbuffer, gpfifo);
|
||||
|
||||
if (push_info->on_complete != NULL)
|
||||
if (push_info->on_complete != NULL) {
|
||||
decrypt_push(channel, gpfifo);
|
||||
push_info->on_complete(push_info->on_complete_data);
|
||||
|
||||
push_info->on_complete = NULL;
|
||||
push_info->on_complete_data = NULL;
|
||||
push_info->on_complete = NULL;
|
||||
push_info->on_complete_data = NULL;
|
||||
}
|
||||
|
||||
uvm_spin_lock(&pushbuffer->lock);
|
||||
|
||||
|
||||
@@ -258,7 +258,7 @@ NV_STATUS uvm_pushbuffer_begin_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *pu
|
||||
|
||||
// Complete a pending push
|
||||
// Updates the chunk state the pending push used
|
||||
void uvm_pushbuffer_mark_completed(uvm_pushbuffer_t *pushbuffer, uvm_gpfifo_entry_t *gpfifo);
|
||||
void uvm_pushbuffer_mark_completed(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo);
|
||||
|
||||
// Get the GPU VA for an ongoing push
|
||||
NvU64 uvm_pushbuffer_get_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push);
|
||||
|
||||
@@ -275,13 +275,15 @@ static NV_STATUS alloc_and_init_mem(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size
|
||||
TEST_NV_CHECK_GOTO(ce_memset_gpu(gpu, *mem, size, 0xdead), err);
|
||||
}
|
||||
else {
|
||||
if (type == MEM_ALLOC_TYPE_SYSMEM_DMA)
|
||||
if (type == MEM_ALLOC_TYPE_SYSMEM_DMA) {
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
|
||||
else
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
|
||||
}
|
||||
else {
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem(size, NULL, mem));
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(*mem), err);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
|
||||
write_range_cpu(*mem, size, 0xdeaddead);
|
||||
}
|
||||
|
||||
@@ -443,7 +445,6 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
|
||||
cpu_encrypt(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size);
|
||||
gpu_decrypt(&push, dst_plain, src_cipher, auth_tag_mem, size, copy_size);
|
||||
|
||||
|
||||
// Wait for SEC2 before launching the CE part.
|
||||
// SEC2 is only allowed to release semaphores in unprotected sysmem,
|
||||
// and CE can only acquire semaphores in protected vidmem.
|
||||
|
||||
@@ -2083,12 +2083,6 @@ static uvm_processor_id_t block_page_get_closest_resident_in_mask(uvm_va_block_t
|
||||
return id;
|
||||
}
|
||||
|
||||
// HMM va_blocks don't know if a page is CPU resident until either
|
||||
// migrate_vma_setup() or hmm_range_fault() is called. If a page isn't
|
||||
// resident anywhere, assume it is CPU resident.
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
return UVM_ID_CPU;
|
||||
|
||||
return UVM_ID_INVALID;
|
||||
}
|
||||
|
||||
@@ -2888,7 +2882,7 @@ static uvm_va_block_region_t block_phys_contig_region(uvm_va_block_t *block,
|
||||
{
|
||||
if (UVM_ID_IS_CPU(resident_id)) {
|
||||
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, page_index);
|
||||
return uvm_va_block_region(page_index, page_index + uvm_cpu_chunk_num_pages(chunk));
|
||||
return uvm_cpu_chunk_block_region(block, chunk, page_index);
|
||||
}
|
||||
else {
|
||||
uvm_chunk_size_t chunk_size;
|
||||
|
||||
@@ -25,7 +25,8 @@
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_conf_computing.h"
|
||||
#include "nv_uvm_types.h"
|
||||
#include "hwref/volta/gv100/dev_fault.h"
|
||||
#include "hwref/volta/gv100/dev_fb.h"
|
||||
#include "clc369.h"
|
||||
@@ -246,6 +247,20 @@ static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
return fault_entry;
|
||||
}
|
||||
|
||||
// See uvm_pascal_fault_buffer.c::get_fault_buffer_entry_metadata
|
||||
static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
UvmFaultMetadataPacket *fault_entry_metadata;
|
||||
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));
|
||||
|
||||
fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
|
||||
UVM_ASSERT(fault_entry_metadata != NULL);
|
||||
|
||||
return fault_entry_metadata + index;
|
||||
}
|
||||
|
||||
static void parse_fault_entry_common(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 *fault_entry,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
@@ -323,24 +338,47 @@ static void parse_fault_entry_common(uvm_parent_gpu_t *parent_gpu,
|
||||
UVM_ASSERT_MSG(replayable_fault_enabled, "Fault with REPLAYABLE_FAULT_EN bit unset\n");
|
||||
}
|
||||
|
||||
void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
NV_STATUS uvm_hal_volta_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
fault_buffer_entry_c369_t entry;
|
||||
NvU32 *fault_entry;
|
||||
BUILD_BUG_ON(NVC369_BUF_SIZE > UVM_GPU_MMU_MAX_FAULT_PACKET_SIZE);
|
||||
|
||||
BUILD_BUG_ON(sizeof(entry) > UVM_GPU_MMU_MAX_FAULT_PACKET_SIZE);
|
||||
|
||||
// Valid bit must be set before this function is called
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, index));
|
||||
|
||||
fault_entry = get_fault_buffer_entry(parent_gpu, index);
|
||||
|
||||
// When Confidential Computing is enabled, faults are encrypted by RM, so
|
||||
// they need to be decrypted before they can be parsed
|
||||
if (!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu)) {
|
||||
NV_STATUS status;
|
||||
UvmFaultMetadataPacket *fault_entry_metadata = get_fault_buffer_entry_metadata(parent_gpu, index);
|
||||
|
||||
status = uvm_conf_computing_fault_decrypt(parent_gpu,
|
||||
&entry,
|
||||
fault_entry,
|
||||
fault_entry_metadata->authTag,
|
||||
fault_entry_metadata->valid);
|
||||
if (status != NV_OK) {
|
||||
uvm_global_set_fatal_error(status);
|
||||
return status;
|
||||
}
|
||||
|
||||
fault_entry = (NvU32 *) &entry;
|
||||
}
|
||||
|
||||
parse_fault_entry_common(parent_gpu, fault_entry, buffer_entry);
|
||||
|
||||
UVM_ASSERT(buffer_entry->is_replayable);
|
||||
|
||||
// Automatically clear valid bit for the entry in the fault buffer
|
||||
parent_gpu->fault_buffer_hal->entry_clear_valid(parent_gpu, index);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_hal_volta_fault_buffer_parse_non_replayable_entry(uvm_parent_gpu_t *parent_gpu,