535.43.09

russellcnv
2023-09-01 21:36:45 -07:00
parent 18b7303c54
commit 17546dbdda
122 changed files with 41587 additions and 34584 deletions

View File

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.08\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.09\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@@ -566,8 +566,11 @@ typedef struct UvmPlatformInfo_tag
// Out: ATS (Address Translation Services) is supported
NvBool atsSupported;
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
NvBool sevEnabled;
// Out: True if HW trusted execution, such as AMD's SEV-SNP or Intel's TDX,
// is enabled in the VM, indicating that Confidential Computing must also
// be enabled in the GPU(s); these two security features are either both
// enabled or both disabled.
NvBool confComputingEnabled;
} UvmPlatformInfo;
typedef struct UvmGpuClientInfo_tag
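The both-enabled-or-both-disabled invariant described above is what lets UVM treat the single platform flag as authoritative. A minimal sketch of the kind of consistency check a consumer could perform (illustrative only; the check this commit actually adds is uvm_conf_computing_check_parent_gpu(), shown further down):

// Illustrative sketch, not part of this change: the platform flag and a GPU's
// Confidential Computing mode are expected to agree.
static void example_assert_cc_invariant(const UvmPlatformInfo *platform_info,
                                        NvBool gpu_conf_computing_enabled)
{
    // Both enabled or both disabled; a mismatch indicates a misconfigured stack.
    UVM_ASSERT(platform_info->confComputingEnabled == gpu_conf_computing_enabled);
}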

View File

@@ -6341,6 +6341,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MEMPOLICY_HAS_HOME_NODE" "" "types"
;;
mmu_interval_notifier)
#
# Determine if mmu_interval_notifier struct is present or not
#
# Added by commit 99cb252f5 ("mm/mmu_notifier: add an interval tree
# notifier") in v5.10 (2019-11-12).
#
CODE="
#include <linux/mmu_notifier.h>
struct mmu_interval_notifier interval_notifier;
"
compile_check_conftest "$CODE" "NV_MMU_INTERVAL_NOTIFIER" "" "types"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit.
#
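When the snippet above compiles, conftest emits a definition of NV_MMU_INTERVAL_NOTIFIER, and the UVM sources in this same commit gate their interval-notifier code on that macro (see uvm_ats.h and uvm_gpu.h below). A minimal sketch of the consuming pattern, with an illustrative helper name and no claim about the exact conftest plumbing:

#include <linux/mmu_notifier.h>

#if defined(NV_MMU_INTERVAL_NOTIFIER)
// Newer kernels: the interval-notifier API is available.
static int example_insert_notifier(struct mmu_interval_notifier *mni,
                                   struct mm_struct *mm,
                                   unsigned long start,
                                   unsigned long length,
                                   const struct mmu_interval_notifier_ops *ops)
{
    return mmu_interval_notifier_insert(mni, mm, start, length, ops);
}
#else
// Older kernels: the struct does not exist, so the dependent code is compiled out.
#endif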

View File

@@ -110,5 +110,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg

View File

@@ -44,6 +44,8 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
void uvm_ats_init_va_space(uvm_va_space_t *va_space)
{
uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_init_va_space(va_space);
}

View File

@@ -28,17 +28,32 @@
#include "uvm_forward_decl.h"
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#include "uvm_lock.h"
#include "uvm_ats_sva.h"
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_ATS_PREFETCH_SUPPORTED() 1
#else
#define UVM_ATS_PREFETCH_SUPPORTED() 0
#endif
typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
// indexed by gpu->id. This mask is protected by the VA space lock.
uvm_processor_mask_t registered_gpu_va_spaces;
// Protects racing invalidates in the VA space while hmm_range_fault() is
// being called in ats_compute_residency_mask().
uvm_rw_semaphore_t lock;
union
{
uvm_ibm_va_space_t ibm;

View File

@@ -20,60 +20,19 @@
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_tools.h"
#include "uvm_va_range.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"
#include "uvm_migrate_pageable.h"
#include <linux/nodemask.h>
#include <linux/mempolicy.h>
#include <linux/mmu_notifier.h>
// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
// these experimental parameters. These are intended to help guide that policy.
static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
"Max order of pages (2^N) to prefetch on replayable ATS faults");
static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
"Max order of pages (2^N) to prefetch on non-replayable ATS faults");
// Expand the fault region to the naturally-aligned region with order given by
// the module parameters, clamped to the vma containing fault_addr (if any).
// Note that this means the region contains fault_addr but may not begin at
// fault_addr.
static void expand_fault_region(struct vm_area_struct *vma,
NvU64 start,
size_t length,
uvm_fault_client_type_t client_type,
unsigned long *migrate_start,
unsigned long *migrate_length)
{
unsigned int order;
unsigned long outer, aligned_start, aligned_size;
*migrate_start = start;
*migrate_length = length;
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
order = uvm_exp_perf_prefetch_ats_order_non_replayable;
else
order = uvm_exp_perf_prefetch_ats_order_replayable;
if (order == 0)
return;
UVM_ASSERT(vma);
UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);
aligned_size = (1UL << order) * PAGE_SIZE;
aligned_start = start & ~(aligned_size - 1);
*migrate_start = max(vma->vm_start, aligned_start);
outer = min(vma->vm_end, aligned_start + aligned_size);
*migrate_length = outer - *migrate_start;
}
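As a worked example of the expansion above (hypothetical values): with order = 4 and 4 KiB pages, aligned_size = 16 * PAGE_SIZE = 64 KiB, so a fault at 0x7f1000012000 yields aligned_start = 0x7f1000012000 & ~0xffff = 0x7f1000010000; the range [aligned_start, aligned_start + 64 KiB) is then clamped to [vma->vm_start, vma->vm_end) before *migrate_start and *migrate_length are written back.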
#if UVM_ATS_PREFETCH_SUPPORTED()
#include <linux/hmm.h>
#endif
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
@@ -122,6 +81,8 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
.mm = mm,
.dst_id = ats_context->residency_id,
.dst_node_id = ats_context->residency_node,
.start = start,
.length = length,
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
.touch = true,
.skip_mapped = true,
@@ -132,13 +93,6 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
expand_fault_region(vma,
start,
length,
ats_context->client_type,
&uvm_migrate_args.start,
&uvm_migrate_args.length);
// We are trying to use migrate_vma API in the kernel (if it exists) to
// populate and map the faulting region on the GPU. We want to do this only
// on the first touch. That is, pages which are not already mapped. So, we
@@ -184,6 +138,12 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
struct mempolicy *vma_policy = vma_policy(vma);
unsigned short mode;
ats_context->prefetch_state.has_preferred_location = false;
// It's safe to read vma_policy since the mmap_lock is held in at least read
// mode in this path.
uvm_assert_mmap_lock_locked(vma->vm_mm);
if (!vma_policy)
goto done;
@@ -212,6 +172,9 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
else
residency = first_node(vma_policy->nodes);
}
if (!nodes_empty(vma_policy->nodes))
ats_context->prefetch_state.has_preferred_location = true;
}
// Update gpu if residency is not the faulting gpu.
@@ -219,12 +182,253 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
done:
#else
ats_context->prefetch_state.has_preferred_location = false;
#endif
ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
ats_context->residency_node = residency;
}
static void get_range_in_vma(struct vm_area_struct *vma, NvU64 base, NvU64 *start, NvU64 *end)
{
*start = max(vma->vm_start, (unsigned long) base);
*end = min(vma->vm_end, (unsigned long) (base + UVM_VA_BLOCK_SIZE));
}
static uvm_page_index_t uvm_ats_cpu_page_index(NvU64 base, NvU64 addr)
{
UVM_ASSERT(addr >= base);
UVM_ASSERT(addr <= (base + UVM_VA_BLOCK_SIZE));
return (addr - base) / PAGE_SIZE;
}
// start and end must be aligned to PAGE_SIZE and must fall within
// [base, base + UVM_VA_BLOCK_SIZE]
static uvm_va_block_region_t uvm_ats_region_from_start_end(NvU64 start, NvU64 end)
{
// base can be greater than, less than or equal to the start of a VMA.
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(start);
UVM_ASSERT(start < end);
UVM_ASSERT(PAGE_ALIGNED(start));
UVM_ASSERT(PAGE_ALIGNED(end));
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
return uvm_va_block_region(uvm_ats_cpu_page_index(base, start), uvm_ats_cpu_page_index(base, end));
}
static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma, NvU64 base)
{
NvU64 start;
NvU64 end;
get_range_in_vma(vma, base, &start, &end);
return uvm_ats_region_from_start_end(start, end);
}
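A concrete (hypothetical) example, assuming the usual 2 MiB UVM_VA_BLOCK_SIZE and 4 KiB pages: with base = 0x7f4200200000 and a VMA spanning [0x7f4200280000, 0x7f4200500000), get_range_in_vma() clamps to start = 0x7f4200280000 and end = 0x7f4200400000; UVM_VA_BLOCK_ALIGN_DOWN(start) recovers base, so the resulting region covers page indices [128, 512) of the 512-page block.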
#if UVM_ATS_PREFETCH_SUPPORTED()
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
{
uvm_ats_fault_context_t *ats_context = container_of(mni, uvm_ats_fault_context_t, prefetch_state.notifier);
uvm_va_space_t *va_space = ats_context->prefetch_state.va_space;
// The following write lock protects against concurrent invalidates while
// hmm_range_fault() is being called in ats_compute_residency_mask().
uvm_down_write(&va_space->ats.lock);
mmu_interval_set_seq(mni, cur_seq);
uvm_up_write(&va_space->ats.lock);
return true;
}
static bool uvm_ats_invalidate_notifier_entry(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
UVM_ENTRY_RET(uvm_ats_invalidate_notifier(mni, cur_seq));
}
static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
{
.invalidate = uvm_ats_invalidate_notifier_entry,
};
#endif
static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
#if UVM_ATS_PREFETCH_SUPPORTED()
int ret;
NvU64 start;
NvU64 end;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
struct hmm_range range;
uvm_page_index_t page_index;
uvm_va_block_region_t vma_region;
uvm_va_space_t *va_space = gpu_va_space->va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
uvm_assert_rwsem_locked_read(&va_space->lock);
ats_context->prefetch_state.first_touch = true;
uvm_page_mask_zero(residency_mask);
get_range_in_vma(vma, base, &start, &end);
vma_region = uvm_ats_region_from_start_end(start, end);
range.notifier = &ats_context->prefetch_state.notifier;
range.start = start;
range.end = end;
range.hmm_pfns = ats_context->prefetch_state.pfns;
range.default_flags = 0;
range.pfn_flags_mask = 0;
range.dev_private_owner = NULL;
ats_context->prefetch_state.va_space = va_space;
// mmu_interval_notifier_insert() will try to acquire mmap_lock for write
// and will deadlock since mmap_lock is already held for read in this path.
// This is prevented by calling __mmu_notifier_register() during va_space
// creation. See the comment in uvm_mmu_notifier_register() for more
// details.
ret = mmu_interval_notifier_insert(range.notifier, mm, start, end, &uvm_ats_notifier_ops);
if (ret)
return errno_to_nv_status(ret);
while (true) {
range.notifier_seq = mmu_interval_read_begin(range.notifier);
ret = hmm_range_fault(&range);
if (ret == -EBUSY)
continue;
if (ret) {
status = errno_to_nv_status(ret);
UVM_ASSERT(status != NV_OK);
break;
}
uvm_down_read(&va_space->ats.lock);
// Pages may have been freed or re-allocated after hmm_range_fault() is
// called, so the PTE might point to a different page or to nothing. In the
// memory hot-unplug case it is not safe to call page_to_nid() on the
// page, as the struct page itself may have been freed. To protect
// against these cases, uvm_ats_invalidate_notifier_entry() blocks on the
// va_space ATS write lock for concurrent invalidates, since the va_space
// ATS lock is held for read in this path.
if (!mmu_interval_read_retry(range.notifier, range.notifier_seq))
break;
uvm_up_read(&va_space->ats.lock);
}
if (status == NV_OK) {
for_each_va_block_page_in_region(page_index, vma_region) {
unsigned long pfn = ats_context->prefetch_state.pfns[page_index - vma_region.first];
if (pfn & HMM_PFN_VALID) {
struct page *page = hmm_pfn_to_page(pfn);
if (page_to_nid(page) == ats_context->residency_node)
uvm_page_mask_set(residency_mask, page_index);
ats_context->prefetch_state.first_touch = false;
}
}
uvm_up_read(&va_space->ats.lock);
}
mmu_interval_notifier_remove(range.notifier);
#endif
return status;
}
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
uvm_ats_fault_context_t *ats_context,
uvm_va_block_region_t max_prefetch_region,
uvm_page_mask_t *faulted_mask)
{
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
if (uvm_page_mask_empty(faulted_mask))
return;
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
faulted_mask,
uvm_va_block_region_from_mask(NULL, faulted_mask),
max_prefetch_region,
residency_mask,
bitmap_tree,
prefetch_mask);
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
if (vma->vm_flags & VM_WRITE)
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
}
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
return status;
if (uvm_page_mask_empty(faulted_mask))
return status;
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
// Prefetch the entire region if none of the pages are resident on any node
// and if preferred_location is the faulting GPU.
if (ats_context->prefetch_state.has_preferred_location &&
ats_context->prefetch_state.first_touch &&
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
if (vma->vm_flags & VM_WRITE)
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
return status;
}
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
return status;
}
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
@@ -267,6 +471,8 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
ats_batch_select_residency(gpu_va_space, vma, ats_context);
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -54,23 +54,26 @@ bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
}
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent)
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
{
UvmGpuConfComputeMode cc, sys_cc;
uvm_gpu_t *first;
uvm_gpu_t *first_gpu;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
// The Confidential Computing state of the GPU should match that of the
// system.
UVM_ASSERT(uvm_conf_computing_mode_enabled_parent(parent) == g_uvm_global.conf_computing_enabled);
// TODO: Bug 2844714: since we have no routine to traverse parent GPUs,
// find first child GPU and get its parent.
first = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
if (!first)
return NV_OK;
first_gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
if (first_gpu == NULL)
return;
sys_cc = uvm_conf_computing_get_mode(first->parent);
cc = uvm_conf_computing_get_mode(parent);
return cc == sys_cc ? NV_OK : NV_ERR_NOT_SUPPORTED;
// All GPUs derive Confidential Computing status from their parent. By
// current policy all parent GPUs have identical Confidential Computing
// status.
UVM_ASSERT(uvm_conf_computing_get_mode(parent) == uvm_conf_computing_get_mode(first_gpu->parent));
}
static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,

View File

@@ -60,10 +60,8 @@
// UVM_METHOD_SIZE * 2 * 10 = 80.
#define UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE 80
// All GPUs derive confidential computing status from their parent.
// By current policy all parent GPUs have identical confidential
// computing status.
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent);
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu);
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);

View File

@@ -71,11 +71,6 @@ static void uvm_unregister_callbacks(void)
}
}
static void sev_init(const UvmPlatformInfo *platform_info)
{
g_uvm_global.sev_enabled = platform_info->sevEnabled;
}
NV_STATUS uvm_global_init(void)
{
NV_STATUS status;
@@ -124,8 +119,7 @@ NV_STATUS uvm_global_init(void)
uvm_ats_init(&platform_info);
g_uvm_global.num_simulated_devices = 0;
sev_init(&platform_info);
g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;
status = uvm_gpu_init();
if (status != NV_OK) {

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -143,11 +143,16 @@ struct uvm_global_struct
struct page *page;
} unload_state;
// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
// enabled. This field is set once during global initialization
// (uvm_global_init), and can be read afterwards without acquiring any
// locks.
bool sev_enabled;
// True if the VM has AMD's SEV, or equivalent HW security extensions such
// as Intel's TDX, enabled. The flag is always false on the host.
//
// This value moves in tandem with that of Confidential Computing in the
// GPU(s) in all supported configurations, so it is used as a proxy for the
// Confidential Computing state.
//
// This field is set once during global initialization (uvm_global_init),
// and can be read afterwards without acquiring any locks.
bool conf_computing_enabled;
};
// Initialize global uvm state

View File

@@ -1099,12 +1099,7 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
return status;
}
status = uvm_conf_computing_init_parent_gpu(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Confidential computing: %s, GPU %s\n",
nvstatusToString(status), parent_gpu->name);
return status;
}
uvm_conf_computing_check_parent_gpu(parent_gpu);
parent_gpu->pci_dev = gpu_platform_info->pci_dev;
parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);

View File

@@ -46,6 +46,7 @@
#include "uvm_rb_tree.h"
#include "uvm_perf_prefetch.h"
#include "nv-kthread-q.h"
#include <linux/mmu_notifier.h>
#include "uvm_conf_computing.h"
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
@@ -192,9 +193,9 @@ typedef struct
// Mask of successfully serviced read faults on pages in write_fault_mask.
uvm_page_mask_t reads_serviced_mask;
// Temporary mask used for uvm_page_mask_or_equal. This is used since
// bitmap_or_equal() isn't present in all linux kernel versions.
uvm_page_mask_t tmp_mask;
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA. This is used as input to the prefetcher.
uvm_page_mask_t faulted_mask;
// Client type of the service requestor.
uvm_fault_client_type_t client_type;
@@ -204,6 +205,40 @@ typedef struct
// New residency NUMA node ID of the faulting region.
int residency_node;
struct
{
// True if preferred_location was set on this faulting region.
// The UVM_VA_BLOCK_SIZE sized region in the faulting region bound by the
// VMA is prefetched if preferred_location was set and if first_touch
// is true.
bool has_preferred_location;
// True if the UVM_VA_BLOCK_SIZE sized region isn't resident on any
// node. False if any page in the region is resident somewhere.
bool first_touch;
// Mask of prefetched pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA.
uvm_page_mask_t prefetch_pages_mask;
// PFN info of the faulting region
unsigned long pfns[PAGES_PER_UVM_VA_BLOCK];
// Faulting/preferred processor residency mask of the faulting region.
uvm_page_mask_t residency_mask;
#if defined(NV_MMU_INTERVAL_NOTIFIER)
// MMU notifier used to compute residency of this faulting region.
struct mmu_interval_notifier notifier;
#endif
uvm_va_space_t *va_space;
// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
} prefetch_state;
} uvm_ats_fault_context_t;
struct uvm_fault_service_batch_context_struct
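For scale (assuming the usual 2 MiB UVM_VA_BLOCK_SIZE and 4 KiB pages, so PAGES_PER_UVM_VA_BLOCK = 512): the new pfns[] array adds 512 * sizeof(unsigned long) = 4 KiB to each uvm_ats_fault_context_t on 64-bit kernels, and each uvm_page_mask_t added to prefetch_state is a 512-bit (64-byte) bitmap.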

View File

@@ -1009,6 +1009,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);
bool read_duplicate = false;
uvm_processor_id_t new_residency;
const uvm_va_policy_t *policy;
// Ensure that the migratability iterator covers the current address
while (iter.end < address)
@@ -1035,21 +1036,23 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
// If the underlying VMA is gone, skip HMM migrations.
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_find_vma(&service_context->block_context, address);
status = uvm_hmm_find_vma(service_context->block_context.mm,
&service_context->block_context.hmm.vma,
address);
if (status == NV_ERR_INVALID_ADDRESS)
continue;
UVM_ASSERT(status == NV_OK);
}
service_context->block_context.policy = uvm_va_policy_get(va_block, address);
policy = uvm_va_policy_get(va_block, address);
new_residency = uvm_va_block_select_residency(va_block,
&service_context->block_context,
page_index,
processor,
uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
service_context->block_context.policy,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_ACCESS_COUNTERS,
&read_duplicate);
@@ -1094,12 +1097,17 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
if (!uvm_processor_mask_empty(&service_context->resident_processors)) {
while (first_page_index <= last_page_index) {
uvm_page_index_t outer = last_page_index + 1;
const uvm_va_policy_t *policy;
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_find_policy_vma_and_outer(va_block,
&service_context->block_context,
first_page_index,
&outer);
status = NV_ERR_INVALID_ADDRESS;
if (service_context->block_context.mm) {
status = uvm_hmm_find_policy_vma_and_outer(va_block,
&service_context->block_context.hmm.vma,
first_page_index,
&policy,
&outer);
}
if (status != NV_OK)
break;
}

View File

@@ -343,6 +343,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
const uvm_va_policy_t *policy;
UVM_ASSERT(!fault_entry->is_fatal);
@@ -352,7 +353,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(fault_entry->fault_address >= va_block->start);
UVM_ASSERT(fault_entry->fault_address <= va_block->end);
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
if (service_context->num_retries == 0) {
// notify event to tools/performance heuristics. For now we use a
@@ -361,7 +362,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
va_block,
gpu->id,
service_context->block_context.policy->preferred_location,
policy->preferred_location,
fault_entry,
++non_replayable_faults->batch_id,
false);
@@ -396,7 +397,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
page_index,
gpu->id,
fault_entry->access_type_mask,
service_context->block_context.policy,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
&read_duplicate);
@@ -678,10 +679,17 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
fault_entry->fault_source.channel_id = user_channel->hw_channel_id;
if (!fault_entry->is_fatal) {
status = uvm_va_block_find_create(fault_entry->va_space,
fault_entry->fault_address,
va_block_context,
&va_block);
if (mm) {
status = uvm_va_block_find_create(fault_entry->va_space,
fault_entry->fault_address,
&va_block_context->hmm.vma,
&va_block);
}
else {
status = uvm_va_block_find_create_managed(fault_entry->va_space,
fault_entry->fault_address,
&va_block);
}
if (status == NV_OK)
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
else
@@ -734,8 +742,6 @@ void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
// Differently to replayable faults, we do not batch up and preprocess
// non-replayable faults since getting multiple faults on the same
// memory region is not very likely
//
// TODO: Bug 2103669: [UVM/ATS] Optimize ATS fault servicing
for (i = 0; i < cached_faults; ++i) {
status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
if (status != NV_OK)

View File

@@ -1322,6 +1322,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
const uvm_va_policy_t *policy;
NvU64 end;
// Check that all uvm_fault_access_type_t values can fit into an NvU8
@@ -1347,13 +1348,13 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address <= va_block->end);
if (uvm_va_block_is_hmm(va_block)) {
uvm_hmm_find_policy_end(va_block,
&block_context->block_context,
ordered_fault_cache[first_fault_index]->fault_address,
&end);
policy = uvm_hmm_find_policy_end(va_block,
block_context->block_context.hmm.vma,
ordered_fault_cache[first_fault_index]->fault_address,
&end);
}
else {
block_context->block_context.policy = uvm_va_range_get_policy(va_block->va_range);
policy = uvm_va_range_get_policy(va_block->va_range);
end = va_block->end;
}
@@ -1393,7 +1394,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
update_batch_and_notify_fault(gpu,
batch_context,
va_block,
block_context->block_context.policy->preferred_location,
policy->preferred_location,
current_entry,
is_duplicate);
}
@@ -1473,7 +1474,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
page_index,
gpu->id,
service_access_type_mask,
block_context->block_context.policy,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS,
&read_duplicate);
@@ -1625,21 +1626,25 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
const uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
const uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
uvm_page_mask_t *tmp_mask = &ats_context->tmp_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
UVM_ASSERT(vma);
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_GPC;
uvm_page_mask_or(tmp_mask, write_fault_mask, read_fault_mask);
uvm_page_mask_or(faulted_mask, write_fault_mask, read_fault_mask);
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, tmp_mask));
// Remove prefetched pages from the serviced mask since fault servicing
// failures belonging to prefetch pages need to be ignored.
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, faulted_mask);
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, tmp_mask)) {
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, faulted_mask));
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, faulted_mask)) {
(*block_faults) += (fault_index_end - fault_index_start);
return status;
}
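A worked example of the new mask bookkeeping (hypothetical page indices): if pages {2} read-faulted and {5} write-faulted, faulted_mask = {2, 5}; the prefetcher may then grow the read/write fault masks to cover, say, {0..7}. If servicing succeeds on {0, 1, 2, 5} but fails on the remaining prefetch-only pages, and-ing faults_serviced_mask with faulted_mask leaves {2, 5}, which equals faulted_mask, so the prefetch failures are ignored and the sub-VMA is accounted as fully serviced.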
@@ -1867,7 +1872,13 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
va_range_next = uvm_va_space_iter_next(va_range_next, ~0ULL);
}
status = uvm_va_block_find_create_in_range(va_space, va_range, fault_address, va_block_context, &va_block);
if (va_range)
status = uvm_va_block_find_create_in_range(va_space, va_range, fault_address, &va_block);
else if (mm)
status = uvm_hmm_va_block_find_create(va_space, fault_address, &va_block_context->hmm.vma, &va_block);
else
status = NV_ERR_INVALID_ADDRESS;
if (status == NV_OK) {
status = service_fault_batch_block(gpu, va_block, batch_context, fault_index, block_faults);
}

View File

@@ -110,7 +110,20 @@ typedef struct
bool uvm_hmm_is_enabled_system_wide(void)
{
return !uvm_disable_hmm && !g_uvm_global.ats.enabled && uvm_va_space_mm_enabled_system();
if (uvm_disable_hmm)
return false;
if (g_uvm_global.ats.enabled)
return false;
// Confidential Computing and HMM impose mutually exclusive constraints. In
// Confidential Computing the GPU can only access pages resident in vidmem,
// but in HMM pages may be required to be resident in sysmem: file backed
// VMAs, huge pages, etc.
if (g_uvm_global.conf_computing_enabled)
return false;
return uvm_va_space_mm_enabled_system();
}
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
@@ -127,32 +140,17 @@ static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
return container_of(node, uvm_va_block_t, hmm.node);
}
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
{
uvm_hmm_va_space_t *hmm_va_space = &va_space->hmm;
struct mm_struct *mm = va_space->va_space_mm.mm;
int ret;
if (!uvm_hmm_is_enabled(va_space))
return NV_OK;
uvm_assert_mmap_lock_locked_write(mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
return;
uvm_range_tree_init(&hmm_va_space->blocks);
uvm_mutex_init(&hmm_va_space->blocks_lock, UVM_LOCK_ORDER_LEAF);
// Initialize MMU interval notifiers for this process.
// This allows mmu_interval_notifier_insert() to be called without holding
// the mmap_lock for write.
// Note: there is no __mmu_notifier_unregister(), this call just allocates
// memory which is attached to the mm_struct and freed when the mm_struct
// is freed.
ret = __mmu_notifier_register(NULL, mm);
if (ret)
return errno_to_nv_status(ret);
return NV_OK;
return;
}
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
@@ -325,7 +323,6 @@ static bool hmm_invalidate(uvm_va_block_t *va_block,
region = uvm_va_block_region_from_start_end(va_block, start, end);
va_block_context->hmm.vma = NULL;
va_block_context->policy = NULL;
// We only need to unmap GPUs since Linux handles the CPUs.
for_each_gpu_id_in_mask(id, &va_block->mapped) {
@@ -444,11 +441,11 @@ static void hmm_va_block_init(uvm_va_block_t *va_block,
static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
bool allow_unreadable_vma,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma_out,
uvm_va_block_t **va_block_ptr)
{
struct mm_struct *mm = va_space->va_space_mm.mm;
struct vm_area_struct *vma;
struct mm_struct *mm;
struct vm_area_struct *va_block_vma;
uvm_va_block_t *va_block;
NvU64 start, end;
NV_STATUS status;
@@ -457,15 +454,14 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(mm);
UVM_ASSERT(!va_block_context || va_block_context->mm == mm);
mm = va_space->va_space_mm.mm;
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked(&va_space->lock);
UVM_ASSERT(PAGE_ALIGNED(addr));
// Note that we have to allow PROT_NONE VMAs so that policies can be set.
vma = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(vma, addr, allow_unreadable_vma))
va_block_vma = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(va_block_vma, addr, allow_unreadable_vma))
return NV_ERR_INVALID_ADDRESS;
// Since we only hold the va_space read lock, there can be multiple
@@ -517,8 +513,8 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
done:
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
if (va_block_context)
va_block_context->hmm.vma = vma;
if (vma_out)
*vma_out = va_block_vma;
*va_block_ptr = va_block;
return NV_OK;
@@ -532,43 +528,36 @@ err_unlock:
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma,
uvm_va_block_t **va_block_ptr)
{
return hmm_va_block_find_create(va_space, addr, false, va_block_context, va_block_ptr);
return hmm_va_block_find_create(va_space, addr, false, vma, va_block_ptr);
}
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma_out, NvU64 addr)
{
struct mm_struct *mm = va_block_context->mm;
struct vm_area_struct *vma;
if (!mm)
return NV_ERR_INVALID_ADDRESS;
uvm_assert_mmap_lock_locked(mm);
vma = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(vma, addr, false))
*vma_out = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(*vma_out, addr, false))
return NV_ERR_INVALID_ADDRESS;
va_block_context->hmm.vma = vma;
return NV_OK;
}
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
uvm_va_block_region_t region)
{
uvm_assert_mutex_locked(&va_block->lock);
if (uvm_va_block_is_hmm(va_block)) {
struct vm_area_struct *vma = va_block_context->hmm.vma;
UVM_ASSERT(vma);
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
uvm_assert_mmap_lock_locked(va_block_context->mm);
UVM_ASSERT(va_block->hmm.va_space->va_space_mm.mm == vma->vm_mm);
uvm_assert_mmap_lock_locked(va_block->hmm.va_space->va_space_mm.mm);
UVM_ASSERT(vma->vm_start <= uvm_va_block_region_start(va_block, region));
UVM_ASSERT(vma->vm_end > uvm_va_block_region_end(va_block, region));
}
@@ -619,8 +608,6 @@ static NV_STATUS hmm_migrate_range(uvm_va_block_t *va_block,
uvm_mutex_lock(&va_block->lock);
uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) {
va_block_context->policy = policy;
// Even though UVM_VA_BLOCK_RETRY_LOCKED() may unlock and relock the
// va_block lock, the policy remains valid because we hold the mmap
// lock so munmap can't remove the policy, and the va_space lock so the
@@ -670,7 +657,6 @@ void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space)
continue;
block_context->hmm.vma = vma;
block_context->policy = &uvm_va_policy_default;
uvm_hmm_va_block_migrate_locked(va_block,
NULL,
block_context,
@@ -1046,11 +1032,7 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
uvm_processor_mask_test(&old_policy->accessed_by, old_policy->preferred_location))
uvm_processor_mask_set(&set_accessed_by_processors, old_policy->preferred_location);
va_block_context->policy = uvm_va_policy_set_preferred_location(va_block,
region,
preferred_location,
old_policy);
if (!va_block_context->policy)
if (!uvm_va_policy_set_preferred_location(va_block, region, preferred_location, old_policy))
return NV_ERR_NO_MEMORY;
// Establish new remote mappings if the old preferred location had
@@ -1109,7 +1091,7 @@ NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
for (addr = base; addr < last_address; addr = va_block->end + 1) {
NvU64 end;
status = hmm_va_block_find_create(va_space, addr, true, va_block_context, &va_block);
status = hmm_va_block_find_create(va_space, addr, true, &va_block_context->hmm.vma, &va_block);
if (status != NV_OK)
break;
@@ -1151,7 +1133,6 @@ static NV_STATUS hmm_set_accessed_by_start_end_locked(uvm_va_block_t *va_block,
if (uvm_va_policy_is_read_duplicate(&node->policy, va_space))
continue;
va_block_context->policy = &node->policy;
region = uvm_va_block_region_from_start_end(va_block,
max(start, node->node.start),
min(end, node->node.end));
@@ -1196,7 +1177,7 @@ NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
for (addr = base; addr < last_address; addr = va_block->end + 1) {
NvU64 end;
status = hmm_va_block_find_create(va_space, addr, true, va_block_context, &va_block);
status = hmm_va_block_find_create(va_space, addr, true, &va_block_context->hmm.vma, &va_block);
if (status != NV_OK)
break;
@@ -1249,8 +1230,6 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
uvm_mutex_lock(&va_block->lock);
uvm_for_each_va_policy_node_in(node, va_block, va_block->start, va_block->end) {
block_context->policy = &node->policy;
for_each_id_in_mask(id, &node->policy.accessed_by) {
status = hmm_set_accessed_by_start_end_locked(va_block,
block_context,
@@ -1309,13 +1288,13 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
}
}
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp)
const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
struct vm_area_struct *vma,
unsigned long addr,
NvU64 *endp)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
const uvm_va_policy_node_t *node;
const uvm_va_policy_t *policy;
NvU64 end = va_block->end;
uvm_assert_mmap_lock_locked(vma->vm_mm);
@@ -1326,40 +1305,45 @@ void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
node = uvm_va_policy_node_find(va_block, addr);
if (node) {
va_block_context->policy = &node->policy;
policy = &node->policy;
if (end > node->node.end)
end = node->node.end;
}
else {
va_block_context->policy = &uvm_va_policy_default;
policy = &uvm_va_policy_default;
}
*endp = end;
return policy;
}
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma_out,
uvm_page_index_t page_index,
const uvm_va_policy_t **policy,
uvm_page_index_t *outerp)
{
struct vm_area_struct *vma;
unsigned long addr;
NvU64 end;
uvm_page_index_t outer;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
struct mm_struct *mm = va_space->va_space_mm.mm;
if (!mm)
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
uvm_assert_mmap_lock_locked(va_block_context->mm);
uvm_assert_mmap_lock_locked(mm);
uvm_assert_mutex_locked(&va_block->lock);
addr = uvm_va_block_cpu_page_address(va_block, page_index);
vma = vma_lookup(va_block_context->mm, addr);
if (!vma || !(vma->vm_flags & VM_READ))
*vma_out = vma_lookup(mm, addr);
if (!*vma_out || !((*vma_out)->vm_flags & VM_READ))
return NV_ERR_INVALID_ADDRESS;
va_block_context->hmm.vma = vma;
uvm_hmm_find_policy_end(va_block, va_block_context, addr, &end);
*policy = uvm_hmm_find_policy_end(va_block, *vma_out, addr, &end);
outer = uvm_va_block_cpu_page_index(va_block, end) + 1;
if (*outerp > outer)
@@ -1379,8 +1363,6 @@ static NV_STATUS hmm_clear_thrashing_policy(uvm_va_block_t *va_block,
uvm_mutex_lock(&va_block->lock);
uvm_for_each_va_policy_in(policy, va_block, va_block->start, va_block->end, node, region) {
block_context->policy = policy;
// Unmap may split PTEs and require a retry. Needs to be called
// before the pinned pages information is destroyed.
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
@@ -1424,11 +1406,10 @@ NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
}
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
const uvm_va_policy_t *policy,
NvU64 address)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
const uvm_va_policy_t *policy = va_block_context->policy;
NvU64 start, end;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
@@ -1457,13 +1438,11 @@ uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
}
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
NvU64 addr)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
uvm_assert_mmap_lock_locked(va_block_context->mm);
uvm_assert_mmap_lock_locked(va_block->hmm.va_space->va_space_mm.mm);
UVM_ASSERT(vma && addr >= vma->vm_start && addr < vma->vm_end);
if (!(vma->vm_flags & VM_READ))
@@ -2907,8 +2886,6 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
if (status != NV_OK)
return status;
UVM_ASSERT(!uvm_va_policy_is_read_duplicate(va_block_context->policy, va_block->hmm.va_space));
status = uvm_va_block_make_resident_copy(va_block,
va_block_retry,
va_block_context,
@@ -3140,7 +3117,7 @@ NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
for (addr = base; addr < last_address; addr = end + 1) {
struct vm_area_struct *vma;
status = hmm_va_block_find_create(va_space, addr, false, va_block_context, &va_block);
status = hmm_va_block_find_create(va_space, addr, false, &va_block_context->hmm.vma, &va_block);
if (status != NV_OK)
return status;
@@ -3232,7 +3209,6 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) {
npages = uvm_va_block_region_num_pages(region);
va_block_context->policy = policy;
if (out_accessed_by_set && uvm_processor_mask_get_count(&policy->accessed_by) > 0)
*out_accessed_by_set = true;

View File

@@ -49,9 +49,7 @@ typedef struct
bool uvm_hmm_is_enabled_system_wide(void);
// Initialize HMM for the given the va_space.
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
// Destroy any HMM state for the given the va_space.
// Locking: va_space lock must be held in write mode.
@@ -90,31 +88,30 @@ typedef struct
// address 'addr' or the VMA does not have at least PROT_READ permission.
// The caller is also responsible for checking that there is no UVM
// va_range covering the given address before calling this function.
// If va_block_context is not NULL, the VMA is cached in
// va_block_context->hmm.vma.
// The VMA is returned in vma_out if it's not NULL.
// Locking: This function must be called with mm retained and locked for
// at least read and the va_space lock at least for read.
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma_out,
uvm_va_block_t **va_block_ptr);
// Find the VMA for the given address and set va_block_context->hmm.vma.
// Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
// is no VMA associated with the address 'addr' or the VMA does not have at
// least PROT_READ permission.
// Find the VMA for the given address and return it in vma_out. Return
// NV_ERR_INVALID_ADDRESS if mm is NULL or there is no VMA associated with
// the address 'addr' or the VMA does not have at least PROT_READ
// permission.
// Locking: This function must be called with mm retained and locked for
// at least read or mm equal to NULL.
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);
NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma_out, NvU64 addr);
// If va_block is a HMM va_block, check that va_block_context->hmm.vma is
// not NULL and covers the given region. This always returns true and is
// intended to only be used with UVM_ASSERT().
// If va_block is a HMM va_block, check that vma is not NULL and covers the
// given region. This always returns true and is intended to only be used
// with UVM_ASSERT().
// Locking: This function must be called with the va_block lock held and if
// va_block is a HMM block, va_block_context->mm must be retained and
// locked for at least read.
// va_block is a HMM block, va_space->va_space_mm.mm->mmap_lock must be
// retained and locked for at least read.
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
uvm_va_block_region_t region);
// Initialize the HMM portion of the service_context.
@@ -225,31 +222,29 @@ typedef struct
return NV_OK;
}
// This function assigns va_block_context->policy to the policy covering
// the given address 'addr' and assigns the ending address '*endp' to the
// minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
// ending address of the policy range. Note that va_block_context->hmm.vma
// is expected to be initialized before calling this function.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
// the va_block lock held.
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp);
// This function returns the policy covering the given address 'addr' and
// assigns the ending address '*endp' to the minimum of va_block->end,
// vma->vm_end - 1, and the ending address of the policy range.
// Locking: This function must be called with vma->vm_mm retained and locked
// for at least read and the va_block and va_space lock held.
const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
struct vm_area_struct *vma,
unsigned long addr,
NvU64 *endp);
// This function finds the VMA for the page index 'page_index' and assigns
// it to va_block_context->vma, sets va_block_context->policy to the policy
// covering the given address, and sets the ending page range '*outerp'
// to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
// ending address of the policy range, and va_block->end.
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
// the va_block lock held.
// This function finds the VMA for the page index 'page_index' and returns
// it in *vma, which must not be NULL. It also returns the policy covering
// the given address in *policy, and sets the ending page range '*outerp' to
// the minimum of *outerp, vma->vm_end - 1, the ending address of the policy
// range, and va_block->end.
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise sets *vma and
// returns NV_OK.
// Locking: This function must be called with mm retained and locked for at
// least read and the va_block and va_space lock held.
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma,
uvm_page_index_t page_index,
const uvm_va_policy_t **policy,
uvm_page_index_t *outerp);
// Clear thrashing policy information from all HMM va_blocks.
@@ -258,24 +253,21 @@ typedef struct
// Return the expanded region around 'address' limited to the intersection
// of va_block start/end, vma start/end, and policy start/end.
// va_block_context must not be NULL, va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()), and
// va_block_context->policy must be valid.
// Locking: the caller must hold mm->mmap_lock in at least read mode, the
// va_space lock must be held in at least read mode, and the va_block lock
// held.
// Locking: the caller must hold va_space->va_space_mm.mm->mmap_lock in at
// least read mode, the va_space lock must be held in at least read mode,
// and the va_block lock held.
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
const uvm_va_policy_t *policy,
NvU64 address);
// Return the logical protection allowed of a HMM va_block for the page at
// the given address.
// va_block_context must not be NULL and va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()).
// Locking: the caller must hold va_block_context->mm mmap_lock in at least
// read mode.
// the given address within the vma, which must be valid. This is usually
// obtained from uvm_hmm_va_block_find_create().
// Locking: the caller must hold va_space->va_space_mm.mm mmap_lock in at
// least read mode.
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
NvU64 addr);
// This is called to service a GPU fault.
@@ -288,9 +280,9 @@ typedef struct
uvm_service_block_context_t *service_context);
// This is called to migrate a region within a HMM va_block.
// va_block_context must not be NULL and va_block_context->policy and
// va_block_context->hmm.vma must be valid.
// Locking: the va_block_context->mm must be retained, mmap_lock must be
// va_block_context must not be NULL and va_block_context->hmm.vma
// must be valid.
// Locking: the va_space->va_space_mm.mm must be retained, mmap_lock must be
// locked, and the va_block lock held.
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
@@ -303,7 +295,7 @@ typedef struct
// UvmMigrate().
//
// va_block_context must not be NULL. The caller is not required to set
// va_block_context->policy or va_block_context->hmm.vma.
// va_block_context->hmm.vma.
//
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
// the va_space read lock must be held.
@@ -412,9 +404,8 @@ typedef struct
return false;
}
static NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
static void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
{
return NV_OK;
}
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
@@ -440,19 +431,19 @@ typedef struct
static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma,
uvm_va_block_t **va_block_ptr)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
static NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma, NvU64 addr)
{
return NV_OK;
}
static bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
uvm_va_block_region_t region)
{
return true;
@@ -533,16 +524,19 @@ typedef struct
return NV_ERR_INVALID_ADDRESS;
}
static void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp)
static const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
struct vm_area_struct *vma,
unsigned long addr,
NvU64 *endp)
{
UVM_ASSERT(0);
return NULL;
}
static NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma,
uvm_page_index_t page_index,
const uvm_va_policy_t **policy,
uvm_page_index_t *outerp)
{
return NV_OK;
@@ -554,14 +548,15 @@ typedef struct
}
static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
const uvm_va_policy_t *policy,
NvU64 address)
{
return (uvm_va_block_region_t){};
}
static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
NvU64 addr)
{
return UVM_PROT_NONE;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -93,8 +93,9 @@ static bool sysmem_can_be_mapped_on_gpu(uvm_mem_t *sysmem)
{
UVM_ASSERT(uvm_mem_is_sysmem(sysmem));
// If SEV is enabled, only unprotected memory can be mapped
if (g_uvm_global.sev_enabled)
// In Confidential Computing, only unprotected memory can be mapped on the
// GPU
if (g_uvm_global.conf_computing_enabled)
return uvm_mem_is_sysmem_dma(sysmem);
return true;
@@ -737,7 +738,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
pages[page_index] = mem_cpu_page(mem, page_index * PAGE_SIZE);
}
if (g_uvm_global.sev_enabled && uvm_mem_is_sysmem_dma(mem))
if (g_uvm_global.conf_computing_enabled && uvm_mem_is_sysmem_dma(mem))
prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);
mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,10 +44,10 @@ static NvU32 first_page_size(NvU32 page_sizes)
static inline NV_STATUS __alloc_map_sysmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem)
{
if (g_uvm_global.sev_enabled)
if (g_uvm_global.conf_computing_enabled)
return uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, sys_mem);
else
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
}
static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
@@ -335,9 +335,6 @@ error:
static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
{
if (g_uvm_global.sev_enabled)
return false;
if (g_uvm_global.num_simulated_devices == 0)
return true;

View File

@@ -223,7 +223,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
NV_STATUS status, tracker_status = NV_OK;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, region));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context->hmm.vma, region));
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_va_block_migrate_locked(va_block,
@@ -234,9 +234,9 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
}
else {
va_block_context->policy = uvm_va_range_get_policy(va_block->va_range);
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space)) {
if (uvm_va_policy_is_read_duplicate(policy, va_space)) {
status = uvm_va_block_make_resident_read_duplicate(va_block,
va_block_retry,
va_block_context,
@@ -371,8 +371,6 @@ static bool va_block_should_do_cpu_preunmap(uvm_va_block_t *va_block,
if (!va_block)
return true;
UVM_ASSERT(va_range_should_do_cpu_preunmap(va_block_context->policy, uvm_va_block_get_va_space(va_block)));
region = uvm_va_block_region_from_start_end(va_block, max(start, va_block->start), min(end, va_block->end));
uvm_mutex_lock(&va_block->lock);
@@ -496,11 +494,9 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
uvm_tracker_t *out_tracker)
{
NvU64 preunmap_range_start = start;
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_range);
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_range));
should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(va_block_context->policy,
va_range->va_space);
should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(policy, va_range->va_space);
// Divide migrations into groups of contiguous VA blocks. This is to trigger
// CPU unmaps for that region before the migration starts.
@@ -577,8 +573,6 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
break;
}
va_block_context->policy = uvm_va_range_get_policy(va_range);
// For UVM-Lite GPUs, the CUDA driver may suballocate a single va_range
// into many range groups. For this reason, we iterate over each va_range first
// then through the range groups within.
@@ -653,6 +647,8 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
if (mm)
uvm_assert_mmap_lock_locked(mm);
else if (!first_va_range)
return NV_ERR_INVALID_ADDRESS;
va_block_context = uvm_va_block_context_alloc(mm);
if (!va_block_context)

View File

@@ -672,6 +672,14 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
.finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
};
// WAR for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only to read-write upgrades
//
// This code path isn't used on GH180, but we need to maintain consistent
// behaviour on systems that do use it.
if (!vma_is_anonymous(args->vma))
return NV_WARN_NOTHING_TO_DO;
ret = migrate_vma(&uvm_migrate_vma_ops, args->vma, args->start, args->end, args->src, args->dst, state);
if (ret < 0)
return errno_to_nv_status(ret);
@@ -685,6 +693,24 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
if (ret < 0)
return errno_to_nv_status(ret);
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
// support for it is added to the Linux kernel
//
// A side-effect of migrate_vma_setup() is it calls mmu notifiers even if a
// page can't be migrated (e.g. because it's a non-anonymous mapping). We
// need this side-effect for SMMU on GH180 to ensure any cached read-only
// entries are flushed from SMMU on permission upgrade.
//
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only to read-write upgrades
//
// The above WAR doesn't work for HugeTLBfs mappings because
// migrate_vma_setup() will fail in that case.
if (!vma_is_anonymous(args->vma)) {
migrate_vma_finalize(args);
return NV_WARN_NOTHING_TO_DO;
}
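A minimal sketch of the ordering described in the comments above, assuming the modern migrate_vma_setup()/migrate_vma_pages()/migrate_vma_finalize() kernel API; it is not this driver's code, and the helper name is made up. The point is that migrate_vma_setup() runs, and fires the MMU notifiers, before unsupported VMAs are filtered out.

#include <linux/migrate.h>
#include <linux/mm.h>

/* Hypothetical caller: returns 0 on success or when there is nothing to do. */
static int sketch_migrate_anon_range(struct migrate_vma *args)
{
    int ret = migrate_vma_setup(args); /* Fires MMU notifiers even when no page
                                        * turns out to be migratable, flushing
                                        * stale read-only SMMU entries. */
    if (ret)
        return ret;

    if (!vma_is_anonymous(args->vma)) {
        /* Filter unsupported VMAs only after the setup side effect happened. */
        migrate_vma_finalize(args);
        return 0;
    }

    /* ... allocate destination pages and copy the source pages here ... */
    migrate_vma_pages(args);
    migrate_vma_finalize(args);
    return 0;
}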
uvm_migrate_vma_alloc_and_copy(args, state);
if (state->status == NV_OK) {
migrate_vma_pages(args);
@@ -858,9 +884,13 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
start = max(start, vma->vm_start);
outer = min(outer, vma->vm_end);
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
// support for it is added to the Linux kernel
if (!vma_is_anonymous(vma))
// migrate_vma only supports anonymous VMAs. We check for those after
// calling migrate_vma_setup() to work around Bug 4130089. We need to check
// for HugeTLB VMAs here because migrate_vma_setup() will return a fatal
// error for those.
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only to read-write upgrades
if (is_vm_hugetlb_page(vma))
return NV_WARN_NOTHING_TO_DO;
if (uvm_processor_mask_empty(&va_space->registered_gpus))

View File

@@ -34,8 +34,8 @@ typedef struct
{
uvm_va_space_t *va_space;
struct mm_struct *mm;
unsigned long start;
unsigned long length;
const unsigned long start;
const unsigned long length;
uvm_processor_id_t dst_id;
// dst_node_id may be clobbered by uvm_migrate_pageable().

View File

@@ -906,11 +906,10 @@ error:
// --------------|-------------------------||----------------|----------------
// vidmem | - || vidmem | false
// sysmem | - || sysmem | false
// default | <not set> || vidmem | true (1)
// default | <not set> || vidmem | true
// default | vidmem || vidmem | false
// default | sysmem || sysmem | false
//
// (1) When SEV mode is enabled, the fallback path is disabled.
//
// In SR-IOV heavy, the page tree must be in vidmem, to prevent guest drivers
// from updating GPU page tables without hypervisor knowledge.
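Illustrative, standalone restatement of the table above (columns: requested location, page_table_aperture module parameter, resulting tree->location, tree->location_sys_fallback), mirroring the selection logic in page_tree_set_location() below. The enum and function names are made up for the sketch.

#include <stdbool.h>

typedef enum { APERTURE_DEFAULT, APERTURE_VID, APERTURE_SYS } aperture_t;

/* Hypothetical mirror of the location selection shown in this hunk. */
static void resolve_page_tree_location(aperture_t requested,
                                       aperture_t module_param,
                                       aperture_t *location,
                                       bool *sys_fallback)
{
    *sys_fallback = false;

    if (requested != APERTURE_DEFAULT) {
        /* "vidmem" and "sysmem" rows: honor the explicit request. */
        *location = requested;
    }
    else if (module_param == APERTURE_DEFAULT) {
        /* "default | <not set>" row: prefer vidmem, allow sysmem fallback. */
        *location = APERTURE_VID;
        *sys_fallback = true;
    }
    else {
        /* "default | vidmem" and "default | sysmem" rows: follow the param. */
        *location = module_param;
    }
}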
@@ -926,28 +925,27 @@ error:
//
static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t location)
{
bool should_location_be_vidmem;
UVM_ASSERT(tree->gpu != NULL);
UVM_ASSERT_MSG((location == UVM_APERTURE_VID) ||
(location == UVM_APERTURE_SYS) ||
(location == UVM_APERTURE_DEFAULT),
"Invalid location %s (%d)\n", uvm_aperture_string(location), (int)location);
should_location_be_vidmem = uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu)
|| uvm_conf_computing_mode_enabled(tree->gpu);
// The page tree of a "fake" GPU used during page tree testing can be in
// sysmem even if should_location_be_vidmem is true. A fake GPU can be
// identified by having no channel manager.
if ((tree->gpu->channel_manager != NULL) && should_location_be_vidmem)
UVM_ASSERT(location == UVM_APERTURE_VID);
// sysmem in scenarios where a "real" GPU must be in vidmem. Fake GPUs can
// be identified by having no channel manager.
if (tree->gpu->channel_manager != NULL) {
if (uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu))
UVM_ASSERT(location == UVM_APERTURE_VID);
else if (uvm_conf_computing_mode_enabled(tree->gpu))
UVM_ASSERT(location == UVM_APERTURE_VID);
}
if (location == UVM_APERTURE_DEFAULT) {
if (page_table_aperture == UVM_APERTURE_DEFAULT) {
tree->location = UVM_APERTURE_VID;
// See the comment (1) above.
tree->location_sys_fallback = !g_uvm_global.sev_enabled;
tree->location_sys_fallback = true;
}
else {
tree->location = page_table_aperture;

View File

@@ -218,57 +218,11 @@ static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
}
}
// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
//
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
// a VA block.
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_page_mask_t *prefetch_pages,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
static void init_bitmap_tree_from_region(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_va_block_region_t max_prefetch_region,
const uvm_page_mask_t *resident_mask,
const uvm_page_mask_t *faulted_pages)
{
uvm_page_index_t page_index;
const uvm_page_mask_t *resident_mask = NULL;
const uvm_page_mask_t *thrashing_pages = NULL;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
const uvm_va_policy_t *policy = va_block_context->policy;
uvm_va_block_region_t max_prefetch_region;
NvU32 big_page_size;
uvm_va_block_region_t big_pages_region;
if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
va_block->prefetch_info.last_migration_proc_id = new_residency;
va_block->prefetch_info.fault_migrations_to_last_proc = 0;
}
// Compute the expanded region that prefetching is allowed from.
if (uvm_va_block_is_hmm(va_block)) {
max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
va_block_context,
uvm_va_block_region_start(va_block, faulted_region));
}
else {
max_prefetch_region = uvm_va_block_region_from_block(va_block);
}
uvm_page_mask_zero(prefetch_pages);
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
// If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole max_prefetch_region.
if (uvm_processor_mask_empty(&va_block->resident) &&
uvm_id_equal(new_residency, policy->preferred_location)) {
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
goto done;
}
if (resident_mask)
uvm_page_mask_or(&bitmap_tree->pages, resident_mask, faulted_pages);
else
@@ -277,6 +231,29 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
// If we are using a subregion of the va_block, align bitmap_tree
uvm_page_mask_shift_right(&bitmap_tree->pages, &bitmap_tree->pages, max_prefetch_region.first);
bitmap_tree->offset = 0;
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
}
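A standalone illustration (not driver code) of the level_count formula used just above, with open-coded stand-ins for the kernel's ilog2() and roundup_pow_of_two(): a full 512-page (2MB) block yields a 10-level tree, and a 5-page region rounds up to 8 leaves and 4 levels.

#include <stdio.h>

static unsigned int roundup_pow_of_two_sketch(unsigned int x)
{
    unsigned int p = 1;

    while (p < x)
        p <<= 1;
    return p;
}

static unsigned int ilog2_sketch(unsigned int x)
{
    unsigned int l = 0;

    while (x >>= 1)
        l++;
    return l;
}

int main(void)
{
    unsigned int leaf_counts[] = { 1, 5, 512 };
    unsigned int i;

    for (i = 0; i < 3; i++) {
        unsigned int leaves = leaf_counts[i];
        unsigned int levels = ilog2_sketch(roundup_pow_of_two_sketch(leaves)) + 1;

        /* leaf_count=1 -> 1 level, 5 -> 4 levels, 512 -> 10 levels */
        printf("leaf_count=%u -> level_count=%u\n", leaves, levels);
    }
    return 0;
}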
static void update_bitmap_tree_from_va_block(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t max_prefetch_region)
{
NvU32 big_page_size;
uvm_va_block_region_t big_pages_region;
uvm_va_space_t *va_space;
const uvm_page_mask_t *thrashing_pages;
UVM_ASSERT(va_block);
UVM_ASSERT(va_block_context);
va_space = uvm_va_block_get_va_space(va_block);
// Get the big page size for the new residency.
// Assume 64K size if the new residency is the CPU or no GPU va space is
// registered in the current process for this GPU.
@@ -302,13 +279,9 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
UVM_ASSERT(bitmap_tree->leaf_count <= PAGES_PER_UVM_VA_BLOCK);
uvm_page_mask_shift_left(&bitmap_tree->pages, &bitmap_tree->pages, bitmap_tree->offset);
}
else {
bitmap_tree->offset = 0;
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
}
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
}
thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);
@@ -320,25 +293,99 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
max_prefetch_region,
faulted_pages,
thrashing_pages);
}
// Do not compute prefetch regions with faults on pages that are thrashing
if (thrashing_pages)
uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
else
uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);
static void compute_prefetch_mask(uvm_va_block_region_t faulted_region,
uvm_va_block_region_t max_prefetch_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
const uvm_page_mask_t *faulted_pages,
uvm_page_mask_t *out_prefetch_mask)
{
uvm_page_index_t page_index;
// Update the tree using the scratch mask to compute the pages to prefetch
for_each_va_block_page_in_region_mask(page_index, &va_block_context->scratch_page_mask, faulted_region) {
uvm_page_mask_zero(out_prefetch_mask);
// Update the tree using the faulted mask to compute the pages to prefetch.
for_each_va_block_page_in_region_mask(page_index, faulted_pages, faulted_region) {
uvm_va_block_region_t region = compute_prefetch_region(page_index, bitmap_tree, max_prefetch_region);
uvm_page_mask_region_fill(prefetch_pages, region);
uvm_page_mask_region_fill(out_prefetch_mask, region);
// Early out if we have already prefetched until the end of the VA block
if (region.outer == max_prefetch_region.outer)
break;
}
}
// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
//
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
// a VA block.
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_page_mask_t *prefetch_pages,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
{
const uvm_page_mask_t *resident_mask = NULL;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, faulted_region);
uvm_va_block_region_t max_prefetch_region;
const uvm_page_mask_t *thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);
if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
va_block->prefetch_info.last_migration_proc_id = new_residency;
va_block->prefetch_info.fault_migrations_to_last_proc = 0;
}
// Compute the expanded region that prefetching is allowed from.
if (uvm_va_block_is_hmm(va_block)) {
max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
va_block_context->hmm.vma,
policy,
uvm_va_block_region_start(va_block, faulted_region));
}
else {
max_prefetch_region = uvm_va_block_region_from_block(va_block);
}
uvm_page_mask_zero(prefetch_pages);
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
// If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole max_prefetch_region.
if (uvm_processor_mask_empty(&va_block->resident) &&
uvm_id_equal(new_residency, policy->preferred_location)) {
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
}
else {
init_bitmap_tree_from_region(bitmap_tree, max_prefetch_region, resident_mask, faulted_pages);
update_bitmap_tree_from_va_block(bitmap_tree,
va_block,
va_block_context,
new_residency,
faulted_pages,
max_prefetch_region);
// Do not compute prefetch regions with faults on pages that are thrashing
if (thrashing_pages)
uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
else
uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);
compute_prefetch_mask(faulted_region,
max_prefetch_region,
bitmap_tree,
&va_block_context->scratch_page_mask,
prefetch_pages);
}
done:
// Do not prefetch pages that are going to be migrated/populated due to a
// fault
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, faulted_pages);
@@ -364,31 +411,58 @@ done:
return uvm_page_mask_weight(prefetch_pages);
}
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint)
bool uvm_perf_prefetch_enabled(uvm_va_space_t *va_space)
{
if (!g_uvm_perf_prefetch_enable)
return false;
UVM_ASSERT(va_space);
return va_space->test.page_prefetch_enabled;
}
void uvm_perf_prefetch_compute_ats(uvm_va_space_t *va_space,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_va_block_region_t max_prefetch_region,
const uvm_page_mask_t *residency_mask,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_page_mask_t *out_prefetch_mask)
{
UVM_ASSERT(faulted_pages);
UVM_ASSERT(bitmap_tree);
UVM_ASSERT(out_prefetch_mask);
uvm_page_mask_zero(out_prefetch_mask);
if (!uvm_perf_prefetch_enabled(va_space))
return;
init_bitmap_tree_from_region(bitmap_tree, max_prefetch_region, residency_mask, faulted_pages);
compute_prefetch_mask(faulted_region, max_prefetch_region, bitmap_tree, faulted_pages, out_prefetch_mask);
}
void uvm_perf_prefetch_get_hint_va_block(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint)
{
const uvm_va_policy_t *policy = va_block_context->policy;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_page_mask_t *prefetch_pages = &out_hint->prefetch_pages_mask;
NvU32 pending_prefetch_pages;
uvm_assert_rwsem_locked(&va_space->lock);
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, faulted_region));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, faulted_region));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context->hmm.vma, faulted_region));
out_hint->residency = UVM_ID_INVALID;
uvm_page_mask_zero(prefetch_pages);
if (!g_uvm_perf_prefetch_enable)
return;
if (!va_space->test.page_prefetch_enabled)
if (!uvm_perf_prefetch_enabled(va_space))
return;
pending_prefetch_pages = uvm_perf_prefetch_prenotify_fault_migrations(va_block,

View File

@@ -61,21 +61,41 @@ typedef struct
// Global initialization function (no clean up needed).
NV_STATUS uvm_perf_prefetch_init(void);
// Returns whether prefetching is enabled in the VA space.
// va_space cannot be NULL.
bool uvm_perf_prefetch_enabled(uvm_va_space_t *va_space);
// Return the prefetch mask with the pages that may be prefetched in an ATS
// block. An ATS block is a system-allocated memory block with base aligned to
// UVM_VA_BLOCK_SIZE and a maximum size of UVM_VA_BLOCK_SIZE. The faulted_pages
// mask and faulted_region are the pages being faulted on the given residency.
//
// Only residency_mask can be NULL.
//
// Locking: The caller must hold the va_space lock.
void uvm_perf_prefetch_compute_ats(uvm_va_space_t *va_space,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_va_block_region_t max_prefetch_region,
const uvm_page_mask_t *residency_mask,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_page_mask_t *out_prefetch_mask);
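Standalone illustration of the "ATS block" bounds described above, assuming UVM_VA_BLOCK_SIZE is 2MB; the macro name and the example address are made up and not taken from this change.

#include <stdint.h>
#include <stdio.h>

#define SKETCH_ATS_BLOCK_SIZE (2ull * 1024 * 1024) /* assumed UVM_VA_BLOCK_SIZE */

int main(void)
{
    uint64_t fault_addr = 0x7f1234567890ull;          /* made-up faulting address */
    uint64_t block_base = fault_addr & ~(SKETCH_ATS_BLOCK_SIZE - 1);
    uint64_t block_end  = block_base + SKETCH_ATS_BLOCK_SIZE;

    /* Prefetching for this fault is bounded to the containing 2MB block. */
    printf("fault %#llx -> prefetch window [%#llx, %#llx)\n",
           (unsigned long long)fault_addr,
           (unsigned long long)block_base,
           (unsigned long long)block_end);
    return 0;
}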
// Return a hint with the pages that may be prefetched in the block.
// The faulted_pages mask and faulted_region are the pages being migrated to
// the given residency.
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read.
// va_block_context must not be NULL, and if the va_block is a HMM
// block, va_block_context->hmm.vma must be valid which also means the
// va_block_context->mm is not NULL, retained, and locked for at least
// read.
// Locking: The caller must hold the va_space lock and va_block lock.
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint);
void uvm_perf_prefetch_get_hint_va_block(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint);
void uvm_perf_prefetch_bitmap_tree_iter_init(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_page_index_t page_index,

View File

@@ -1095,7 +1095,7 @@ static NV_STATUS unmap_remote_pinned_pages(uvm_va_block_t *va_block,
NV_STATUS tracker_status;
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
uvm_processor_id_t processor_id;
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_policy_get(va_block, uvm_va_block_region_start(va_block, region));
uvm_assert_mutex_locked(&va_block->lock);
@@ -1141,10 +1141,9 @@ NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_bl
{
block_thrashing_info_t *block_thrashing;
uvm_processor_mask_t unmap_processors;
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
block_thrashing = thrashing_info_get(va_block);
if (!block_thrashing || !block_thrashing->pages)
@@ -1867,8 +1866,6 @@ static void thrashing_unpin_pages(struct work_struct *work)
UVM_ASSERT(uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));
uvm_va_block_context_init(va_block_context, NULL);
va_block_context->policy =
uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
va_block_context,
@@ -2123,8 +2120,6 @@ NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_
uvm_va_block_region_t va_block_region = uvm_va_block_region_from_block(va_block);
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
block_context->policy = uvm_va_range_get_policy(va_range);
uvm_mutex_lock(&va_block->lock);
// Unmap may split PTEs and require a retry. Needs to be called

View File

@@ -103,11 +103,11 @@ void uvm_perf_thrashing_unload(uvm_va_space_t *va_space);
// Destroy the thrashing detection struct for the given block.
void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block);
// Unmap remote mappings from all processors on the pinned pages
// described by region and block_thrashing->pinned pages.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid() in uvm_va_block.h.
// Locking: the va_block lock must be held.
// Unmap remote mappings from all processors on the pinned pages described by
// region and block_thrashing->pinned pages. va_block_context must not be NULL
// and policy for the region must match. See the comments for
// uvm_va_block_check_policy_is_valid() in uvm_va_block.h. Locking: the
// va_block lock must be held.
NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region);

View File

@@ -3820,18 +3820,11 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
// For virtual mode, look up and retain the block first so that eviction can
// be started without the VA space lock held.
if (params->eviction_mode == UvmTestEvictModeVirtual) {
uvm_va_block_context_t *block_context;
if (mm)
status = uvm_va_block_find_create(va_space, params->address, NULL, &block);
else
status = uvm_va_block_find_create_managed(va_space, params->address, &block);
block_context = uvm_va_block_context_alloc(mm);
if (!block_context) {
status = NV_ERR_NO_MEMORY;
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
goto out;
}
status = uvm_va_block_find_create(va_space, params->address, block_context, &block);
uvm_va_block_context_free(block_context);
if (status != NV_OK) {
uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -324,7 +324,7 @@ static NV_STATUS gpu_mem_check(uvm_gpu_t *gpu,
// TODO: Bug 3839176: [UVM][HCC][uvm_test] Update tests that assume GPU
// engines can directly access sysmem
// Skip this test for now. To enable this test under SEV,
// Skip this test for now. To enable this test in Confidential Computing,
// the GPU->CPU CE copy needs to be updated so it uses encryption when
// CC is enabled.
if (uvm_conf_computing_mode_enabled(gpu))
@@ -1223,8 +1223,6 @@ static NV_STATUS test_indirect_peers(uvm_gpu_t *owning_gpu, uvm_gpu_t *accessing
if (!chunks)
return NV_ERR_NO_MEMORY;
UVM_ASSERT(!g_uvm_global.sev_enabled);
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(UVM_CHUNK_SIZE_MAX, current->mm, &verif_mem), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, owning_gpu), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, accessing_gpu), out);

View File

@@ -160,7 +160,7 @@ static NV_STATUS preferred_location_unmap_remote_pages(uvm_va_block_t *va_block,
NV_STATUS status = NV_OK;
NV_STATUS tracker_status;
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
uvm_processor_id_t preferred_location = policy->preferred_location;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
const uvm_page_mask_t *mapped_mask;
@@ -279,6 +279,9 @@ static NV_STATUS preferred_location_set(uvm_va_space_t *va_space,
return NV_OK;
}
if (!mm)
return NV_ERR_INVALID_ADDRESS;
return uvm_hmm_set_preferred_location(va_space, preferred_location, base, last_address, out_tracker);
}
@@ -445,7 +448,6 @@ NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
NV_STATUS tracker_status;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
status = uvm_va_block_add_mappings(va_block,
va_block_context,
@@ -467,13 +469,13 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);
NV_STATUS status;
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
// Read duplication takes precedence over SetAccessedBy. Do not add mappings
// if read duplication is enabled.
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space))
if (uvm_va_policy_is_read_duplicate(policy, va_space))
return NV_OK;
status = UVM_VA_BLOCK_LOCK_RETRY(va_block,
@@ -592,8 +594,15 @@ static NV_STATUS accessed_by_set(uvm_va_space_t *va_space,
UVM_ASSERT(va_range_last->node.end >= last_address);
}
else {
// NULL mm case already filtered by uvm_api_range_type_check()
UVM_ASSERT(mm);
UVM_ASSERT(type == UVM_API_RANGE_TYPE_HMM);
status = uvm_hmm_set_accessed_by(va_space, processor_id, set_bit, base, last_address, &local_tracker);
status = uvm_hmm_set_accessed_by(va_space,
processor_id,
set_bit,
base,
last_address,
&local_tracker);
}
done:
@@ -656,7 +665,6 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
// TODO: Bug 3660922: need to implement HMM read duplication support.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
va_block_set_read_duplication_locked(va_block,
@@ -675,7 +683,7 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
uvm_processor_id_t processor_id;
uvm_va_block_region_t block_region = uvm_va_block_region_from_block(va_block);
uvm_page_mask_t *break_read_duplication_pages = &va_block_context->caller_page_mask;
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
uvm_processor_id_t preferred_location = policy->preferred_location;
uvm_processor_mask_t accessed_by = policy->accessed_by;
@@ -757,7 +765,6 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
// Restore all SetAccessedBy mappings
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
@@ -915,7 +922,6 @@ static NV_STATUS system_wide_atomics_set(uvm_va_space_t *va_space, const NvProce
if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
continue;
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
uvm_page_mask_t *non_resident_pages = &va_block_context->caller_page_mask;

View File

@@ -264,7 +264,6 @@ NV_STATUS uvm_range_group_va_range_migrate(uvm_va_range_t *va_range,
return NV_ERR_NO_MEMORY;
uvm_assert_rwsem_locked(&va_range->va_space->lock);
va_block_context->policy = uvm_va_range_get_policy(va_range);
// Iterate over blocks, populating them if necessary
for (i = uvm_va_range_block_index(va_range, start); i <= uvm_va_range_block_index(va_range, end); ++i) {

View File

@@ -2069,7 +2069,11 @@ static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
// The RM flavor of the lock is needed to perform ECC checks.
uvm_va_space_down_read_rm(va_space);
status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), block_context, &block);
if (mm)
status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block_context->hmm.vma, &block);
else
status = uvm_va_block_find_create_managed(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block);
if (status != NV_OK)
goto unlock_and_exit;

View File

@@ -106,36 +106,6 @@ uvm_va_space_t *uvm_va_block_get_va_space(uvm_va_block_t *va_block)
return va_space;
}
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
const uvm_va_policy_t *policy,
uvm_va_block_region_t region)
{
uvm_assert_mutex_locked(&va_block->lock);
if (uvm_va_block_is_hmm(va_block)) {
const uvm_va_policy_node_t *node;
if (uvm_va_policy_is_default(policy)) {
// There should only be the default policy within the region.
node = uvm_va_policy_node_iter_first(va_block,
uvm_va_block_region_start(va_block, region),
uvm_va_block_region_end(va_block, region));
UVM_ASSERT(!node);
}
else {
// The policy node should cover the region.
node = uvm_va_policy_node_from_policy(policy);
UVM_ASSERT(node->node.start <= uvm_va_block_region_start(va_block, region));
UVM_ASSERT(node->node.end >= uvm_va_block_region_end(va_block, region));
}
}
else {
UVM_ASSERT(policy == uvm_va_range_get_policy(va_block->va_range));
}
return true;
}
static NvU64 block_gpu_pte_flag_cacheable(uvm_va_block_t *block, uvm_gpu_t *gpu, uvm_processor_id_t resident_id)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
@@ -3697,7 +3667,6 @@ NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_is_hmm(va_block) || va_block->va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
resident_mask = block_resident_mask_get_alloc(va_block, dest_id);
if (!resident_mask)
@@ -3944,7 +3913,6 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
// TODO: Bug 3660922: need to implement HMM read duplication support.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
va_block_context->make_resident.dest_id = dest_id;
va_block_context->make_resident.cause = cause;
@@ -4742,7 +4710,7 @@ static void block_unmap_cpu(uvm_va_block_t *block, uvm_va_block_region_t region,
// Given a mask of mapped pages, returns true if any of the pages in the mask
// are mapped remotely by the given GPU.
static bool block_has_remote_mapping_gpu(uvm_va_block_t *block,
uvm_va_block_context_t *block_context,
uvm_page_mask_t *scratch_page_mask,
uvm_gpu_id_t gpu_id,
const uvm_page_mask_t *mapped_pages)
{
@@ -4764,7 +4732,7 @@ static bool block_has_remote_mapping_gpu(uvm_va_block_t *block,
}
// Remote pages are pages which are mapped but not resident locally
return uvm_page_mask_andnot(&block_context->scratch_page_mask, mapped_pages, &gpu_state->resident);
return uvm_page_mask_andnot(scratch_page_mask, mapped_pages, &gpu_state->resident);
}
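Standalone illustration (not driver code) of the "mapped but not locally resident" test above, with a plain 64-bit word standing in for uvm_page_mask_t.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same idea as uvm_page_mask_andnot(): writes "mapped AND NOT resident" to
 * *out and reports whether any such (remote) page exists. */
static bool sketch_remote_pages(uint64_t mapped, uint64_t resident, uint64_t *out)
{
    *out = mapped & ~resident;
    return *out != 0;
}

int main(void)
{
    uint64_t remote;
    /* Pages 0-3 mapped, pages 0-1 resident locally: pages 2-3 are remote. */
    bool any = sketch_remote_pages(0xf, 0x3, &remote);

    printf("any remote: %d, mask: %#llx\n", any, (unsigned long long)remote);
    return 0;
}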
// Writes pte_clear_val to the 4k PTEs covered by clear_page_mask. If
@@ -6659,7 +6627,7 @@ static NV_STATUS block_unmap_gpu(uvm_va_block_t *block,
if (status != NV_OK)
return status;
only_local_mappings = !block_has_remote_mapping_gpu(block, block_context, gpu->id, pages_to_unmap);
only_local_mappings = !block_has_remote_mapping_gpu(block, &block_context->scratch_page_mask, gpu->id, pages_to_unmap);
tlb_membar = uvm_hal_downgrade_membar_type(gpu, only_local_mappings);
status = uvm_push_begin_acquire(gpu->channel_manager,
@@ -6794,16 +6762,15 @@ static NV_STATUS uvm_cpu_insert_page(struct vm_area_struct *vma,
}
static uvm_prot_t compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *hmm_vma,
uvm_page_index_t page_index)
{
struct vm_area_struct *vma;
uvm_prot_t logical_prot;
if (uvm_va_block_is_hmm(va_block)) {
NvU64 addr = uvm_va_block_cpu_page_address(va_block, page_index);
logical_prot = uvm_hmm_compute_logical_prot(va_block, va_block_context, addr);
logical_prot = uvm_hmm_compute_logical_prot(va_block, hmm_vma, addr);
}
else {
uvm_va_range_t *va_range = va_block->va_range;
@@ -6815,6 +6782,8 @@ static uvm_prot_t compute_logical_prot(uvm_va_block_t *va_block,
logical_prot = UVM_PROT_NONE;
}
else {
struct vm_area_struct *vma;
vma = uvm_va_range_vma(va_range);
if (!(vma->vm_flags & VM_READ))
@@ -6864,13 +6833,15 @@ static struct page *block_page_get(uvm_va_block_t *block, block_phys_page_t bloc
// with new_prot permissions
// - Guarantee that vm_insert_page is safe to use (vma->vm_mm has a reference
// and mmap_lock is held in at least read mode)
// - For HMM blocks, guarantee that the vma is valid and safe to use, that
// vma->vm_mm has a reference, and that mmap_lock is held in at least read
// mode
// - Ensure that the struct page corresponding to the physical memory being
// mapped exists
// - Manage the block's residency bitmap
// - Ensure that the block hasn't been killed (block->va_range is present)
// - Update the pte/mapping tracking state on success
static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *hmm_vma,
uvm_processor_id_t resident_id,
uvm_page_index_t page_index,
uvm_prot_t new_prot)
@@ -6883,7 +6854,7 @@ static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
NvU64 addr;
struct page *page;
UVM_ASSERT(uvm_va_block_is_hmm(block) || va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
UVM_ASSERT((uvm_va_block_is_hmm(block) && hmm_vma) || va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
UVM_ASSERT(new_prot != UVM_PROT_NONE);
UVM_ASSERT(new_prot < UVM_PROT_MAX);
UVM_ASSERT(uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(resident_id)], UVM_ID_CPU));
@@ -6904,7 +6875,7 @@ static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
// Check for existing VMA permissions. They could have been modified after
// the initial mmap by mprotect.
if (new_prot > compute_logical_prot(block, va_block_context, page_index))
if (new_prot > compute_logical_prot(block, hmm_vma, page_index))
return NV_ERR_INVALID_ACCESS_TYPE;
if (uvm_va_block_is_hmm(block)) {
@@ -7001,7 +6972,7 @@ static NV_STATUS block_map_cpu_to(uvm_va_block_t *block,
for_each_va_block_page_in_region_mask(page_index, pages_to_map, region) {
status = block_map_cpu_page_to(block,
block_context,
block_context->hmm.vma,
resident_id,
page_index,
new_prot);
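A hedged kernel-style sketch (hypothetical helper, not this driver's code) of the vm_insert_page() safety requirement listed in the comment above block_map_cpu_page_to(): the mm must stay referenced and mmap_lock must be held in at least read mode around the insert.

#include <linux/mm.h>

/* Hypothetical: the caller guarantees 'mm' stays referenced, e.g. via
 * mmget_not_zero(), for the duration of the call. */
static int sketch_map_one_page(struct mm_struct *mm,
                               unsigned long addr,
                               struct page *page)
{
    struct vm_area_struct *vma;
    int ret = -EINVAL;

    mmap_read_lock(mm);                /* vm_insert_page() requires mmap_lock */
    vma = find_vma(mm, addr);
    if (vma && addr >= vma->vm_start)  /* find_vma() may return a later vma */
        ret = vm_insert_page(vma, addr, page);
    mmap_read_unlock(mm);

    return ret;
}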
@@ -7234,13 +7205,13 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
const uvm_page_mask_t *pte_mask;
uvm_page_mask_t *running_page_mask = &va_block_context->mapping.map_running_page_mask;
NV_STATUS status;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
va_block_context->mapping.cause = cause;
UVM_ASSERT(new_prot != UVM_PROT_NONE);
UVM_ASSERT(new_prot < UVM_PROT_MAX);
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
// Mapping is not supported on the eviction path that doesn't hold the VA
// space lock.
@@ -7282,7 +7253,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
// Map per resident location so we can more easily detect physically-
// contiguous mappings.
map_get_allowed_destinations(va_block, va_block_context, va_block_context->policy, id, &allowed_destinations);
map_get_allowed_destinations(va_block, va_block_context, policy, id, &allowed_destinations);
for_each_closest_id(resident_id, &allowed_destinations, id, va_space) {
if (UVM_ID_IS_CPU(id)) {
@@ -7588,8 +7559,6 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
NV_STATUS tracker_status;
uvm_processor_id_t id;
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
for_each_id_in_mask(id, map_processor_mask) {
status = uvm_va_block_map(va_block,
va_block_context,
@@ -9573,7 +9542,7 @@ static bool block_region_might_read_duplicate(uvm_va_block_t *va_block,
// could be changed in the future to optimize multiple faults/counters on
// contiguous pages.
static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *hmm_vma,
uvm_page_index_t page_index,
uvm_processor_id_t fault_processor_id,
uvm_processor_id_t new_residency,
@@ -9586,7 +9555,7 @@ static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
// query_promote: upgrade access privileges to avoid future faults IF
// they don't trigger further revocations.
new_prot = uvm_fault_access_type_to_prot(access_type);
logical_prot = compute_logical_prot(va_block, va_block_context, page_index);
logical_prot = compute_logical_prot(va_block, hmm_vma, page_index);
UVM_ASSERT(logical_prot >= new_prot);
@@ -9729,11 +9698,10 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
const uvm_page_mask_t *final_page_mask = map_page_mask;
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
uvm_processor_id_t preferred_location;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
// Read duplication takes precedence over SetAccessedBy.
//
@@ -9959,8 +9927,6 @@ NV_STATUS uvm_va_block_add_mappings(uvm_va_block_t *va_block,
uvm_range_group_range_iter_t iter;
uvm_prot_t prot_to_map;
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
if (UVM_ID_IS_CPU(processor_id) && !uvm_va_block_is_hmm(va_block)) {
if (!uvm_va_range_vma_check(va_range, va_block_context->mm))
return NV_OK;
@@ -10207,11 +10173,8 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
{
uvm_processor_id_t id;
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
va_block_context->policy,
uvm_va_block_region_for_page(page_index)));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block,
va_block_context,
va_block_context->hmm.vma,
uvm_va_block_region_for_page(page_index)));
id = block_select_residency(va_block,
@@ -10255,6 +10218,7 @@ static bool check_access_counters_dont_revoke(uvm_va_block_t *block,
// Update service_context->prefetch_hint, service_context->per_processor_masks,
// and service_context->region.
static void uvm_va_block_get_prefetch_hint(uvm_va_block_t *va_block,
const uvm_va_policy_t *policy,
uvm_service_block_context_t *service_context)
{
uvm_processor_id_t new_residency;
@@ -10265,20 +10229,19 @@ static void uvm_va_block_get_prefetch_hint(uvm_va_block_t *va_block,
if (uvm_processor_mask_get_count(&service_context->resident_processors) == 1) {
uvm_page_index_t page_index;
uvm_page_mask_t *new_residency_mask;
const uvm_va_policy_t *policy = service_context->block_context.policy;
new_residency = uvm_processor_mask_find_first_id(&service_context->resident_processors);
new_residency_mask = &service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency;
// Update prefetch tracking structure with the pages that will migrate
// due to faults
uvm_perf_prefetch_get_hint(va_block,
&service_context->block_context,
new_residency,
new_residency_mask,
service_context->region,
&service_context->prefetch_bitmap_tree,
&service_context->prefetch_hint);
uvm_perf_prefetch_get_hint_va_block(va_block,
&service_context->block_context,
new_residency,
new_residency_mask,
service_context->region,
&service_context->prefetch_bitmap_tree,
&service_context->prefetch_hint);
// Obtain the prefetch hint and give a fake fault access type to the
// prefetched pages
@@ -10463,7 +10426,7 @@ NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
for_each_va_block_page_in_region_mask(page_index, new_residency_mask, service_context->region) {
new_prot = compute_new_permission(va_block,
&service_context->block_context,
service_context->block_context.hmm.vma,
page_index,
processor_id,
new_residency,
@@ -10706,11 +10669,8 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
NV_STATUS status = NV_OK;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
service_context->block_context.policy,
service_context->region));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block,
&service_context->block_context,
service_context->block_context.hmm.vma,
service_context->region));
// GPU fault servicing must be done under the VA space read lock. GPU fault
@@ -10724,7 +10684,9 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
else
uvm_assert_rwsem_locked_read(&va_space->lock);
uvm_va_block_get_prefetch_hint(va_block, service_context);
uvm_va_block_get_prefetch_hint(va_block,
uvm_va_policy_get_region(va_block, service_context->region),
service_context);
for_each_id_in_mask(new_residency, &service_context->resident_processors) {
if (uvm_va_block_is_hmm(va_block)) {
@@ -10757,11 +10719,8 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
uvm_va_range_t *va_range = va_block->va_range;
uvm_prot_t access_prot = uvm_fault_access_type_to_prot(access_type);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
va_block_context->policy,
uvm_va_block_region_for_page(page_index)));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block,
va_block_context,
va_block_context->hmm.vma,
uvm_va_block_region_for_page(page_index)));
// CPU permissions are checked later by block_map_cpu_page.
@@ -10779,8 +10738,8 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
// vm_flags at any moment (for example on mprotect) and here we are not
// guaranteed to have vma->vm_mm->mmap_lock. During tests we ensure that
// this scenario does not happen.
if ((va_block_context->mm || uvm_enable_builtin_tests) &&
(access_prot > compute_logical_prot(va_block, va_block_context, page_index)))
if (((va_block->hmm.va_space && va_block->hmm.va_space->va_space_mm.mm) || uvm_enable_builtin_tests) &&
(access_prot > compute_logical_prot(va_block, va_block_context->hmm.vma, page_index)))
return NV_ERR_INVALID_ACCESS_TYPE;
}
@@ -10866,6 +10825,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
uvm_perf_thrashing_hint_t thrashing_hint;
uvm_processor_id_t new_residency;
bool read_duplicate;
const uvm_va_policy_t *policy;
uvm_assert_rwsem_locked(&va_space->lock);
@@ -10874,13 +10834,13 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
uvm_assert_mmap_lock_locked(service_context->block_context.mm);
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_addr);
policy = uvm_va_policy_get(va_block, fault_addr);
if (service_context->num_retries == 0) {
// notify event to tools/performance heuristics
uvm_perf_event_notify_cpu_fault(&va_space->perf_events,
va_block,
service_context->block_context.policy->preferred_location,
policy->preferred_location,
fault_addr,
fault_access_type > UVM_FAULT_ACCESS_TYPE_READ,
KSTK_EIP(current));
@@ -10925,7 +10885,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
page_index,
UVM_ID_CPU,
uvm_fault_access_type_mask_bit(fault_access_type),
service_context->block_context.policy,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS,
&read_duplicate);
@@ -11025,7 +10985,6 @@ NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t
NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
uvm_va_range_t *va_range,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block)
{
size_t index;
@@ -11033,12 +10992,7 @@ NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
if (uvm_enable_builtin_tests && atomic_dec_if_positive(&va_space->test.va_block_allocation_fail_nth) == 0)
return NV_ERR_NO_MEMORY;
if (!va_range) {
if (!va_block_context || !va_block_context->mm)
return NV_ERR_INVALID_ADDRESS;
return uvm_hmm_va_block_find_create(va_space, addr, va_block_context, out_block);
}
UVM_ASSERT(va_range);
UVM_ASSERT(addr >= va_range->node.start);
UVM_ASSERT(addr <= va_range->node.end);
@@ -11052,14 +11006,32 @@ NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
return uvm_va_range_block_create(va_range, index, out_block);
}
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block)
{
uvm_va_range_t *va_range = uvm_va_range_find(va_space, addr);
return uvm_va_block_find_create_in_range(va_space, va_range, addr, va_block_context, out_block);
if (va_range)
return uvm_va_block_find_create_in_range(va_space, va_range, addr, out_block);
else
return NV_ERR_INVALID_ADDRESS;
}
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
struct vm_area_struct **hmm_vma,
uvm_va_block_t **out_block)
{
uvm_va_range_t *va_range = uvm_va_range_find(va_space, addr);
if (hmm_vma)
*hmm_vma = NULL;
if (va_range)
return uvm_va_block_find_create_in_range(va_space, va_range, addr, out_block);
else
return uvm_hmm_va_block_find_create(va_space, addr, hmm_vma, out_block);
}
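The call sites updated elsewhere in this change follow the same dispatch pattern; a condensed sketch with a hypothetical wrapper name:

static NV_STATUS sketch_lookup_block(uvm_va_space_t *va_space,
                                     NvU64 addr,
                                     struct mm_struct *mm,
                                     uvm_va_block_context_t *block_context,
                                     uvm_va_block_t **out_block)
{
    // With an mm, HMM blocks are eligible and the covering vma is reported.
    if (mm)
        return uvm_va_block_find_create(va_space, addr, &block_context->hmm.vma, out_block);

    // Without an mm, only managed VA ranges can back the address.
    return uvm_va_block_find_create_managed(va_space, addr, out_block);
}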
// Launch a synchronous, encrypted copy between GPU and CPU.
@@ -11236,8 +11208,6 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
if (UVM_ID_IS_INVALID(proc))
proc = UVM_ID_CPU;
block_context->policy = uvm_va_policy_get(va_block, dst);
// Use make_resident() in all cases to break read-duplication, but
// block_retry can be NULL as if the page is not resident yet we will make
// it resident on the CPU.
@@ -11406,7 +11376,6 @@ static void block_add_eviction_mappings(void *args)
uvm_va_range_t *va_range = va_block->va_range;
NV_STATUS status = NV_OK;
block_context->policy = uvm_va_range_get_policy(va_range);
for_each_id_in_mask(id, &uvm_va_range_get_policy(va_range)->accessed_by) {
status = uvm_va_block_set_accessed_by(va_block, block_context, id);
if (status != NV_OK)
@@ -11557,8 +11526,8 @@ NV_STATUS uvm_va_block_evict_chunks(uvm_va_block_t *va_block,
&accessed_by_set);
}
else {
block_context->policy = uvm_va_range_get_policy(va_block->va_range);
accessed_by_set = uvm_processor_mask_get_count(&block_context->policy->accessed_by) > 0;
const uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
accessed_by_set = uvm_processor_mask_get_count(&policy->accessed_by) > 0;
// TODO: Bug 1765193: make_resident() breaks read-duplication, but it's
// not necessary to do so for eviction. Add a version that unmaps only
@@ -11749,19 +11718,16 @@ NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *
struct mm_struct *mm;
uvm_va_block_t *va_block;
uvm_va_block_test_t *va_block_test;
uvm_va_block_context_t *block_context = NULL;
NV_STATUS status = NV_OK;
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
uvm_va_space_down_read(va_space);
block_context = uvm_va_block_context_alloc(mm);
if (!block_context) {
status = NV_ERR_NO_MEMORY;
goto out;
}
if (mm)
status = uvm_va_block_find_create(va_space, params->lookup_address, NULL, &va_block);
else
status = uvm_va_block_find_create_managed(va_space, params->lookup_address, &va_block);
status = uvm_va_block_find_create(va_space, params->lookup_address, block_context, &va_block);
if (status != NV_OK)
goto out;
@@ -11801,7 +11767,6 @@ block_unlock:
out:
uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
uvm_va_block_context_free(block_context);
return status;
}
@@ -11872,7 +11837,11 @@ NV_STATUS uvm_test_change_pte_mapping(UVM_TEST_CHANGE_PTE_MAPPING_PARAMS *params
goto out;
}
status = uvm_va_block_find_create(va_space, params->va, block_context, &block);
if (mm)
status = uvm_va_block_find_create(va_space, params->va, &block_context->hmm.vma, &block);
else
status = uvm_va_block_find_create_managed(va_space, params->va, &block);
if (status != NV_OK)
goto out;
@@ -11899,8 +11868,6 @@ NV_STATUS uvm_test_change_pte_mapping(UVM_TEST_CHANGE_PTE_MAPPING_PARAMS *params
goto out_block;
}
block_context->policy = uvm_va_policy_get(block, params->va);
if (new_prot == UVM_PROT_NONE) {
status = uvm_va_block_unmap(block, block_context, id, region, NULL, &block->tracker);
}

View File

@@ -453,11 +453,12 @@ struct uvm_va_block_struct
NvU16 fault_migrations_to_last_proc;
} prefetch_info;
#if UVM_IS_CONFIG_HMM()
struct
{
#if UVM_IS_CONFIG_HMM()
// The MMU notifier is registered per va_block.
struct mmu_interval_notifier notifier;
#endif
// This is used to serialize migrations between CPU and GPU while
// allowing the va_block lock to be dropped.
@@ -487,7 +488,6 @@ struct uvm_va_block_struct
// Storage node for range tree of va_blocks.
uvm_range_tree_node_t node;
} hmm;
#endif
};
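For context, a minimal kernel-style sketch (assumed, not this driver's code) of how a struct mmu_interval_notifier like the conditional field above is registered over a block's VA range, and how its invalidate callback publishes the new sequence so racing readers retry:

#include <linux/mmu_notifier.h>

static bool sketch_invalidate(struct mmu_interval_notifier *mni,
                              const struct mmu_notifier_range *range,
                              unsigned long cur_seq)
{
    /* Publish the new sequence so concurrent hmm_range_fault() users retry. */
    mmu_interval_set_seq(mni, cur_seq);
    return true;
}

static const struct mmu_interval_notifier_ops sketch_notifier_ops = {
    .invalidate = sketch_invalidate,
};

/* Hypothetical: register 'mni' over [start, start + length) within 'mm'. */
static int sketch_register_notifier(struct mmu_interval_notifier *mni,
                                    struct mm_struct *mm,
                                    unsigned long start,
                                    unsigned long length)
{
    return mmu_interval_notifier_insert(mni, mm, start, length,
                                        &sketch_notifier_ops);
}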
// We define additional per-VA Block fields for testing. When
@@ -678,18 +678,8 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
memset(va_block_context, 0xff, sizeof(*va_block_context));
va_block_context->mm = mm;
#if UVM_IS_CONFIG_HMM()
va_block_context->hmm.vma = NULL;
#endif
}
// Check that a single policy covers the given region for the given va_block.
// This always returns true and is intended to only be used with UVM_ASSERT().
// Locking: the va_block lock must be held.
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
const uvm_va_policy_t *policy,
uvm_va_block_region_t region);
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
// and page masks could simplify the below APIs and their implementations
// at the cost of having to scan the whole mask for small regions.
@@ -734,15 +724,15 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
// user memory is guaranteed not to happen. Allocation-retry of GPU page tables
// can still occur though.
//
// va_block_context must not be NULL. This function will set a bit in
// va_block_context->make_resident.pages_changed_residency for each page that
// changed residency (due to a migration or first population) as a result of the
// operation and va_block_context->make_resident.all_involved_processors for
// each processor involved in the copy. This function only sets bits in those
// masks. It is the caller's responsibility to zero the masks or not first.
//
// va_block_context->policy must also be set by the caller for the given region.
// See the comments for uvm_va_block_check_policy_is_valid().
// va_block_context must not be NULL and policy for the region must
// match. This function will set a bit in
// va_block_context->make_resident.pages_changed_residency for each
// page that changed residency (due to a migration or first
// population) as a result of the operation and
// va_block_context->make_resident.all_involved_processors for each
// processor involved in the copy. This function only sets bits in
// those masks. It is the caller's responsibility to zero the masks or
// not first.
//
// Notably any status other than NV_OK indicates that the block's lock might
// have been unlocked and relocked.
@@ -839,7 +829,7 @@ void uvm_va_block_make_resident_finish(uvm_va_block_t *va_block,
// pages because the earlier operation can cause a PTE split or merge which is
// assumed by the later operation.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// If allocation-retry was required as part of the operation and was successful,
@@ -896,7 +886,7 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
// pages because the earlier operation can cause a PTE split or merge which is
// assumed by the later operation.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
// va_block_context must not be NULL.
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@@ -929,7 +919,7 @@ NV_STATUS uvm_va_block_unmap_mask(uvm_va_block_t *va_block,
// - Unmap the preferred location's processor from any pages in this region
// which are not resident on the preferred location.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the VA block lock.
@@ -941,7 +931,7 @@ NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
// location and policy. Waits for the operation to complete before returning.
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
@@ -956,7 +946,7 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
// the tracker after all mappings have been started.
// This function can be called with HMM and managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va_block lock and
@@ -970,7 +960,7 @@ NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
// Breaks SetAccessedBy and remote mappings
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
@@ -982,7 +972,7 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
// Restores SetAccessedBy mappings
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
@@ -1002,10 +992,9 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
// NV_ERR_INVALID_OPERATION The access would violate the policies specified
// by UvmPreventMigrationRangeGroups.
//
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read.
// va_block_context must not be NULL, policy must match, and if the va_block is
// a HMM block, va_block_context->hmm.vma must be valid which also means the
// va_block_context->mm is not NULL, retained, and locked for at least read.
// Locking: the va_block lock must be held.
NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
@@ -1041,7 +1030,7 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
// different pages because the earlier operation can cause a PTE split or merge
// which is assumed by the later operation.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
// va_block_context must not be NULL.
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@@ -1081,7 +1070,7 @@ NV_STATUS uvm_va_block_revoke_prot_mask(uvm_va_block_t *va_block,
// processor_id, which triggered the migration and should have already been
// mapped).
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// This function acquires/waits for the va_block tracker and updates that
@@ -1112,7 +1101,7 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
// Note that this can return NV_ERR_MORE_PROCESSING_REQUIRED just like
// uvm_va_block_map() indicating that the operation needs to be retried.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va block lock. If va_block_context->mm !=
@@ -1134,7 +1123,7 @@ NV_STATUS uvm_va_block_add_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_spa
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
// va_block_context must not be NULL.
//
// LOCKING: The caller must hold the va_block lock. If block_context->mm is not
// NULL, the caller must hold mm->mmap_lock in at least read mode.
@@ -1225,7 +1214,6 @@ NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
// - va_space lock must be held in at least read mode
//
// service_context->block_context.mm is ignored and vma->vm_mm is used instead.
// service_context->block_context.policy is set by this function.
//
// Returns NV_ERR_INVALID_ACCESS_TYPE if a CPU mapping to fault_addr cannot be
// accessed, for example because it's within a range group which is non-
@@ -1239,10 +1227,10 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// (migrations, cache invalidates, etc.) in response to the given service block
// context.
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint is set by this function.
//
// Locking:
@@ -1267,10 +1255,10 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
// Performs population of the destination pages, unmapping and copying source
// pages to new_residency.
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint should be set before calling this function.
//
// Locking:
@@ -1296,10 +1284,10 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
// This updates the va_block residency state and maps the faulting processor_id
// to the new residency (which may be remote).
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must be initialized by calling uvm_va_block_service_copy()
// before calling this function.
//
@@ -1428,40 +1416,34 @@ const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_proc
NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t **out_block);
// Same as uvm_va_block_find except that the block is created if not found.
// If addr is covered by a UVM_VA_RANGE_TYPE_MANAGED va_range, a managed block
// will be created. Otherwise, if addr is not covered by any va_range, HMM is
// enabled in the va_space, and va_block_context and va_block_context->mm are
// non-NULL, then a HMM block will be created and va_block_context->hmm.vma is
// set to the VMA covering 'addr'. The va_block_context->policy field is left
// unchanged.
// In either case, if va_block_context->mm is non-NULL, it must be retained and
// locked in at least read mode. Return values:
// If addr is covered by a UVM_VA_RANGE_TYPE_MANAGED va_range, a managed block
// will be created. If addr is not covered by any va_range and HMM is enabled
// in the va_space, then a HMM block will be created and hmm_vma is set to the
// VMA covering 'addr'. The va_space_mm must be retained and locked.
// Otherwise, hmm_vma is set to NULL.
// Return values:
// NV_ERR_INVALID_ADDRESS addr is not a UVM_VA_RANGE_TYPE_MANAGED va_range nor
// a HMM enabled VMA.
// NV_ERR_NO_MEMORY memory could not be allocated.
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **hmm_vma,
uvm_va_block_t **out_block);
// Same as uvm_va_block_find_create except that va_range lookup was already done
// by the caller. If the supplied va_range is NULL, this function behaves just
// like when the va_range lookup in uvm_va_block_find_create is NULL.
// Same as uvm_va_block_find_create except that only managed va_blocks are
// created if not already present in the VA range. Does not require va_space_mm
// to be locked or retained.
NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **out_block);
// Same as uvm_va_block_find_create_managed except that va_range lookup was
// already done by the caller. The supplied va_range must not be NULL.
NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
uvm_va_range_t *va_range,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block);
// Same as uvm_va_block_find_create except that only managed va_blocks are
// created if not already present in the VA range.
static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **out_block)
{
return uvm_va_block_find_create(va_space, addr, NULL, out_block);
}
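For illustration only, a hypothetical call site for the reworked lookup helpers might look like the sketch below; va_space and addr are assumed to be in scope, the locking described above is assumed to be held, and error handling is elided:

// Sketch of a hypothetical caller, not part of this change.
NV_STATUS status;
uvm_va_block_t *block;
struct vm_area_struct *hmm_vma;

// General lookup: may create a managed or a HMM block. For HMM blocks,
// hmm_vma is set to the covering VMA; otherwise it is set to NULL.
status = uvm_va_block_find_create(va_space, addr, &hmm_vma, &block);

// Managed-only lookup: never creates HMM blocks and does not require the
// va_space_mm to be retained or locked.
status = uvm_va_block_find_create_managed(va_space, addr, &block);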
// Look up a chunk backing a specific address within the VA block.
// Returns NULL if none.
uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu_t *gpu, NvU64 address);
@@ -1476,10 +1458,10 @@ uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu
// The caller needs to handle allocation-retry. va_block_retry can be NULL if
// the destination is the CPU.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// va_block_context must not be NULL and policy for the region must match. See
// the comments for uvm_va_block_check_policy_is_valid(). If va_block is a HMM
// block, va_block_context->hmm.vma must be valid. See the comments for
// uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
// NULL, va_block_context->mm->mmap_lock must be held in at least
@@ -1497,7 +1479,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
// The [dst, dst + size) range has to fit within a single PAGE_SIZE page.
//
// va_block_context must not be NULL. The caller is not required to set
// va_block_context->policy or va_block_context->hmm.vma.
// va_block_context->hmm.vma.
//
// The caller needs to support allocation-retry of page tables.
//
@@ -1569,7 +1551,7 @@ void uvm_va_block_mark_cpu_dirty(uvm_va_block_t *va_block);
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case the
// block's lock was unlocked and relocked.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
// va_block_context must not be NULL.
//
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_set_cancel(uvm_va_block_t *va_block, uvm_va_block_context_t *block_context, uvm_gpu_t *gpu);
@@ -1650,12 +1632,18 @@ static uvm_va_block_region_t uvm_va_block_region_from_block(uvm_va_block_t *va_b
return uvm_va_block_region(0, uvm_va_block_num_cpu_pages(va_block));
}
// Create a block region from a va block and page mask. Note that the region
// Create a block region from a va block and page mask. If va_block is NULL, the
// region is assumed to cover the maximum va_block size. Note that the region
// covers the first through the last set bit and may have unset bits in between.
static uvm_va_block_region_t uvm_va_block_region_from_mask(uvm_va_block_t *va_block, const uvm_page_mask_t *page_mask)
{
uvm_va_block_region_t region;
uvm_page_index_t outer = uvm_va_block_num_cpu_pages(va_block);
uvm_page_index_t outer;
if (va_block)
outer = uvm_va_block_num_cpu_pages(va_block);
else
outer = PAGES_PER_UVM_VA_BLOCK;
region.first = find_first_bit(page_mask->bitmap, outer);
if (region.first >= outer) {
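A one-line sketch of the NULL va_block case described above; page_mask is a hypothetical, already-initialized uvm_page_mask_t:

// With a NULL va_block, the search width defaults to PAGES_PER_UVM_VA_BLOCK
// instead of the block's CPU page count.
uvm_va_block_region_t region = uvm_va_block_region_from_mask(NULL, &page_mask);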
@@ -2140,15 +2128,14 @@ uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_blo
// MAX_BIG_PAGES_PER_UVM_VA_BLOCK is returned.
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU32 big_page_size);
// Returns the new residency for a page that faulted or triggered access
// counter notifications. The read_duplicate output parameter indicates if the
// page meets the requirements to be read-duplicated
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read. See the comments for uvm_va_block_check_policy_is_valid()
// and uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// Locking: the va_block lock must be held.
// Returns the new residency for a page that faulted or triggered access counter
// notifications. The read_duplicate output parameter indicates if the page
// meets the requirements to be read-duplicated. va_block_context must not be
// NULL, and if the va_block is a HMM block, va_block_context->hmm.vma must be
// valid, which also means the va_block_context->mm is not NULL, retained, and
// locked for at least read. See the comments for
// uvm_va_block_check_policy_is_valid() and uvm_hmm_check_context_vma_is_valid()
// in uvm_hmm.h. Locking: the va_block lock must be held.
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,

View File

@@ -29,9 +29,7 @@
#include "uvm_tlb_batch.h"
#include "uvm_forward_decl.h"
#if UVM_IS_CONFIG_HMM()
#include <linux/migrate.h>
#endif
// UVM_VA_BLOCK_BITS is 21, meaning the maximum block size is 2MB. Rationale:
// - 2MB matches the largest Pascal GPU page size so it's a natural fit
@@ -234,9 +232,6 @@ typedef struct
// the mm, such as creating CPU mappings.
struct mm_struct *mm;
const uvm_va_policy_t *policy;
#if UVM_IS_CONFIG_HMM()
struct
{
// These are used for migrate_vma_*(), hmm_range_fault(), and
@@ -257,10 +252,11 @@ typedef struct
// Cached VMA pointer. This is only valid while holding the mmap_lock.
struct vm_area_struct *vma;
#if UVM_IS_CONFIG_HMM()
// Used for migrate_vma_*() to migrate pages to/from GPU/CPU.
struct migrate_vma migrate_vma_args;
} hmm;
#endif
} hmm;
// Convenience buffer for page mask prints
char page_mask_string_buffer[UVM_PAGE_MASK_PRINT_MIN_BUFFER_SIZE];

View File

@@ -54,6 +54,52 @@ const uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr)
}
}
// HMM va_blocks can have different policies for different regions within the
// va_block. This function checks that the given region is covered by the given
// policy and asserts if the region is covered by different policies.
// This always returns true and is intended to only be used with UVM_ASSERT() to
// avoid calling it on release builds.
// Locking: the va_block lock must be held.
static bool uvm_hmm_va_block_assert_policy_is_valid(uvm_va_block_t *va_block,
const uvm_va_policy_t *policy,
uvm_va_block_region_t region)
{
const uvm_va_policy_node_t *node;
if (uvm_va_policy_is_default(policy)) {
// There should only be the default policy within the region.
node = uvm_va_policy_node_iter_first(va_block,
uvm_va_block_region_start(va_block, region),
uvm_va_block_region_end(va_block, region));
UVM_ASSERT(!node);
}
else {
// The policy node should cover the region.
node = uvm_va_policy_node_from_policy(policy);
UVM_ASSERT(node->node.start <= uvm_va_block_region_start(va_block, region));
UVM_ASSERT(node->node.end >= uvm_va_block_region_end(va_block, region));
}
return true;
}
const uvm_va_policy_t *uvm_va_policy_get_region(uvm_va_block_t *va_block, uvm_va_block_region_t region)
{
uvm_assert_mutex_locked(&va_block->lock);
if (uvm_va_block_is_hmm(va_block)) {
const uvm_va_policy_t *policy;
const uvm_va_policy_node_t *node = uvm_va_policy_node_find(va_block, uvm_va_block_region_start(va_block, region));
policy = node ? &node->policy : &uvm_va_policy_default;
UVM_ASSERT(uvm_hmm_va_block_assert_policy_is_valid(va_block, policy, region));
return policy;
}
else {
return uvm_va_range_get_policy(va_block->va_range);
}
}
#if UVM_IS_CONFIG_HMM()
static struct kmem_cache *g_uvm_va_policy_node_cache __read_mostly;
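As an illustrative sketch (not part of this change), a caller holding the va_block lock could use the new helper as follows; va_space, va_block and region are assumed to be in scope:

// Fetch the single policy covering 'region'; on HMM blocks the helper asserts
// that the region is not split across multiple policies.
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);

if (uvm_va_policy_is_read_duplicate(policy, va_space)) {
    // ... read-duplicate the pages in 'region' ...
}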

View File

@@ -100,6 +100,9 @@ bool uvm_va_policy_is_read_duplicate(const uvm_va_policy_t *policy, uvm_va_space
// Locking: The va_block lock must be held.
const uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr);
// Same as above, but asserts that the policy covers the whole region.
const uvm_va_policy_t *uvm_va_policy_get_region(uvm_va_block_t *va_block, uvm_va_block_region_t region);
// Return a uvm_va_policy_node_t given a uvm_va_policy_t pointer.
static const uvm_va_policy_node_t *uvm_va_policy_node_from_policy(const uvm_va_policy_t *policy)
{

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -376,7 +376,7 @@ NV_STATUS uvm_va_range_create_semaphore_pool(uvm_va_space_t *va_space,
if (status != NV_OK)
goto error;
if (i == 0 && g_uvm_global.sev_enabled)
if (i == 0 && g_uvm_global.conf_computing_enabled)
mem_alloc_params.dma_owner = gpu;
if (attrs.is_cacheable) {
@@ -608,7 +608,6 @@ static NV_STATUS va_range_add_gpu_va_space_managed(uvm_va_range_t *va_range,
uvm_va_block_t *va_block;
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
// TODO: Bug 2090378. Consolidate all per-VA block operations within
// uvm_va_block_add_gpu_va_space so we only need to take the VA block
@@ -687,7 +686,6 @@ static void va_range_remove_gpu_va_space_managed(uvm_va_range_t *va_range,
bool should_enable_read_duplicate;
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
should_enable_read_duplicate =
uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_ENABLED &&
uvm_va_space_can_read_duplicate(va_space, NULL) != uvm_va_space_can_read_duplicate(va_space, gpu_va_space->gpu);
@@ -769,7 +767,6 @@ static NV_STATUS uvm_va_range_enable_peer_managed(uvm_va_range_t *va_range, uvm_
uvm_va_space_t *va_space = va_range->va_space;
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, NULL);
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
// TODO: Bug 1767224: Refactor the uvm_va_block_set_accessed_by logic
@@ -1322,7 +1319,6 @@ static NV_STATUS range_unmap_mask(uvm_va_range_t *va_range,
if (uvm_processor_mask_empty(mask))
return NV_OK;
block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, block) {
NV_STATUS status;
@@ -1364,7 +1360,6 @@ static NV_STATUS range_map_uvm_lite_gpus(uvm_va_range_t *va_range, uvm_tracker_t
if (uvm_processor_mask_empty(&va_range->uvm_lite_gpus))
return NV_OK;
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
// UVM-Lite GPUs always map with RWA
@@ -1528,7 +1523,6 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
uvm_processor_mask_copy(&va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
va_block_context = uvm_va_space_block_context(va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
uvm_processor_id_t id;
@@ -1610,7 +1604,6 @@ NV_STATUS uvm_va_range_set_accessed_by(uvm_va_range_t *va_range,
uvm_processor_mask_copy(&va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
va_block_context = uvm_va_space_block_context(va_space, mm);
va_block_context->policy = policy;
for_each_va_block_in_va_range(va_range, va_block) {
status = uvm_va_block_set_accessed_by(va_block, va_block_context, processor_id);
@@ -1657,7 +1650,6 @@ NV_STATUS uvm_va_range_set_read_duplication(uvm_va_range_t *va_range, struct mm_
return NV_OK;
va_block_context = uvm_va_space_block_context(va_range->va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
NV_STATUS status = uvm_va_block_set_read_duplication(va_block, va_block_context);
@@ -1679,7 +1671,6 @@ NV_STATUS uvm_va_range_unset_read_duplication(uvm_va_range_t *va_range, struct m
return NV_OK;
va_block_context = uvm_va_space_block_context(va_range->va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
status = uvm_va_block_unset_read_duplication(va_block, va_block_context);
@@ -1816,7 +1807,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
if (params->gpuAttributesCount > UVM_MAX_GPUS)
return NV_ERR_INVALID_ARGUMENT;
if (g_uvm_global.sev_enabled && params->gpuAttributesCount == 0)
if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)
return NV_ERR_INVALID_ARGUMENT;
// The mm needs to be locked in order to remove stale HMM va_blocks.

View File

@@ -242,9 +242,7 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
if (status != NV_OK)
goto fail;
status = uvm_hmm_va_space_initialize(va_space);
if (status != NV_OK)
goto fail;
uvm_hmm_va_space_initialize(va_space);
uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(current->mm);
@@ -2226,11 +2224,12 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
// address with mremap() so create a new va_block if needed.
status = uvm_hmm_va_block_find_create(va_space,
fault_addr,
&service_context->block_context,
&service_context->block_context.hmm.vma,
&va_block);
if (status != NV_OK)
break;
UVM_ASSERT(service_context->block_context.hmm.vma == vma);
status = uvm_hmm_migrate_begin(va_block);
if (status != NV_OK)
break;

View File

@@ -274,6 +274,22 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
}
}
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
#if UVM_CAN_USE_MMU_NOTIFIERS()
// Initialize MMU interval notifiers for this process. This allows
// mmu_interval_notifier_insert() to be called without holding the
// mmap_lock for write.
// Note: there is no __mmu_notifier_unregister(); this call just
// allocates memory which is attached to the mm_struct and freed
// when the mm_struct is freed.
ret = __mmu_notifier_register(NULL, current->mm);
if (ret)
return errno_to_nv_status(ret);
#else
UVM_ASSERT(0);
#endif
}
return NV_OK;
}

View File

@@ -0,0 +1,33 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __DETECT_SELF_HOSTED_H__
#define __DETECT_SELF_HOSTED_H__
// PCI devIds 0x2340-0x237f are for Self-Hosted Hopper
static inline int pci_devid_is_self_hosted(unsigned short devid)
{
return devid >= 0x2340 && devid <= 0x237f;
}
#endif
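A hypothetical call site (not part of this change) would pass the helper the PCI device ID, for example the one stored in struct pci_dev; the wrapper name nv_pci_dev_is_self_hosted below is made up for illustration:

#include <linux/pci.h>
#include "detect-self-hosted.h"

// Returns nonzero when 'dev' is a Self-Hosted Hopper part.
static inline int nv_pci_dev_is_self_hosted(const struct pci_dev *dev)
{
    return pci_devid_is_self_hosted(dev->device);
}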

View File

@@ -96,6 +96,8 @@
#include "conftest/patches.h"
#include "detect-self-hosted.h"
#define RM_THRESHOLD_TOTAL_IRQ_COUNT 100000
#define RM_THRESHOLD_UNAHNDLED_IRQ_COUNT 99900
#define RM_UNHANDLED_TIMEOUT_US 100000

View File

@@ -209,7 +209,7 @@ NV_STATUS nvUvmInterfaceSessionCreate(uvmGpuSessionHandle *session,
memset(platformInfo, 0, sizeof(*platformInfo));
platformInfo->atsSupported = nv_ats_supported;
platformInfo->sevEnabled = os_cc_enabled;
platformInfo->confComputingEnabled = os_cc_enabled;
status = rm_gpu_ops_create_session(sp, (gpuSessionHandle *)session);