570.86.15

Bernhard Stoeckner
2025-01-27 19:36:56 +01:00
parent 9d0b0414a5
commit 54d69484da
1166 changed files with 318863 additions and 182687 deletions


@@ -62,10 +62,9 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
*gpu_addr = uvm_gpu_address_copy(owning_gpu, uvm_gpu_page_to_phys_address(owning_gpu, page));
}
else if (owning_gpu && can_copy_from) {
uvm_gpu_identity_mapping_t *gpu_peer_mappings = uvm_gpu_get_peer_mapping(copying_gpu, owning_gpu->id);
uvm_gpu_phys_address_t phys_addr = uvm_gpu_page_to_phys_address(owning_gpu, page);
*gpu_addr = uvm_gpu_address_virtual(gpu_peer_mappings->base + phys_addr.address);
*gpu_addr = uvm_gpu_peer_copy_address(owning_gpu, phys_addr.address, copying_gpu);
}
else {
NV_STATUS status = uvm_parent_gpu_map_cpu_page(copying_gpu->parent, page, &state->dma.addrs[page_index]);
@@ -399,6 +398,38 @@ static NV_STATUS migrate_vma_populate_anon_pages(struct vm_area_struct *vma,
return status;
}
static NV_STATUS zero_non_failed_pages_in_mask(uvm_push_t *push,
const unsigned long *pfns,
unsigned long *page_mask,
unsigned long mask_size,
migrate_vma_state_t *state)
{
unsigned long i;
uvm_migrate_args_t *uvm_migrate_args = state->uvm_migrate_args;
uvm_processor_id_t dst_id = uvm_migrate_args->dst_id;
uvm_gpu_t *zeroing_gpu = uvm_push_get_gpu(push);
for_each_set_bit(i, page_mask, mask_size) {
struct page *page;
uvm_gpu_address_t dst_address;
NV_STATUS status;
if (test_bit(i, state->allocation_failed_mask))
continue;
page = migrate_pfn_to_page(pfns[i]);
status = migrate_vma_page_copy_address(page, i, dst_id, zeroing_gpu, state, &dst_address);
if (status != NV_OK)
return status;
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
zeroing_gpu->parent->ce_hal->memset_8(push, dst_address, 0, PAGE_SIZE);
}
return NV_OK;
}
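
For context on the helper above: the two flags set before each memset let the CE operations within one push pipeline behind each other and skip their per-operation membars, so ordering for the whole batch is handled once by the caller (via uvm_push_end() and the tracker it feeds). A minimal sketch of that pattern, with a hypothetical name and destination array, not code from this change:

static void zero_batch_sketch(uvm_push_t *push, const uvm_gpu_address_t *dst_addrs, size_t count)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    size_t i;

    for (i = 0; i < count; i++) {
        // Pipeline this memset behind the previous CE operation and defer its
        // membar; the caller orders the batch as a whole.
        uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
        uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
        gpu->parent->ce_hal->memset_8(push, dst_addrs[i], 0, PAGE_SIZE);
    }
}
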
static NV_STATUS migrate_vma_copy_pages_from(struct vm_area_struct *vma,
const unsigned long *src,
unsigned long *dst,
@@ -411,36 +442,82 @@ static NV_STATUS migrate_vma_copy_pages_from(struct vm_area_struct *vma,
uvm_push_t push;
unsigned long i;
uvm_gpu_t *copying_gpu = NULL;
uvm_gpu_t *src_gpu = UVM_ID_IS_GPU(src_id) ? uvm_gpu_get(src_id) : NULL;
uvm_migrate_args_t *uvm_migrate_args = state->uvm_migrate_args;
uvm_processor_id_t dst_id = uvm_migrate_args->dst_id;
unsigned long *page_mask = state->processors[uvm_id_value(src_id)].page_mask;
uvm_va_space_t *va_space = uvm_migrate_args->va_space;
uvm_tracker_t zero_tracker = UVM_TRACKER_INIT();
UVM_ASSERT(!bitmap_empty(page_mask, state->num_pages));
// Pre-allocate the dst pages and mark the ones that failed
for_each_set_bit(i, page_mask, state->num_pages) {
uvm_gpu_address_t src_address;
uvm_gpu_address_t dst_address;
struct page *src_page = migrate_pfn_to_page(src[i]);
struct page *dst_page;
UVM_ASSERT(src[i] & MIGRATE_PFN_VALID);
UVM_ASSERT(src_page);
dst_page = migrate_vma_alloc_page(state);
struct page *dst_page = migrate_vma_alloc_page(state);
if (!dst_page) {
__set_bit(i, state->allocation_failed_mask);
continue;
}
lock_page(dst_page);
dst[i] = migrate_pfn(page_to_pfn(dst_page));
}
// Zero destination pages in case of an NVLINK copy that can hit STO or XC,
// or in case of an injected unresolved NVLINK error.
// TODO: Bug 4922701: [uvm] Re-evaluate STO handling for ATS migrations
// This can be removed if the false-positive rate of the STO
// fast-path is low enough to prefer failing the copy when an STO
// fast-path error is detected.
if (UVM_ID_IS_GPU(src_id) &&
UVM_ID_IS_GPU(dst_id) &&
((src_gpu->nvlink_status.enabled &&
(uvm_parent_gpu_peer_link_type(src_gpu->parent, uvm_gpu_get(dst_id)->parent) >= UVM_GPU_LINK_NVLINK_5)) ||
uvm_gpu_get_injected_nvlink_error(src_gpu) == NV_WARN_MORE_PROCESSING_REQUIRED)) {
uvm_gpu_t *dst_gpu = uvm_gpu_get(dst_id);
uvm_push_t zero_push;
status = migrate_vma_zero_begin_push(va_space, dst_id, dst_gpu, start, outer - 1, &zero_push);
if (status != NV_OK)
return status;
status = zero_non_failed_pages_in_mask(&zero_push, dst, page_mask, state->num_pages, state);
uvm_push_end(&zero_push);
if (status == NV_OK)
status = uvm_tracker_add_push_safe(&zero_tracker, &zero_push);
if (status != NV_OK)
return status;
}
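
When both source and destination are GPUs, the gate above reduces to a predicate along these lines (migration_needs_dst_zeroing_sketch is a hypothetical name; the change keeps the condition inline):

static bool migration_needs_dst_zeroing_sketch(uvm_gpu_t *src_gpu, uvm_gpu_t *dst_gpu)
{
    // Copies over NVLINK 5 or newer can hit STO/XC mid-transfer, so the
    // destination pages are zeroed first rather than risk mapping stale or
    // corrupt data if the copy silently fails.
    if (src_gpu->nvlink_status.enabled &&
        uvm_parent_gpu_peer_link_type(src_gpu->parent, dst_gpu->parent) >= UVM_GPU_LINK_NVLINK_5)
        return true;

    // Test hook: an injected unresolved NVLINK error takes the same path.
    return uvm_gpu_get_injected_nvlink_error(src_gpu) == NV_WARN_MORE_PROCESSING_REQUIRED;
}
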
for_each_set_bit(i, page_mask, state->num_pages) {
uvm_gpu_address_t src_address;
uvm_gpu_address_t dst_address;
struct page *src_page = migrate_pfn_to_page(src[i]);
struct page *dst_page = migrate_pfn_to_page(dst[i]);
if (test_bit(i, state->allocation_failed_mask))
continue;
UVM_ASSERT(src[i] & MIGRATE_PFN_VALID);
UVM_ASSERT(src_page);
UVM_ASSERT(dst[i] & MIGRATE_PFN_VALID);
UVM_ASSERT(dst_page);
if (!copying_gpu) {
status = migrate_vma_copy_begin_push(va_space, dst_id, src_id, start, outer - 1, &push);
if (status != NV_OK) {
__free_page(dst_page);
return status;
}
if (status != NV_OK)
break;
copying_gpu = uvm_push_get_gpu(&push);
if (src_gpu)
UVM_ASSERT(src_gpu == copying_gpu);
// The zero tracker will be empty if zeroing is not necessary
uvm_push_acquire_tracker(&push, &zero_tracker);
uvm_tracker_deinit(&zero_tracker);
}
else {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
@@ -452,18 +529,12 @@ static NV_STATUS migrate_vma_copy_pages_from(struct vm_area_struct *vma,
if (status == NV_OK)
status = migrate_vma_page_copy_address(dst_page, i, dst_id, copying_gpu, state, &dst_address);
if (status != NV_OK) {
__free_page(dst_page);
if (status != NV_OK)
break;
}
lock_page(dst_page);
// We'll push one membar later for all copies in this loop
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, PAGE_SIZE);
dst[i] = migrate_pfn(page_to_pfn(dst_page));
}
// TODO: Bug 1766424: If the destination is a GPU and the copy was done by
@@ -523,6 +594,7 @@ static void migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_sta
unsigned long start = args->start;
unsigned long outer = args->end;
NV_STATUS tracker_status;
uvm_migrate_args_t *uvm_migrate_args = state->uvm_migrate_args;
uvm_tracker_init(&state->tracker);
@@ -542,6 +614,40 @@ static void migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_sta
if (state->status == NV_OK)
state->status = tracker_status;
// Check if the copy might have been impacted by NVLINK errors.
if (state->status == NV_OK) {
uvm_processor_id_t src_id;
for_each_id_in_mask(src_id, &state->src_processors) {
NV_STATUS status;
// Skip CPU sources: even if the operation somehow went over NVLINK,
// it would be a read and would hit poison.
if (UVM_ID_IS_CPU(src_id))
continue;
UVM_ASSERT(UVM_ID_IS_GPU(src_id));
status = uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_get(src_id));
// Set state->status to the first error if there's an NVLINK error.
// Do not report NV_WARN_MORE_PROCESSING_REQUIRED: the call to
// uvm_migrate_vma_copy_pages above zeroed the destination, so in case
// of a real STO error the zeroed pages will be mapped.
if (state->status == NV_OK && status != NV_WARN_MORE_PROCESSING_REQUIRED)
state->status = status;
// Record unresolved GPU errors if the caller can use the information
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
if (uvm_migrate_args->gpus_to_check_for_nvlink_errors)
uvm_processor_mask_set(uvm_migrate_args->gpus_to_check_for_nvlink_errors, src_id);
// Fail the copy if requested by the caller
if (uvm_migrate_args->fail_on_unresolved_sto_errors && state->status == NV_OK)
state->status = NV_ERR_BUSY_RETRY;
}
}
}
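
Condensed, the per-source-GPU decision in the loop above amounts to the following (nvlink_status_after_copy_sketch is a hypothetical helper; the change keeps the logic inline and only latches the first error into state->status):

static NV_STATUS nvlink_status_after_copy_sketch(uvm_gpu_t *src_gpu, uvm_migrate_args_t *args)
{
    NV_STATUS status = uvm_gpu_check_nvlink_error_no_rm(src_gpu);

    // A definitive answer (NV_OK or a confirmed NVLINK error) is reported as-is.
    if (status != NV_WARN_MORE_PROCESSING_REQUIRED)
        return status;

    // Unresolved fast-path hit: RM would be needed to confirm. Record the GPU
    // so the caller can follow up and, if requested, fail conservatively with
    // a retryable error. The destination was pre-zeroed, so a real STO error
    // maps zeroed pages rather than stale data.
    if (args->gpus_to_check_for_nvlink_errors)
        uvm_processor_mask_set(args->gpus_to_check_for_nvlink_errors, src_gpu->id);

    return args->fail_on_unresolved_sto_errors ? NV_ERR_BUSY_RETRY : NV_OK;
}
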
// Mark all pages as not migrating if we're failing
if (state->status != NV_OK)
migrate_vma_cleanup_pages(args->dst, state->num_pages);
@@ -870,6 +976,14 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
if (va_space->test.skip_migrate_vma)
return NV_WARN_NOTHING_TO_DO;
// This isn't the right path for a UVM-owned vma. In most cases the callers
// will take the correct (managed) path, but we can get here if invoked on a
// disabled vma (see uvm_disable_vma()) that has no VA range but still has a
// vma. This could cause locking issues if the caller has the VA space
// locked and we invoke a UVM fault handler, so avoid it entirely.
if (uvm_file_is_nvidia_uvm(vma->vm_file))
return NV_ERR_INVALID_ADDRESS;
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
// support for it is added to the Linux kernel
if (!vma_is_anonymous(vma))
@@ -1002,9 +1116,12 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
return NV_ERR_INVALID_ARGUMENT;
}
else {
uvm_gpu_t *gpu = uvm_gpu_get(dst_id);
// Incoming dst_node_id is only valid if dst_id belongs to the CPU. If
// dst_id is a GPU, use that GPU's NUMA node id as dst_node_id instead.
uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(uvm_gpu_get(dst_id));
UVM_ASSERT(gpu->mem_info.numa.enabled);
uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(gpu);
}
state = kmem_cache_alloc(g_uvm_migrate_vma_state_cache, NV_UVM_GFP_FLAGS);