535.86.05

Bernhard Stoeckner
2023-07-18 15:54:53 +02:00
committed by Bernhard Stoeckner
parent 22a077c4fe
commit 10d538dfbd
264 changed files with 67251 additions and 107479 deletions


@@ -24,6 +24,7 @@
#include "uvm_va_range.h"
#include "uvm_ats_faults.h"
#include "uvm_migrate_pageable.h"
+#include <linux/mempolicy.h>

// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
// these experimental parameters. These are intended to help guide that policy.
@@ -79,7 +80,7 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
                                    NvU64 start,
                                    size_t length,
                                    uvm_fault_access_type_t access_type,
-                                   uvm_fault_client_type_t client_type)
+                                   uvm_ats_fault_context_t *ats_context)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    struct mm_struct *mm = va_space->va_space_mm.mm;
@@ -95,17 +96,18 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
    // 2) guest physical -> host physical
    //
    // The overall ATS translation will fault if either of those translations is
-   // invalid. The get_user_pages() call above handles translation #1, but not
-   // #2. We don't know if we're running as a guest, but in case we are we can
-   // force that translation to be valid by touching the guest physical address
-   // from the CPU. If the translation is not valid then the access will cause
-   // a hypervisor fault. Note that dma_map_page() can't establish mappings
-   // used by GPU ATS SVA translations. GPU accesses to host physical addresses
-   // obtained as a result of the address translation request uses the CPU
-   // address space instead of the IOMMU address space since the translated
-   // host physical address isn't necessarily an IOMMU address. The only way to
-   // establish guest physical to host physical mapping in the CPU address
-   // space is to touch the page from the CPU.
+   // invalid. The pin_user_pages() call within the uvm_migrate_pageable() call
+   // below handles translation #1, but not #2. We don't know if we're running
+   // as a guest, but in case we are, we can force that translation to be valid
+   // by touching the guest physical address from the CPU. If the translation
+   // is not valid then the access will cause a hypervisor fault. Note that
+   // dma_map_page() can't establish mappings used by GPU ATS SVA translations.
+   // GPU accesses to host physical addresses obtained as a result of the
+   // address translation request use the CPU address space instead of the
+   // IOMMU address space since the translated host physical address isn't
+   // necessarily an IOMMU address. The only way to establish a guest physical
+   // to host physical mapping in the CPU address space is to touch the page
+   // from the CPU.
    //
    // We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
    // VM_WRITE, meaning that the mappings are all granted write access on any
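The comment above boils down to one operation: after the pages are populated, read each page once from the CPU so that the guest-physical to host-physical translation is established before the GPU issues an ATS access. A minimal user-space analogue of that touch loop is sketched below; the function name, the caller-supplied buffer, and the use of sysconf() for the page size are illustrative assumptions, not the driver's actual code.

#include <stddef.h>
#include <stdint.h>
#include <unistd.h>

// Illustrative sketch only: touch one byte per page of [addr, addr + length)
// so the CPU establishes the final-level translation for every page. In the
// driver this is done from kernel code on pinned pages; a plain user-space
// read loop is used here as an analogue.
static void touch_pages_from_cpu(const volatile uint8_t *addr, size_t length)
{
    const size_t page_size = (size_t)sysconf(_SC_PAGESIZE);

    for (size_t offset = 0; offset < length; offset += page_size) {
        // The volatile-qualified read cannot be elided, so each page is
        // actually accessed and its translation faulted in if necessary.
        (void)addr[offset];
    }
}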
@@ -116,20 +118,26 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
    uvm_migrate_args_t uvm_migrate_args =
    {
-       .va_space = va_space,
-       .mm = mm,
-       .dst_id = gpu_va_space->gpu->parent->id,
-       .dst_node_id = -1,
-       .populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
-       .touch = true,
-       .skip_mapped = true,
-       .user_space_start = &user_space_start,
-       .user_space_length = &user_space_length,
+       .va_space = va_space,
+       .mm = mm,
+       .dst_id = ats_context->residency_id,
+       .dst_node_id = ats_context->residency_node,
+       .populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
+       .touch = true,
+       .skip_mapped = true,
+       .populate_on_cpu_alloc_failures = true,
+       .user_space_start = &user_space_start,
+       .user_space_length = &user_space_length,
    };

    UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));

-   expand_fault_region(vma, start, length, client_type, &uvm_migrate_args.start, &uvm_migrate_args.length);
+   expand_fault_region(vma,
+                       start,
+                       length,
+                       ats_context->client_type,
+                       &uvm_migrate_args.start,
+                       &uvm_migrate_args.length);

    // We are trying to use migrate_vma API in the kernel (if it exists) to
    // populate and map the faulting region on the GPU. We want to do this only
@@ -165,6 +173,58 @@ static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
    uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
}

+static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
+                                       struct vm_area_struct *vma,
+                                       uvm_ats_fault_context_t *ats_context)
+{
+    uvm_gpu_t *gpu = gpu_va_space->gpu;
+    int residency = uvm_gpu_numa_node(gpu);
+
+#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
+    struct mempolicy *vma_policy = vma_policy(vma);
+    unsigned short mode;
+
+    if (!vma_policy)
+        goto done;
+
+    mode = vma_policy->mode;
+    if ((mode == MPOL_BIND) || (mode == MPOL_PREFERRED_MANY) || (mode == MPOL_PREFERRED)) {
+        int home_node = NUMA_NO_NODE;
+
+#if defined(NV_MEMPOLICY_HAS_HOME_NODE)
+        if ((mode != MPOL_PREFERRED) && (vma_policy->home_node != NUMA_NO_NODE))
+            home_node = vma_policy->home_node;
+#endif
+
+        // Prefer home_node if set. Otherwise, prefer the faulting GPU if it's
+        // in the list of preferred nodes, else prefer the closest_cpu_numa_node
+        // to the GPU if closest_cpu_numa_node is in the list of preferred
+        // nodes. Fall back to the faulting GPU if all else fails.
+        if (home_node != NUMA_NO_NODE) {
+            residency = home_node;
+        }
+        else if (!node_isset(residency, vma_policy->nodes)) {
+            int closest_cpu_numa_node = gpu->parent->closest_cpu_numa_node;
+
+            if ((closest_cpu_numa_node != NUMA_NO_NODE) && node_isset(closest_cpu_numa_node, vma_policy->nodes))
+                residency = gpu->parent->closest_cpu_numa_node;
+            else
+                residency = first_node(vma_policy->nodes);
+        }
+    }
+
+    // Update gpu if the selected residency node is not the faulting GPU's node.
+    if (residency != uvm_gpu_numa_node(gpu))
+        gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
+
+done:
+#endif
+
+    ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
+    ats_context->residency_node = residency;
+}
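Stripped of the kernel details, the selection implemented above is a preference chain: an explicit home node wins, then the faulting GPU's own NUMA node if the policy's node mask allows it, then the CPU node closest to that GPU, then the first node the mask permits. A self-contained sketch of the same chain is below; it substitutes a plain 64-bit bitmask for the kernel's nodemask_t, and every name and node ID in it is hypothetical.

#include <stdbool.h>
#include <stdint.h>

#define NO_NODE (-1)  // stand-in for the kernel's NUMA_NO_NODE

// Hypothetical helper: is 'node' set in the policy's allowed-node bitmask?
static bool node_allowed(uint64_t allowed_mask, int node)
{
    return node >= 0 && node < 64 && (allowed_mask & (UINT64_C(1) << node)) != 0;
}

// Simplified version of the preference chain used by the residency selection
// above: home node, then the GPU's node, then the closest CPU node, then the
// lowest-numbered allowed node (mirroring first_node()).
static int select_residency_node(uint64_t allowed_mask,
                                 int home_node,
                                 int gpu_node,
                                 int closest_cpu_node)
{
    if (home_node != NO_NODE)
        return home_node;

    if (node_allowed(allowed_mask, gpu_node))
        return gpu_node;

    if (node_allowed(allowed_mask, closest_cpu_node))
        return closest_cpu_node;

    for (int node = 0; node < 64; node++) {
        if (node_allowed(allowed_mask, node))
            return node;
    }

    // Empty mask: keep the faulting GPU's node, as the caller does by default.
    return gpu_node;
}

For example, with no home node, an allowed mask covering nodes 0 and 2, a GPU on node 8, and node 2 as the GPU's closest CPU node, the chain selects node 2, which matches what the function above would pick.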
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
                                 struct vm_area_struct *vma,
                                 NvU64 base,
@@ -205,6 +265,8 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
        uvm_page_mask_zero(write_fault_mask);
    }

+   ats_batch_select_residency(gpu_va_space, vma, ats_context);

    for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
        NvU64 start = base + (subregion.first * PAGE_SIZE);
        size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
@@ -215,7 +277,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
        UVM_ASSERT(start >= vma->vm_start);
        UVM_ASSERT((start + length) <= vma->vm_end);

-       status = service_ats_faults(gpu_va_space, vma, start, length, access_type, client_type);
+       status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
        if (status != NV_OK)
            return status;
@@ -244,11 +306,12 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
    for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
        NvU64 start = base + (subregion.first * PAGE_SIZE);
        size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
+       uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;

        UVM_ASSERT(start >= vma->vm_start);
        UVM_ASSERT((start + length) <= vma->vm_end);

-       status = service_ats_faults(gpu_va_space, vma, start, length, UVM_FAULT_ACCESS_TYPE_READ, client_type);
+       status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
        if (status != NV_OK)
            return status;