535.43.02

Author: Andy Ritger
Date:   2023-05-30 10:11:36 -07:00
Commit: eb5c7665a1
Parent: 6dd092ddb7

1403 changed files with 295367 additions and 86235 deletions


@@ -171,8 +171,8 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
         return NV_ERR_NO_MEMORY;
 
     if (flags & ~UVM_INIT_FLAGS_MASK) {
-        uvm_kvfree(va_space);
-        return NV_ERR_INVALID_ARGUMENT;
+        uvm_kvfree(va_space);
+        return NV_ERR_INVALID_ARGUMENT;
     }
 
     uvm_init_rwsem(&va_space->lock, UVM_LOCK_ORDER_VA_SPACE);
@@ -262,6 +262,7 @@ fail:
     uvm_perf_destroy_va_space_events(&va_space->perf_events);
     uvm_va_space_up_write(va_space);
     uvm_up_write_mmap_lock(current->mm);
 
+    // See the comment in uvm_va_space_mm_unregister() for why this has to be
+    // called after releasing the locks.
     uvm_va_space_mm_unregister(va_space);
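
The new comment points at a lock-ordering constraint: uvm_va_space_mm_unregister() can wait for work that itself needs the locks dropped just above it. Below is a standalone sketch of that constraint using plain pthreads and made-up names, not the driver's locking primitives or its actual teardown code.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t va_space_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t retainer;

static void *retainer_fn(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&va_space_lock);   /* stalls while teardown holds the lock */
    /* ... retained work ... */
    pthread_mutex_unlock(&va_space_lock);
    return NULL;
}

/* Stand-in for uvm_va_space_mm_unregister(): waits for the retainer to finish. */
static void mm_unregister_and_wait(void)
{
    pthread_join(retainer, NULL);
}

int main(void)
{
    pthread_create(&retainer, NULL, retainer_fn, NULL);

    pthread_mutex_lock(&va_space_lock);
    /* ... failure-path cleanup that needs the lock ... */
    pthread_mutex_unlock(&va_space_lock);

    /* Safe only here: calling mm_unregister_and_wait() before the unlock
     * above could wait forever on a thread that needs va_space_lock. */
    mm_unregister_and_wait();

    puts("teardown complete");
    return 0;
}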
@@ -366,6 +367,11 @@ static void unregister_gpu(uvm_va_space_t *va_space,
         }
     }
 
+    if (va_space->gpu_unregister_dma_buffer[uvm_id_gpu_index(gpu->id)]) {
+        uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool,
+                                           va_space->gpu_unregister_dma_buffer[uvm_id_gpu_index(gpu->id)],
+                                           &va_space->gpu_unregister_dma_buffer[uvm_id_gpu_index(gpu->id)]->tracker);
+    }
+
     va_space_check_processors_masks(va_space);
 }
@@ -412,6 +418,15 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
     uvm_global_processor_mask_t retained_gpus;
     LIST_HEAD(deferred_free_list);
 
+    // Normally we'd expect this to happen as part of uvm_mm_release()
+    // but if userspace never initialized uvm_mm_fd that won't happen.
+    // We don't have to take the va_space_mm spinlock and update state
+    // here because we know no other thread can be in or subsequently
+    // call uvm_api_mm_initialize successfully because the UVM
+    // file-descriptor has been released.
+    if (va_space->va_space_mm.state == UVM_VA_SPACE_MM_STATE_UNINITIALIZED)
+        uvm_va_space_mm_unregister(va_space);
+
     // Remove the VA space from the global list before we start tearing things
     // down so other threads can't see the VA space in a partially-valid state.
     uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
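
This hunk makes uvm_va_space_destroy() cover the case where userspace never initialized the mm file descriptor, so uvm_mm_release() never ran; a later hunk in the same function replaces the old unconditional unregister with an assertion. A minimal sketch of that state handling follows, with invented names and none of the driver's locking.

#include <assert.h>
#include <stdio.h>

enum mm_state { MM_UNINITIALIZED, MM_ALIVE, MM_RELEASED };

struct va_space_mm { enum mm_state state; };

static void mm_unregister(struct va_space_mm *mm)
{
    mm->state = MM_RELEASED;
}

/* Stand-in for the uvm_mm_fd release path: only runs the unregister if
 * userspace initialized the mm in the first place. */
static void mm_fd_release(struct va_space_mm *mm)
{
    if (mm->state == MM_ALIVE)
        mm_unregister(mm);
}

static void va_space_destroy(struct va_space_mm *mm)
{
    /* Mirrors the new check: pick up the case mm_fd_release() never saw.
     * No lock is needed because the fd is already gone, so no racing
     * initialize can happen. */
    if (mm->state == MM_UNINITIALIZED)
        mm_unregister(mm);

    /* Mirrors the assertion added later in uvm_va_space_destroy(). */
    assert(mm->state == MM_RELEASED);
}

int main(void)
{
    struct va_space_mm never_initialized = { MM_UNINITIALIZED };
    struct va_space_mm initialized = { MM_ALIVE };

    va_space_destroy(&never_initialized);   /* destroy does the unregister */

    mm_fd_release(&initialized);            /* normal path: fd teardown did it */
    va_space_destroy(&initialized);

    puts("both paths end with the mm unregistered");
    return 0;
}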
@@ -517,17 +532,8 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
     uvm_deferred_free_object_list(&deferred_free_list);
 
-    // Remove the mm_struct association on this VA space, if any. This may
-    // invoke uvm_va_space_mm_shutdown(), which in turn will disable all
-    // channels and wait for any retainers to finish, so it has to be done
-    // outside of the VA space lock.
-    //
-    // Since we must already handle mm shutdown being called at any point prior
-    // to this call, this call can be made at any point in
-    // uvm_va_space_destroy(). It's beneficial to do it late after doing all
-    // deferred frees for GPU VA spaces and channels, because then
-    // uvm_va_space_mm_shutdown() will have minimal work to do.
-    uvm_va_space_mm_unregister(va_space);
+    // MM FD teardown should already have destroyed va_space_mm
+    UVM_ASSERT(!uvm_va_space_mm_alive(&va_space->va_space_mm));
 
     uvm_mutex_lock(&g_uvm_global.global_lock);
@@ -684,12 +690,9 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         goto done;
     }
 
-    // Mixing Volta and Pascal GPUs is not supported on P9 systems.
+    // Mixing coherent and non-coherent GPUs is not supported
     for_each_va_space_gpu(other_gpu, va_space) {
-        if ((gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2 &&
-             other_gpu->parent->sysmem_link < UVM_GPU_LINK_NVLINK_2) ||
-            (gpu->parent->sysmem_link < UVM_GPU_LINK_NVLINK_2 &&
-             other_gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2)) {
+        if (uvm_gpu_is_coherent(gpu->parent) != uvm_gpu_is_coherent(other_gpu->parent)) {
             status = NV_ERR_INVALID_DEVICE;
             goto done;
         }
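
The rewritten check collapses the old pairwise link comparison into a single inequality. Assuming, as the removed condition suggests, that "coherent" means a system-memory link of NVLink-2 or better, the two forms are equivalent; the standalone program below (toy enum, not the driver's link types) verifies that for every pair of link values.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum link { PCIE = 1, NVLINK_1, NVLINK_2, NVLINK_3 };

static bool is_coherent(enum link l)
{
    return l >= NVLINK_2;
}

int main(void)
{
    const enum link links[] = { PCIE, NVLINK_1, NVLINK_2, NVLINK_3 };

    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 4; j++) {
            enum link a = links[i];
            enum link b = links[j];

            /* Removed form: exactly one of the two GPUs reaches NVLink-2. */
            bool old_check = (a >= NVLINK_2 && b < NVLINK_2) ||
                             (a < NVLINK_2 && b >= NVLINK_2);

            /* New form: the two GPUs disagree on coherence. */
            bool new_check = is_coherent(a) != is_coherent(b);

            assert(old_check == new_check);
        }
    }

    puts("old and new mixed-GPU checks agree for every link pair");
    return 0;
}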
@@ -701,6 +704,17 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         goto done;
     }
 
+    if (uvm_conf_computing_mode_enabled(gpu)) {
+        NvU32 gpu_index = uvm_id_gpu_index(gpu->id);
+        status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool,
+                                                     &va_space->gpu_unregister_dma_buffer[gpu_index],
+                                                     NULL);
+        if (status != NV_OK)
+            goto done;
+
+        gpu_can_access_sysmem = false;
+    }
+
     uvm_processor_mask_set(&va_space->registered_gpus, gpu->id);
     va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
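
The buffer allocated here is the same per-GPU slot that the unregister_gpu() hunk earlier in this diff frees, presumably so that unregistration in the Confidential Computing case never has to allocate; the hunk also clears gpu_can_access_sysmem for such GPUs. A reduced sketch of the allocate-at-register / free-at-unregister pairing, using malloc and invented names instead of the UVM buffer pool API:

#include <stdlib.h>

#define MAX_GPUS 8

struct dma_buffer { int gpu_index; };

static struct dma_buffer *unregister_buffer[MAX_GPUS];

static int register_gpu(int gpu_index)
{
    /* Reserve the buffer up front; failing here fails the whole registration. */
    unregister_buffer[gpu_index] = malloc(sizeof(struct dma_buffer));
    if (!unregister_buffer[gpu_index])
        return -1;

    unregister_buffer[gpu_index]->gpu_index = gpu_index;
    return 0;
}

static void unregister_gpu(int gpu_index)
{
    /* Mirror of the free added to unregister_gpu(): only if registration
     * actually reserved a buffer for this GPU. */
    if (unregister_buffer[gpu_index]) {
        free(unregister_buffer[gpu_index]);
        unregister_buffer[gpu_index] = NULL;
    }
}

int main(void)
{
    if (register_gpu(0) == 0)
        unregister_gpu(0);

    unregister_gpu(1);   /* never registered: the guard makes this a no-op */
    return 0;
}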
@@ -713,15 +727,16 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
     // All GPUs have native atomics on their own memory
     processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id);
 
-    if (gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_1) {
+    // TODO: Bug 3252572: Support the new link type UVM_GPU_LINK_C2C
+    if (gpu->parent->system_bus.link >= UVM_GPU_LINK_NVLINK_1) {
         processor_mask_array_set(va_space->has_nvlink, gpu->id, UVM_ID_CPU);
         processor_mask_array_set(va_space->has_nvlink, UVM_ID_CPU, gpu->id);
     }
 
-    if (gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2) {
+    if (uvm_gpu_is_coherent(gpu->parent)) {
         processor_mask_array_set(va_space->has_native_atomics, gpu->id, UVM_ID_CPU);
 
-        if (gpu->parent->numa_info.enabled) {
+        if (gpu->mem_info.numa.enabled) {
             processor_mask_array_set(va_space->can_access, UVM_ID_CPU, gpu->id);
             processor_mask_array_set(va_space->accessible_from, gpu->id, UVM_ID_CPU);
             processor_mask_array_set(va_space->has_native_atomics, UVM_ID_CPU, gpu->id);
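
The mask updates in this hunk follow one pattern: each property (can_access, accessible_from, has_nvlink, has_native_atomics) is an array of processor masks indexed by processor ID, and a mutual capability is recorded once per direction. A self-contained illustration with a toy 64-bit bitmask standing in for the driver's processor mask type and helpers:

#include <stdint.h>
#include <stdio.h>

#define CPU_ID 0
#define GPU0_ID 1
#define MAX_PROCESSORS 33   /* CPU plus up to 32 GPUs, as an example */

typedef uint64_t proc_mask_t;

static proc_mask_t can_access[MAX_PROCESSORS];
static proc_mask_t has_native_atomics[MAX_PROCESSORS];

static void mask_array_set(proc_mask_t *masks, int from, int to)
{
    masks[from] |= (proc_mask_t)1 << to;
}

static int mask_array_test(const proc_mask_t *masks, int from, int to)
{
    return (int)((masks[from] >> to) & 1);
}

int main(void)
{
    /* Every processor can access, and do atomics on, its own memory. */
    mask_array_set(can_access, GPU0_ID, GPU0_ID);
    mask_array_set(has_native_atomics, GPU0_ID, GPU0_ID);

    /* A coherent link records the CPU<->GPU pair in both directions. */
    mask_array_set(has_native_atomics, GPU0_ID, CPU_ID);
    mask_array_set(has_native_atomics, CPU_ID, GPU0_ID);

    printf("GPU0 atomic on CPU memory: %d\n",
           mask_array_test(has_native_atomics, GPU0_ID, CPU_ID));
    return 0;
}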
@@ -777,9 +792,9 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         goto cleanup;
     }
 
-    if (gpu->parent->numa_info.enabled) {
+    if (gpu->mem_info.numa.enabled) {
         *numa_enabled = NV_TRUE;
-        *numa_node_id = (NvS32)uvm_gpu_numa_info(gpu)->node_id;
+        *numa_node_id = (NvS32)uvm_gpu_numa_node(gpu);
     }
     else {
         *numa_enabled = NV_FALSE;
@@ -987,6 +1002,7 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
         return NV_ERR_NOT_COMPATIBLE;
     }
 
+    // TODO: Bug 3848497: Disable GPU Peer Mapping when HCC is enabled
     processor_mask_array_set(va_space->can_access, gpu0->id, gpu1->id);
     processor_mask_array_set(va_space->can_access, gpu1->id, gpu0->id);
     processor_mask_array_set(va_space->accessible_from, gpu0->id, gpu1->id);
@@ -1012,8 +1028,8 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
     if (peer_caps->is_indirect_peer) {
         UVM_ASSERT(peer_caps->link_type >= UVM_GPU_LINK_NVLINK_2);
-        UVM_ASSERT(gpu0->parent->numa_info.enabled);
-        UVM_ASSERT(gpu1->parent->numa_info.enabled);
+        UVM_ASSERT(gpu0->mem_info.numa.enabled);
+        UVM_ASSERT(gpu1->mem_info.numa.enabled);
 
         processor_mask_array_set(va_space->indirect_peers, gpu0->id, gpu1->id);
         processor_mask_array_set(va_space->indirect_peers, gpu1->id, gpu0->id);
@@ -1411,6 +1427,10 @@ NV_STATUS uvm_va_space_register_gpu_va_space(uvm_va_space_t *va_space,
         return NV_ERR_INVALID_DEVICE;
 
     mm = uvm_va_space_mm_or_current_retain(va_space);
+    if (!mm) {
+        status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
+        goto error_gpu_release;
+    }
 
     status = create_gpu_va_space(gpu, va_space, user_rm_va_space, &gpu_va_space);
     if (status != NV_OK)
@@ -2200,9 +2220,24 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
             uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
 
         if (is_hmm) {
-            status = uvm_hmm_va_block_cpu_find(va_space, service_context, vmf, &va_block);
+            // Note that normally we should find a va_block for the faulting
+            // address because the block had to be created when migrating a
+            // page to the GPU and a device private PTE inserted into the CPU
+            // page tables in order for migrate_to_ram() to be called. Not
+            // finding it means the PTE was remapped to a different virtual
+            // address with mremap() so create a new va_block if needed.
+            status = uvm_hmm_va_block_find_create(va_space,
+                                                  fault_addr,
+                                                  &service_context->block_context,
+                                                  &va_block);
+            if (status != NV_OK)
+                break;
+
+            status = uvm_hmm_migrate_begin(va_block);
+            if (status != NV_OK)
+                break;
+
+            service_context->cpu_fault.vmf = vmf;
         }
         else {
             status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
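
The new comment explains why the HMM path switched from a plain lookup to find-or-create: after mremap() the device-private PTE can fault at a virtual address no existing va_block covers. The sketch below (invented structures, a fixed-size table instead of the driver's range tracking) shows just that lookup-or-create step; the migrate_begin()/migrate_finish() bracketing added around it is not modeled.

#include <stdio.h>
#include <stdlib.h>

#define BLOCK_SIZE 0x200000UL   /* 2MB-aligned blocks, as an example */
#define MAX_BLOCKS 16

struct va_block { unsigned long start; };

static struct va_block *blocks[MAX_BLOCKS];
static int num_blocks;

static struct va_block *va_block_find_create(unsigned long fault_addr)
{
    unsigned long start = fault_addr & ~(BLOCK_SIZE - 1);

    for (int i = 0; i < num_blocks; i++) {
        if (blocks[i]->start == start)
            return blocks[i];          /* the common case: the block exists */
    }

    if (num_blocks == MAX_BLOCKS)
        return NULL;

    /* mremap()-style case: no block covers this address yet, so create one. */
    struct va_block *block = malloc(sizeof(*block));
    if (!block)
        return NULL;

    block->start = start;
    blocks[num_blocks++] = block;
    return block;
}

int main(void)
{
    struct va_block *a = va_block_find_create(0x7f0000201000UL);
    struct va_block *b = va_block_find_create(0x7f0000202000UL); /* same block */
    struct va_block *c = va_block_find_create(0x7f0000601000UL); /* new block */

    printf("a==b: %d, a==c: %d\n", a == b, a == c);
    return 0;
}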
@@ -2219,10 +2254,10 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
         status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
 
         if (is_hmm)
-            uvm_hmm_cpu_fault_finish(service_context);
+            uvm_hmm_migrate_finish(va_block);
     } while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
 
-    if (status != NV_OK) {
+    if (status != NV_OK && !(is_hmm && status == NV_ERR_BUSY_RETRY)) {
         UvmEventFatalReason reason;
 
         reason = uvm_tools_status_to_fatal_fault_reason(status);
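
With the change to the final status check, an HMM fault that comes back NV_ERR_BUSY_RETRY is no longer recorded as a fatal fault, while NV_WARN_MORE_PROCESSING_REQUIRED still drives the retry loop above. A small standalone triage sketch with stand-in status codes (what the driver ultimately returns to the kernel for the busy case is not shown in this hunk):

#include <stdbool.h>
#include <stdio.h>

enum status { OK, MORE_PROCESSING_REQUIRED, BUSY_RETRY, INVALID_ADDRESS };

static const char *triage(enum status status, bool is_hmm)
{
    if (status == OK)
        return "done";
    if (status == MORE_PROCESSING_REQUIRED)
        return "service the fault again (the do/while loop)";
    if (is_hmm && status == BUSY_RETRY)
        return "not fatal: allow the access to be retried";
    return "record a fatal fault";
}

int main(void)
{
    printf("HMM busy:      %s\n", triage(BUSY_RETRY, true));
    printf("managed busy:  %s\n", triage(BUSY_RETRY, false));
    printf("bad address:   %s\n", triage(INVALID_ADDRESS, true));
    return 0;
}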