mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
535.43.02
@@ -171,8 +171,8 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
         return NV_ERR_NO_MEMORY;
 
     if (flags & ~UVM_INIT_FLAGS_MASK) {
-        uvm_kvfree(va_space);
-        return NV_ERR_INVALID_ARGUMENT;
+        uvm_kvfree(va_space);
+        return NV_ERR_INVALID_ARGUMENT;
     }
 
     uvm_init_rwsem(&va_space->lock, UVM_LOCK_ORDER_VA_SPACE);
@@ -262,6 +262,7 @@ fail:
     uvm_perf_destroy_va_space_events(&va_space->perf_events);
     uvm_va_space_up_write(va_space);
     uvm_up_write_mmap_lock(current->mm);
+
     // See the comment in uvm_va_space_mm_unregister() for why this has to be
     // called after releasing the locks.
     uvm_va_space_mm_unregister(va_space);
@@ -366,6 +367,11 @@ static void unregister_gpu(uvm_va_space_t *va_space,
         }
     }
 
+    if (va_space->gpu_unregister_dma_buffer[uvm_id_gpu_index(gpu->id)]) {
+        uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool,
+                                           va_space->gpu_unregister_dma_buffer[uvm_id_gpu_index(gpu->id)],
+                                           &va_space->gpu_unregister_dma_buffer[uvm_id_gpu_index(gpu->id)]->tracker);
+    }
     va_space_check_processors_masks(va_space);
 }
 
@@ -412,6 +418,15 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
     uvm_global_processor_mask_t retained_gpus;
     LIST_HEAD(deferred_free_list);
 
+    // Normally we'd expect this to happen as part of uvm_mm_release()
+    // but if userspace never initialized uvm_mm_fd that won't happen.
+    // We don't have to take the va_space_mm spinlock and update state
+    // here because we know no other thread can be in or subsequently
+    // call uvm_api_mm_initialize successfully because the UVM
+    // file-descriptor has been released.
+    if (va_space->va_space_mm.state == UVM_VA_SPACE_MM_STATE_UNINITIALIZED)
+        uvm_va_space_mm_unregister(va_space);
+
     // Remove the VA space from the global list before we start tearing things
     // down so other threads can't see the VA space in a partially-valid state.
     uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
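With uvm_mm_fd, the mm association is normally torn down when that file descriptor is released (uvm_mm_release()); the new block covers processes that opened the UVM device but never called the MM-initialization API. The stand-alone C sketch below models only that decision; the state names, struct layout, and helpers are simplified placeholders, not the driver's definitions.

#include <stdio.h>

// Simplified stand-ins for the driver's va_space_mm state; the real state
// machine has more states and is protected by a spinlock.
typedef enum {
    VA_SPACE_MM_STATE_UNINITIALIZED,  // userspace never set up the MM file descriptor
    VA_SPACE_MM_STATE_ALIVE,          // MM registered; torn down when that fd is released
} va_space_mm_state_t;

typedef struct {
    va_space_mm_state_t mm_state;
} va_space_t;

static void va_space_mm_unregister(va_space_t *va_space)
{
    // The real call disables channels and waits for retainers; this stub
    // only reports that the unregister happened.
    (void)va_space;
    printf("va_space_mm unregistered\n");
}

// Mirrors the destroy-path decision in the hunk above: unregister here only
// when the MM fd was never initialized, since otherwise releasing that fd
// already performed the unregister.
static void va_space_destroy(va_space_t *va_space)
{
    if (va_space->mm_state == VA_SPACE_MM_STATE_UNINITIALIZED)
        va_space_mm_unregister(va_space);

    printf("va_space destroyed\n");
}

int main(void)
{
    va_space_t never_initialized = { .mm_state = VA_SPACE_MM_STATE_UNINITIALIZED };
    va_space_t initialized = { .mm_state = VA_SPACE_MM_STATE_ALIVE };

    va_space_destroy(&never_initialized);  // unregisters, then destroys
    va_space_destroy(&initialized);        // destroy only; fd release handled unregister
    return 0;
}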
@@ -517,17 +532,8 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
 
     uvm_deferred_free_object_list(&deferred_free_list);
 
-    // Remove the mm_struct association on this VA space, if any. This may
-    // invoke uvm_va_space_mm_shutdown(), which in turn will disable all
-    // channels and wait for any retainers to finish, so it has to be done
-    // outside of the VA space lock.
-    //
-    // Since we must already handle mm shutdown being called at any point prior
-    // to this call, this call can be made at any point in
-    // uvm_va_space_destroy(). It's beneficial to do it late after doing all
-    // deferred frees for GPU VA spaces and channels, because then
-    // uvm_va_space_mm_shutdown() will have minimal work to do.
-    uvm_va_space_mm_unregister(va_space);
+    // MM FD teardown should already have destroyed va_space_mm
+    UVM_ASSERT(!uvm_va_space_mm_alive(&va_space->va_space_mm));
 
     uvm_mutex_lock(&g_uvm_global.global_lock);
@@ -684,12 +690,9 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         goto done;
     }
 
-    // Mixing Volta and Pascal GPUs is not supported on P9 systems.
+    // Mixing coherent and non-coherent GPUs is not supported
     for_each_va_space_gpu(other_gpu, va_space) {
-        if ((gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2 &&
-             other_gpu->parent->sysmem_link < UVM_GPU_LINK_NVLINK_2) ||
-            (gpu->parent->sysmem_link < UVM_GPU_LINK_NVLINK_2 &&
-             other_gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2)) {
+        if (uvm_gpu_is_coherent(gpu->parent) != uvm_gpu_is_coherent(other_gpu->parent)) {
            status = NV_ERR_INVALID_DEVICE;
            goto done;
        }
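The rewritten check replaces the link-type comparison with a single coherence predicate: a GPU can only join a VA space whose already-registered GPUs have the same coherence property. A small self-contained C sketch of that pairwise rule follows; the gpu_t struct and can_register() helper are illustrative stand-ins, not driver code.

#include <stdbool.h>
#include <stdio.h>

// Placeholder GPU descriptor; only the property the check cares about.
typedef struct {
    const char *name;
    bool coherent;   // stands in for uvm_gpu_is_coherent(gpu->parent)
} gpu_t;

// Mixing coherent and non-coherent GPUs is rejected, mirroring the diff's
// NV_ERR_INVALID_DEVICE path: every registered GPU must match the newcomer.
static bool can_register(const gpu_t *new_gpu, const gpu_t *registered, int count)
{
    for (int i = 0; i < count; i++) {
        if (new_gpu->coherent != registered[i].coherent)
            return false;
    }
    return true;
}

int main(void)
{
    gpu_t registered[] = { { "gpu0", true } };
    gpu_t coherent_gpu = { "gpu1", true };
    gpu_t pcie_gpu     = { "gpu2", false };

    printf("%s: %s\n", coherent_gpu.name, can_register(&coherent_gpu, registered, 1) ? "ok" : "rejected");
    printf("%s: %s\n", pcie_gpu.name, can_register(&pcie_gpu, registered, 1) ? "ok" : "rejected");
    return 0;
}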
@@ -701,6 +704,17 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         goto done;
     }
 
+    if (uvm_conf_computing_mode_enabled(gpu)) {
+        NvU32 gpu_index = uvm_id_gpu_index(gpu->id);
+        status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool,
+                                                     &va_space->gpu_unregister_dma_buffer[gpu_index],
+                                                     NULL);
+        if (status != NV_OK)
+            goto done;
+
+        gpu_can_access_sysmem = false;
+    }
+
     uvm_processor_mask_set(&va_space->registered_gpus, gpu->id);
     va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
 
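Paired with the unregister_gpu() hunk earlier, this allocates a per-GPU DMA buffer at registration time when Confidential Computing mode is enabled, and the earlier hunk frees it, if present, at unregistration. The sketch below models that allocate-on-register / free-if-present-on-unregister pairing with plain malloc and free; the pool, tracker, and error codes of the real uvm_conf_computing_* API are replaced by placeholders.

#include <stdio.h>
#include <stdlib.h>

#define MAX_GPUS 8

// Placeholder for the per-VA-space table indexed by GPU id in the diff
// (va_space->gpu_unregister_dma_buffer[...]).
static void *gpu_unregister_dma_buffer[MAX_GPUS];

// Register path: allocate the buffer only when confidential computing is on,
// and fail the registration if the allocation fails.
static int register_gpu(int gpu_index, int conf_computing_enabled)
{
    if (conf_computing_enabled) {
        gpu_unregister_dma_buffer[gpu_index] = malloc(4096);
        if (!gpu_unregister_dma_buffer[gpu_index])
            return -1;
    }
    return 0;
}

// Unregister path: free only if the buffer was actually allocated, matching
// the "if (va_space->gpu_unregister_dma_buffer[...])" guard in the diff.
static void unregister_gpu(int gpu_index)
{
    if (gpu_unregister_dma_buffer[gpu_index]) {
        free(gpu_unregister_dma_buffer[gpu_index]);
        gpu_unregister_dma_buffer[gpu_index] = NULL;
    }
}

int main(void)
{
    if (register_gpu(0, 1) != 0)
        return 1;
    unregister_gpu(0);   // frees the buffer
    unregister_gpu(1);   // no-op: nothing was allocated for this GPU
    printf("done\n");
    return 0;
}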
@@ -713,15 +727,16 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
     // All GPUs have native atomics on their own memory
     processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id);
 
-    if (gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_1) {
+    // TODO: Bug 3252572: Support the new link type UVM_GPU_LINK_C2C
+    if (gpu->parent->system_bus.link >= UVM_GPU_LINK_NVLINK_1) {
         processor_mask_array_set(va_space->has_nvlink, gpu->id, UVM_ID_CPU);
         processor_mask_array_set(va_space->has_nvlink, UVM_ID_CPU, gpu->id);
     }
 
-    if (gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2) {
+    if (uvm_gpu_is_coherent(gpu->parent)) {
         processor_mask_array_set(va_space->has_native_atomics, gpu->id, UVM_ID_CPU);
 
-        if (gpu->parent->numa_info.enabled) {
+        if (gpu->mem_info.numa.enabled) {
            processor_mask_array_set(va_space->can_access, UVM_ID_CPU, gpu->id);
            processor_mask_array_set(va_space->accessible_from, gpu->id, UVM_ID_CPU);
            processor_mask_array_set(va_space->has_native_atomics, UVM_ID_CPU, gpu->id);
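These lines record symmetric CPU/GPU capabilities (NVLink to system memory, native atomics, NUMA-backed access) in per-processor mask arrays. The following self-contained C sketch shows one way such a pairwise capability table can be kept with plain bitmasks; the processor ids, mask width, and cap_set()/cap_test() helpers are invented for illustration and do not reflect the actual processor_mask_array_set() implementation.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Toy processor ids: 0 is the CPU, 1..N are GPUs.
#define PROC_CPU  0
#define MAX_PROCS 9

// cap[a] has bit b set when processor a has the capability toward processor b,
// loosely modeling processor_mask_array_set(mask, a, b) from the diff.
typedef uint16_t cap_mask_t;

static void cap_set(cap_mask_t *cap, int a, int b)
{
    cap[a] |= (cap_mask_t)1 << b;
}

static bool cap_test(const cap_mask_t *cap, int a, int b)
{
    return (cap[a] >> b) & 1;
}

int main(void)
{
    cap_mask_t has_nvlink[MAX_PROCS] = {0};
    cap_mask_t has_native_atomics[MAX_PROCS] = {0};

    int gpu = 1;
    bool nvlink_to_sysmem = true;   // stands in for system_bus.link >= NVLINK_1
    bool coherent = true;           // stands in for uvm_gpu_is_coherent()

    // Every GPU has native atomics on its own memory.
    cap_set(has_native_atomics, gpu, gpu);

    // NVLink to system memory is recorded symmetrically, as in the diff.
    if (nvlink_to_sysmem) {
        cap_set(has_nvlink, gpu, PROC_CPU);
        cap_set(has_nvlink, PROC_CPU, gpu);
    }

    // Coherent links additionally allow native atomics on system memory.
    if (coherent)
        cap_set(has_native_atomics, gpu, PROC_CPU);

    printf("gpu->cpu nvlink: %d, gpu atomics on sysmem: %d\n",
           cap_test(has_nvlink, gpu, PROC_CPU),
           cap_test(has_native_atomics, gpu, PROC_CPU));
    return 0;
}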
@@ -777,9 +792,9 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         goto cleanup;
     }
 
-    if (gpu->parent->numa_info.enabled) {
+    if (gpu->mem_info.numa.enabled) {
         *numa_enabled = NV_TRUE;
-        *numa_node_id = (NvS32)uvm_gpu_numa_info(gpu)->node_id;
+        *numa_node_id = (NvS32)uvm_gpu_numa_node(gpu);
     }
     else {
         *numa_enabled = NV_FALSE;
@@ -987,6 +1002,7 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
         return NV_ERR_NOT_COMPATIBLE;
     }
 
+    // TODO: Bug 3848497: Disable GPU Peer Mapping when HCC is enabled
     processor_mask_array_set(va_space->can_access, gpu0->id, gpu1->id);
     processor_mask_array_set(va_space->can_access, gpu1->id, gpu0->id);
     processor_mask_array_set(va_space->accessible_from, gpu0->id, gpu1->id);
@@ -1012,8 +1028,8 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
 
     if (peer_caps->is_indirect_peer) {
         UVM_ASSERT(peer_caps->link_type >= UVM_GPU_LINK_NVLINK_2);
-        UVM_ASSERT(gpu0->parent->numa_info.enabled);
-        UVM_ASSERT(gpu1->parent->numa_info.enabled);
+        UVM_ASSERT(gpu0->mem_info.numa.enabled);
+        UVM_ASSERT(gpu1->mem_info.numa.enabled);
 
         processor_mask_array_set(va_space->indirect_peers, gpu0->id, gpu1->id);
         processor_mask_array_set(va_space->indirect_peers, gpu1->id, gpu0->id);
@@ -1411,6 +1427,10 @@ NV_STATUS uvm_va_space_register_gpu_va_space(uvm_va_space_t *va_space,
         return NV_ERR_INVALID_DEVICE;
 
     mm = uvm_va_space_mm_or_current_retain(va_space);
+    if (!mm) {
+        status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
+        goto error_gpu_release;
+    }
 
     status = create_gpu_va_space(gpu, va_space, user_rm_va_space, &gpu_va_space);
     if (status != NV_OK)
@@ -2200,9 +2220,24 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
         uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
 
         if (is_hmm) {
-            status = uvm_hmm_va_block_cpu_find(va_space, service_context, vmf, &va_block);
+            // Note that normally we should find a va_block for the faulting
+            // address because the block had to be created when migrating a
+            // page to the GPU and a device private PTE inserted into the CPU
+            // page tables in order for migrate_to_ram() to be called. Not
+            // finding it means the PTE was remapped to a different virtual
+            // address with mremap() so create a new va_block if needed.
+            status = uvm_hmm_va_block_find_create(va_space,
+                                                  fault_addr,
+                                                  &service_context->block_context,
+                                                  &va_block);
+            if (status != NV_OK)
+                break;
 
+            status = uvm_hmm_migrate_begin(va_block);
+            if (status != NV_OK)
+                break;
+
             service_context->cpu_fault.vmf = vmf;
         }
         else {
             status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
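The reworked HMM branch no longer assumes a va_block already covers the faulting address (mremap() may have moved the mapping), so it finds or creates the block, brackets servicing with uvm_hmm_migrate_begin()/uvm_hmm_migrate_finish(), and relies on the surrounding do/while to retry on NV_WARN_MORE_PROCESSING_REQUIRED. The stand-alone C sketch below reproduces only that find-or-create-then-retry shape; the status codes, va_block_t, and helpers are simplified placeholders rather than the UVM functions named in the diff.

#include <stdio.h>
#include <stdlib.h>

typedef enum { ST_OK, ST_RETRY, ST_ERROR } status_t;

typedef struct { unsigned long base; } va_block_t;

// Find-or-create: if no block covers the address (e.g. the mapping moved),
// create one, mirroring the switch from "find" to "find_create" in the diff.
static status_t block_find_create(unsigned long addr, va_block_t **out)
{
    va_block_t *block = calloc(1, sizeof(*block));
    if (!block)
        return ST_ERROR;
    block->base = addr & ~0xffful;
    *out = block;
    return ST_OK;
}

static int attempts;

// Pretend the first attempt needs another pass, like
// NV_WARN_MORE_PROCESSING_REQUIRED in the real fault path.
static status_t service_fault(va_block_t *block, unsigned long addr)
{
    (void)block;
    (void)addr;
    return (attempts++ == 0) ? ST_RETRY : ST_OK;
}

int main(void)
{
    unsigned long fault_addr = 0x7f0000001234ul;
    status_t status;

    do {
        va_block_t *block = NULL;

        status = block_find_create(fault_addr, &block);
        if (status != ST_OK)
            break;

        status = service_fault(block, fault_addr);   // migrate begin/finish elided
        free(block);
    } while (status == ST_RETRY);

    printf("fault handled: %s\n", status == ST_OK ? "ok" : "error");
    return 0;
}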
@@ -2219,10 +2254,10 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
         status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
 
         if (is_hmm)
-            uvm_hmm_cpu_fault_finish(service_context);
+            uvm_hmm_migrate_finish(va_block);
     } while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
 
-    if (status != NV_OK) {
+    if (status != NV_OK && !(is_hmm && status == NV_ERR_BUSY_RETRY)) {
         UvmEventFatalReason reason;
 
         reason = uvm_tools_status_to_fatal_fault_reason(status);