550.40.07

This commit is contained in:
Bernhard Stoeckner
2024-01-24 17:51:53 +01:00
parent bb2dac1f20
commit 91676d6628
1411 changed files with 261367 additions and 145959 deletions

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -30,7 +30,7 @@
bool uvm_rm_mem_mapped_on_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
return uvm_global_processor_mask_test(&rm_mem->mapped_on, gpu->global_id);
return uvm_processor_mask_test(&rm_mem->mapped_on, gpu->id);
}
bool uvm_rm_mem_mapped_on_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
@@ -38,68 +38,68 @@ bool uvm_rm_mem_mapped_on_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
if (rm_mem->proxy_vas == NULL)
return false;
if (rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)] == 0)
if (rm_mem->proxy_vas[uvm_id_value(gpu->id)] == 0)
return false;
UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
UVM_ASSERT(uvm_gpu_uses_proxy_channel_pool(gpu));
UVM_ASSERT(uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent));
return true;
}
bool uvm_rm_mem_mapped_on_cpu(uvm_rm_mem_t *rm_mem)
{
return uvm_global_processor_mask_test(&rm_mem->mapped_on, UVM_GLOBAL_ID_CPU);
return uvm_processor_mask_test(&rm_mem->mapped_on, UVM_ID_CPU);
}
static void rm_mem_set_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va)
{
rm_mem->vas[uvm_global_id_value(gpu->global_id)] = va;
uvm_global_processor_mask_set(&rm_mem->mapped_on, gpu->global_id);
rm_mem->vas[uvm_id_value(gpu->id)] = va;
uvm_processor_mask_set(&rm_mem->mapped_on, gpu->id);
}
static void rm_mem_set_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va)
{
rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)] = va;
rm_mem->proxy_vas[uvm_id_value(gpu->id)] = va;
}
static void rm_mem_set_cpu_va(uvm_rm_mem_t *rm_mem, void *va)
{
rm_mem->vas[UVM_GLOBAL_ID_CPU_VALUE] = (uintptr_t) va;
uvm_global_processor_mask_set(&rm_mem->mapped_on, UVM_GLOBAL_ID_CPU);
rm_mem->vas[UVM_ID_CPU_VALUE] = (uintptr_t) va;
uvm_processor_mask_set(&rm_mem->mapped_on, UVM_ID_CPU);
}
static void rm_mem_clear_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
UVM_ASSERT(!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
uvm_global_processor_mask_clear(&rm_mem->mapped_on, gpu->global_id);
rm_mem->vas[uvm_global_id_value(gpu->global_id)] = 0;
uvm_processor_mask_clear(&rm_mem->mapped_on, gpu->id);
rm_mem->vas[uvm_id_value(gpu->id)] = 0;
}
static void rm_mem_clear_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)] = 0;
rm_mem->proxy_vas[uvm_id_value(gpu->id)] = 0;
}
static void rm_mem_clear_cpu_va(uvm_rm_mem_t *rm_mem)
{
uvm_global_processor_mask_clear(&rm_mem->mapped_on, UVM_GLOBAL_ID_CPU);
rm_mem->vas[UVM_GLOBAL_ID_CPU_VALUE] = 0;
uvm_processor_mask_clear(&rm_mem->mapped_on, UVM_ID_CPU);
rm_mem->vas[UVM_ID_CPU_VALUE] = 0;
}
NvU64 uvm_rm_mem_get_gpu_uvm_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
UVM_ASSERT_MSG(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu), "GPU %s\n", uvm_gpu_name(gpu));
return rm_mem->vas[uvm_global_id_value(gpu->global_id)];
return rm_mem->vas[uvm_id_value(gpu->id)];
}
NvU64 uvm_rm_mem_get_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
UVM_ASSERT(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
return rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)];
return rm_mem->proxy_vas[uvm_id_value(gpu->id)];
}
uvm_gpu_address_t uvm_rm_mem_get_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, bool is_proxy_va_space)
@@ -108,7 +108,8 @@ uvm_gpu_address_t uvm_rm_mem_get_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, bo
gpu_va.aperture = UVM_APERTURE_MAX;
gpu_va.is_virtual = true;
if (uvm_conf_computing_mode_enabled(gpu) && (rm_mem->type == UVM_RM_MEM_TYPE_SYS))
if (g_uvm_global.conf_computing_enabled && (rm_mem->type == UVM_RM_MEM_TYPE_SYS))
gpu_va.is_unprotected = true;
if (is_proxy_va_space)
@@ -123,7 +124,7 @@ void *uvm_rm_mem_get_cpu_va(uvm_rm_mem_t *rm_mem)
{
UVM_ASSERT(uvm_rm_mem_mapped_on_cpu(rm_mem));
return (void *)(uintptr_t)rm_mem->vas[UVM_GLOBAL_ID_CPU_VALUE];
return (void *)(uintptr_t)rm_mem->vas[UVM_ID_CPU_VALUE];
}
static NV_STATUS rm_mem_map_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
@@ -135,7 +136,7 @@ static NV_STATUS rm_mem_map_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
if (!uvm_gpu_uses_proxy_channel_pool(gpu))
if (!uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent))
return NV_OK;
if (uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu))
@@ -208,7 +209,7 @@ NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu,
if (rm_mem == NULL)
return NV_ERR_NO_MEMORY;
if (!uvm_conf_computing_mode_enabled(gpu) || type == UVM_RM_MEM_TYPE_SYS)
if (!g_uvm_global.conf_computing_enabled || type == UVM_RM_MEM_TYPE_SYS)
alloc_info.bUnprotected = NV_TRUE;
alloc_info.alignment = gpu_alignment;
@@ -255,10 +256,11 @@ NV_STATUS uvm_rm_mem_map_cpu(uvm_rm_mem_t *rm_mem)
if (uvm_rm_mem_mapped_on_cpu(rm_mem))
return NV_OK;
if (g_uvm_global.conf_computing_enabled)
UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS);
gpu = rm_mem->gpu_owner;
gpu_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);
if (uvm_conf_computing_mode_enabled(gpu))
UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS);
status = uvm_rm_locked_call(nvUvmInterfaceMemoryCpuMap(gpu->rm_address_space,
gpu_va,
@@ -357,7 +359,7 @@ void uvm_rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
void uvm_rm_mem_free(uvm_rm_mem_t *rm_mem)
{
uvm_global_gpu_id_t gpu_id;
uvm_gpu_id_t gpu_id;
uvm_gpu_t *gpu_owner;
if (rm_mem == NULL)
@@ -372,19 +374,19 @@ void uvm_rm_mem_free(uvm_rm_mem_t *rm_mem)
uvm_rm_mem_unmap_cpu(rm_mem);
// Don't use for_each_global_gpu_in_mask() as the owning GPU might be being
// destroyed and already removed from the global GPU array causing the iteration
// to stop prematurely.
for_each_global_gpu_id_in_mask(gpu_id, &rm_mem->mapped_on) {
if (!uvm_global_id_equal(gpu_id, gpu_owner->global_id))
// Don't use for_each_gpu_in_mask() as the owning GPU might be being
// destroyed and already removed from the global GPU array causing the
// iteration to stop prematurely.
for_each_gpu_id_in_mask(gpu_id, &rm_mem->mapped_on) {
if (!uvm_id_equal(gpu_id, gpu_owner->id))
uvm_rm_mem_unmap_gpu(rm_mem, uvm_gpu_get(gpu_id));
}
rm_mem_unmap_gpu(rm_mem, gpu_owner);
UVM_ASSERT_MSG(uvm_global_processor_mask_empty(&rm_mem->mapped_on),
UVM_ASSERT_MSG(uvm_processor_mask_empty(&rm_mem->mapped_on),
"Left-over %u mappings in rm_mem\n",
uvm_global_processor_mask_get_count(&rm_mem->mapped_on));
uvm_processor_mask_get_count(&rm_mem->mapped_on));
uvm_kvfree(rm_mem->proxy_vas);
uvm_kvfree(rm_mem);
@@ -422,7 +424,7 @@ NV_STATUS uvm_rm_mem_map_all_gpus(uvm_rm_mem_t *rm_mem, NvU64 gpu_alignment)
UVM_ASSERT(rm_mem);
for_each_global_gpu(gpu) {
for_each_gpu(gpu) {
NV_STATUS status = uvm_rm_mem_map_gpu(rm_mem, gpu, gpu_alignment);
if (status != NV_OK)
return status;