535.43.02

Andy Ritger
2023-05-30 10:11:36 -07:00
parent 6dd092ddb7
commit eb5c7665a1
1403 changed files with 295367 additions and 86235 deletions


@@ -41,6 +41,7 @@
#include "uvm_gpu_access_counters.h"
#include "uvm_ats.h"
#include "uvm_test.h"
#include "uvm_conf_computing.h"
#include "uvm_linux.h"
@@ -66,21 +67,6 @@ static uvm_user_channel_t *get_user_channel(uvm_rb_tree_node_t *node)
return container_of(node, uvm_user_channel_t, instance_ptr.node);
}
static void fill_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_info)
{
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
parent_gpu->rm_info = *gpu_info;
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
snprintf(parent_gpu->name,
sizeof(parent_gpu->name),
"ID %u: %s: %s",
uvm_id_value(parent_gpu->id),
parent_gpu->rm_info.name,
uuid_buffer);
}
static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
{
switch (link_type) {
@@ -94,44 +80,68 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
return UVM_GPU_LINK_NVLINK_3;
case UVM_LINK_TYPE_NVLINK_4:
return UVM_GPU_LINK_NVLINK_4;
case UVM_LINK_TYPE_C2C:
return UVM_GPU_LINK_C2C;
default:
return UVM_GPU_LINK_INVALID;
}
}
static NV_STATUS get_gpu_caps(uvm_parent_gpu_t *parent_gpu)
static void fill_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_info)
{
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
parent_gpu->rm_info = *gpu_info;
parent_gpu->system_bus.link = get_gpu_link_type(gpu_info->sysmemLink);
UVM_ASSERT(parent_gpu->system_bus.link != UVM_GPU_LINK_INVALID);
parent_gpu->system_bus.link_rate_mbyte_per_s = gpu_info->sysmemLinkRateMBps;
if (gpu_info->systemMemoryWindowSize > 0) {
// memory_window_end is inclusive but uvm_gpu_is_coherent() checks
// memory_window_end > memory_window_start as its condition.
UVM_ASSERT(gpu_info->systemMemoryWindowSize > 1);
parent_gpu->system_bus.memory_window_start = gpu_info->systemMemoryWindowStart;
parent_gpu->system_bus.memory_window_end = gpu_info->systemMemoryWindowStart +
gpu_info->systemMemoryWindowSize - 1;
}
parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_info->connectedToSwitch;
// nvswitch is routed via physical pages, where the upper 13 bits of the
// 47-bit address space hold the routing information for each peer.
// Currently, this is limited to a 16GB framebuffer window size.
if (parent_gpu->nvswitch_info.is_nvswitch_connected)
parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_info->nvswitchMemoryWindowStart;
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
snprintf(parent_gpu->name,
sizeof(parent_gpu->name),
"ID %u: %s: %s",
uvm_id_value(parent_gpu->id),
parent_gpu->rm_info.name,
uuid_buffer);
}
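A quick standalone C sketch of the inclusive window math in fill_gpu_info() above, with invented values (the 4 GiB window below is hypothetical, not a driver value): the last addressable byte is start + size - 1, and any size > 1 keeps the end strictly greater than the start, which is the condition uvm_gpu_is_coherent() relies on.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    // Hypothetical window: 4 GiB starting at 64 GiB (made-up values).
    uint64_t window_start = 64ull << 30;
    uint64_t window_size  = 4ull << 30;

    // memory_window_end is inclusive, so the last addressable byte is
    // start + size - 1; any size > 1 keeps end strictly greater than
    // start, matching the uvm_gpu_is_coherent() check described above.
    uint64_t window_end = window_start + window_size - 1;

    assert(window_end > window_start);
    printf("window: [0x%llx, 0x%llx]\n",
           (unsigned long long)window_start,
           (unsigned long long)window_end);
    return 0;
}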
static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
{
NV_STATUS status;
UvmGpuCaps gpu_caps;
memset(&gpu_caps, 0, sizeof(gpu_caps));
status = uvm_rm_locked_call(nvUvmInterfaceQueryCaps(parent_gpu->rm_device, &gpu_caps));
status = uvm_rm_locked_call(nvUvmInterfaceQueryCaps(uvm_gpu_device_handle(gpu), &gpu_caps));
if (status != NV_OK)
return status;
parent_gpu->sysmem_link = get_gpu_link_type(gpu_caps.sysmemLink);
UVM_ASSERT(parent_gpu->sysmem_link != UVM_GPU_LINK_INVALID);
parent_gpu->sysmem_link_rate_mbyte_per_s = gpu_caps.sysmemLinkRateMBps;
parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_caps.connectedToSwitch;
// nvswitch is routed via physical pages, where the upper 13 bits of the
// 47-bit address space hold the routing information for each peer.
// Currently, this is limited to a 16GB framebuffer window size.
if (parent_gpu->nvswitch_info.is_nvswitch_connected)
parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_caps.nvswitchMemoryWindowStart;
if (gpu_caps.numaEnabled) {
parent_gpu->numa_info.enabled = true;
parent_gpu->numa_info.node_id = gpu_caps.numaNodeId;
parent_gpu->numa_info.system_memory_window_start = gpu_caps.systemMemoryWindowStart;
parent_gpu->numa_info.system_memory_window_end = gpu_caps.systemMemoryWindowStart +
gpu_caps.systemMemoryWindowSize -
1;
UVM_ASSERT(uvm_gpu_is_coherent(gpu->parent));
gpu->mem_info.numa.enabled = true;
gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
}
else {
UVM_ASSERT(!g_uvm_global.ats.enabled);
UVM_ASSERT(!uvm_gpu_is_coherent(gpu->parent));
}
return NV_OK;
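The nvswitch comment repeated in both hunks can be sanity-checked with some back-of-the-envelope arithmetic (mine, not from the source): reserving the top 13 bits of a 47-bit address for peer routing leaves 34 bits per peer, i.e. exactly a 16 GiB framebuffer window.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const unsigned address_bits = 47; // physical address space width
    const unsigned routing_bits = 13; // upper bits select the peer

    unsigned window_bits = address_bits - routing_bits; // 34
    uint64_t window_size = 1ull << window_bits;         // 2^34 bytes

    printf("addressable peers: %u\n", 1u << routing_bits); // 8192
    printf("window per peer:   %llu GiB\n",
           (unsigned long long)(window_size >> 30));       // 16
    return 0;
}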
@@ -347,26 +357,30 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
{
NvU32 i;
UvmGpuCopyEnginesCaps ces_caps;
UvmGpuCopyEnginesCaps *ces_caps;
NV_STATUS status;
memset(&ces_caps, 0, sizeof(ces_caps));
status = uvm_rm_locked_call(nvUvmInterfaceQueryCopyEnginesCaps(uvm_gpu_device_handle(gpu), &ces_caps));
ces_caps = uvm_kvmalloc_zero(sizeof(*ces_caps));
if (!ces_caps) {
UVM_SEQ_OR_DBG_PRINT(s, "supported_ces: unavailable (no memory)\n");
return;
}
status = uvm_rm_locked_call(nvUvmInterfaceQueryCopyEnginesCaps(uvm_gpu_device_handle(gpu), ces_caps));
if (status != NV_OK) {
UVM_SEQ_OR_DBG_PRINT(s, "supported_ces: unavailable (query failed)\n");
return;
goto out;
}
UVM_SEQ_OR_DBG_PRINT(s, "supported_ces:\n");
for (i = 0; i < UVM_COPY_ENGINE_COUNT_MAX; ++i) {
UvmGpuCopyEngineCaps *ce_caps = ces_caps.copyEngineCaps + i;
UvmGpuCopyEngineCaps *ce_caps = ces_caps->copyEngineCaps + i;
if (!ce_caps->supported)
continue;
UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u nvlink p2p %u "
"p2p %u\n",
UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u "
"nvlink p2p %u p2p %u\n",
i,
ce_caps->cePceMask,
ce_caps->grce,
@@ -377,6 +391,9 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
ce_caps->nvlinkP2p,
ce_caps->p2p);
}
out:
uvm_kvfree(ces_caps);
}
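The gpu_info_print_ce_caps() hunk above swaps a stack-resident UvmGpuCopyEnginesCaps for a uvm_kvmalloc_zero() allocation with a goto-based single exit, presumably to keep a large structure off the limited kernel stack. A standalone C sketch of the same pattern (struct name and size are invented):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Hypothetical stand-in for a struct too large to place comfortably
// in a kernel stack frame.
struct big_caps {
    unsigned char payload[8192];
};

static int query_caps(struct big_caps *caps)
{
    memset(caps, 0, sizeof(*caps)); // pretend the query filled it in
    return 0;
}

int main(void)
{
    int ret = 1;

    // Heap-allocate rather than declaring `struct big_caps caps;` as a
    // local: kernel stacks are small, so multi-KiB locals risk overflow.
    // calloc() mirrors the zeroing that uvm_kvmalloc_zero() provides.
    struct big_caps *caps = calloc(1, sizeof(*caps));
    if (!caps) {
        fprintf(stderr, "unavailable (no memory)\n");
        return 1;
    }

    if (query_caps(caps) != 0)
        goto out; // single exit path frees the buffer in all cases

    puts("caps queried");
    ret = 0;
out:
    free(caps);
    return ret;
}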
static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
@@ -394,7 +411,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
{
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 6);
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
switch (link_type) {
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
@@ -403,6 +420,7 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_C2C);
UVM_ENUM_STRING_DEFAULT();
}
}
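The BUILD_BUG_ON() bump from 6 to 7 is the usual compile-time tripwire: appending UVM_GPU_LINK_C2C changes UVM_GPU_LINK_MAX, so the build breaks until the switch gains a matching case. A standalone sketch of the idiom (the macro is respelled here so the example compiles on its own):

#include <stdio.h>

// Fails compilation when cond is true: char[-1] is invalid. Same idea
// as the kernel's BUILD_BUG_ON, reimplemented for a standalone build.
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

typedef enum {
    LINK_INVALID,
    LINK_PCIE,
    LINK_NVLINK,
    LINK_C2C,
    LINK_MAX, // keep last
} link_t;

static const char *link_string(link_t t)
{
    // If someone appends an enum value, LINK_MAX changes and this
    // assertion breaks the build until the switch below is updated.
    BUILD_BUG_ON(LINK_MAX != 4);

    switch (t) {
        case LINK_INVALID: return "INVALID";
        case LINK_PCIE:    return "PCIE";
        case LINK_NVLINK:  return "NVLINK";
        case LINK_C2C:     return "C2C";
        default:           return "UNKNOWN";
    }
}

int main(void)
{
    printf("%s\n", link_string(LINK_C2C));
    return 0;
}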
@@ -410,7 +428,6 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
{
const UvmGpuInfo *gpu_info = &gpu->parent->rm_info;
uvm_numa_info_t *numa_info = &gpu->parent->numa_info;
NvU64 num_pages_in;
NvU64 num_pages_out;
NvU64 mapped_cpu_pages_size;
@@ -429,9 +446,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
return;
UVM_SEQ_OR_DBG_PRINT(s, "CPU link type %s\n",
uvm_gpu_link_type_string(gpu->parent->sysmem_link));
uvm_gpu_link_type_string(gpu->parent->system_bus.link));
UVM_SEQ_OR_DBG_PRINT(s, "CPU link bandwidth %uMBps\n",
gpu->parent->sysmem_link_rate_mbyte_per_s);
gpu->parent->system_bus.link_rate_mbyte_per_s);
UVM_SEQ_OR_DBG_PRINT(s, "architecture 0x%X\n", gpu_info->gpuArch);
UVM_SEQ_OR_DBG_PRINT(s, "implementation 0x%X\n", gpu_info->gpuImplementation);
@@ -453,13 +470,13 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->mem_info.max_allocatable_address,
gpu->mem_info.max_allocatable_address / (1024 * 1024));
if (numa_info->enabled) {
NvU64 window_size = numa_info->system_memory_window_end - numa_info->system_memory_window_start + 1;
UVM_SEQ_OR_DBG_PRINT(s, "numa_node_id %u\n", numa_info->node_id);
UVM_SEQ_OR_DBG_PRINT(s, "system_memory_window_start 0x%llx\n",
numa_info->system_memory_window_start);
UVM_SEQ_OR_DBG_PRINT(s, "system_memory_window_end 0x%llx\n",
numa_info->system_memory_window_end);
if (gpu->mem_info.numa.enabled) {
NvU64 window_size = gpu->parent->system_bus.memory_window_end - gpu->parent->system_bus.memory_window_start + 1;
UVM_SEQ_OR_DBG_PRINT(s, "numa_node_id %u\n", uvm_gpu_numa_node(gpu));
UVM_SEQ_OR_DBG_PRINT(s, "memory_window_start 0x%llx\n",
gpu->parent->system_bus.memory_window_start);
UVM_SEQ_OR_DBG_PRINT(s, "memory_window_end 0x%llx\n",
gpu->parent->system_bus.memory_window_end);
UVM_SEQ_OR_DBG_PRINT(s, "system_memory_window_size 0x%llx (%llu MBs)\n",
window_size,
window_size / (1024 * 1024));
@@ -550,6 +567,10 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu_info_print_ce_caps(gpu, s);
if (uvm_conf_computing_mode_enabled(gpu)) {
UVM_SEQ_OR_DBG_PRINT(s, "dma_buffer_pool_num_buffers %lu\n",
gpu->conf_computing.dma_buffer_pool.num_dma_buffers);
}
}
static void
@@ -843,7 +864,7 @@ static void deinit_procfs_peer_cap_files(uvm_gpu_peer_t *peer_caps)
proc_remove(peer_caps->procfs.peer_file[1]);
}
static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
static NV_STATUS init_semaphore_pools(uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_gpu_t *other_gpu;
@@ -852,7 +873,17 @@ static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
if (status != NV_OK)
return status;
// When the Confidential Computing feature is enabled, a separate secure
// pool is created that holds pages allocated in the CPR of vidmem.
if (uvm_conf_computing_mode_enabled(gpu)) {
status = uvm_gpu_semaphore_secure_pool_create(gpu, &gpu->secure_semaphore_pool);
if (status != NV_OK)
return status;
}
for_each_global_gpu(other_gpu) {
if (uvm_conf_computing_mode_enabled(gpu))
break;
if (other_gpu == gpu)
continue;
status = uvm_gpu_semaphore_pool_map_gpu(other_gpu->semaphore_pool, gpu);
@@ -863,7 +894,7 @@ static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
return NV_OK;
}
static void deinit_semaphore_pool(uvm_gpu_t *gpu)
static void deinit_semaphore_pools(uvm_gpu_t *gpu)
{
uvm_gpu_t *other_gpu;
@@ -874,6 +905,7 @@ static void deinit_semaphore_pool(uvm_gpu_t *gpu)
}
uvm_gpu_semaphore_pool_destroy(gpu->semaphore_pool);
uvm_gpu_semaphore_pool_destroy(gpu->secure_semaphore_pool);
}
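The init_semaphore_pool()/deinit_semaphore_pool() pair is renamed to the plural because a second, secure pool now exists only when Confidential Computing is enabled, while teardown destroys both unconditionally. A standalone sketch of that shape (pool type and flag are invented):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Hypothetical pools standing in for the regular and secure semaphore
// pools; the point is the paired conditional init / unconditional deinit.
struct pool { int id; };

static bool conf_computing_enabled = true; // assumption for the demo

static int pools_init(struct pool **regular, struct pool **secure)
{
    *regular = calloc(1, sizeof(**regular));
    if (!*regular)
        return -1;

    *secure = NULL;
    if (conf_computing_enabled) {
        // The secure pool only exists when Confidential Computing is on.
        *secure = calloc(1, sizeof(**secure));
        if (!*secure) {
            free(*regular);
            return -1;
        }
    }
    return 0;
}

static void pools_deinit(struct pool *regular, struct pool *secure)
{
    // Freeing NULL is a no-op, so deinit stays unconditional, mirroring
    // how the diff calls uvm_gpu_semaphore_pool_destroy() on both pools.
    free(secure);
    free(regular);
}

int main(void)
{
    struct pool *regular, *secure;
    if (pools_init(&regular, &secure) != 0)
        return 1;
    puts("pools ready");
    pools_deinit(regular, secure);
    return 0;
}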
static NV_STATUS find_unused_global_gpu_id(uvm_parent_gpu_t *parent_gpu, uvm_global_gpu_id_t *out_id)
@@ -1067,6 +1099,13 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
return status;
}
status = uvm_conf_computing_init_parent_gpu(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Confidential computing: %s, GPU %s\n",
nvstatusToString(status), parent_gpu->name);
return status;
}
parent_gpu->pci_dev = gpu_platform_info->pci_dev;
parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);
parent_gpu->dma_addressable_start = gpu_platform_info->dma_addressable_start;
@@ -1102,12 +1141,6 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
uvm_mmu_init_gpu_chunk_sizes(parent_gpu);
status = get_gpu_caps(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
return status;
}
status = uvm_ats_add_gpu(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_ats_add_gpu failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
@@ -1166,6 +1199,12 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}
status = get_gpu_caps(gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status;
}
uvm_mmu_init_gpu_peer_addresses(gpu);
status = alloc_and_init_address_space(gpu);
@@ -1198,7 +1237,7 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}
status = init_semaphore_pool(gpu);
status = init_semaphore_pools(gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize the semaphore pool: %s, GPU %s\n",
nvstatusToString(status),
@@ -1228,6 +1267,14 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}
status = uvm_conf_computing_gpu_init(gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize Confidential Compute: %s for GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
return status;
}
status = init_procfs_files(gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to init procfs files: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
@@ -1403,6 +1450,8 @@ static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
// Sync all trackers in PMM
uvm_pmm_gpu_sync(&gpu->pmm);
// Sync all trackers in the GPU's DMA allocation pool
uvm_conf_computing_dma_buffer_pool_sync(&gpu->conf_computing.dma_buffer_pool);
}
// Remove all references to the given GPU from its parent, since it is being
@@ -1485,7 +1534,7 @@ static void deinit_gpu(uvm_gpu_t *gpu)
// pain during development.
deconfigure_address_space(gpu);
deinit_semaphore_pool(gpu);
deinit_semaphore_pools(gpu);
uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);
@@ -1536,6 +1585,13 @@ static void remove_gpu(uvm_gpu_t *gpu)
if (free_parent)
destroy_nvlink_peers(gpu);
// uvm_mem_free and other uvm_mem APIs invoked by the Confidential Compute
// deinitialization must be called before the GPU is removed from the global
// table.
//
// TODO: Bug 2008200: Add and remove the GPU in a more reasonable spot.
uvm_conf_computing_gpu_deinit(gpu);
// TODO: Bug 2844714: If the parent is not being freed, the following
// gpu_table_lock is only needed to protect concurrent
// find_first_valid_gpu() in BH from the __clear_bit here. After
@@ -2213,9 +2269,12 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
{
NV_STATUS status;
UVM_ASSERT(p2p_caps_params->p2pLink != UVM_LINK_TYPE_C2C);
// check for peer-to-peer compatibility (PCI-E or NvLink).
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
if (peer_caps->link_type == UVM_GPU_LINK_INVALID ||
    peer_caps->link_type == UVM_GPU_LINK_C2C)
return NV_ERR_NOT_SUPPORTED;
@@ -2225,8 +2284,8 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
peer_caps->is_indirect_peer = (p2p_caps_params->indirectAccess == NV_TRUE);
if (peer_caps->is_indirect_peer) {
UVM_ASSERT(gpu0->parent->numa_info.enabled);
UVM_ASSERT(gpu1->parent->numa_info.enabled);
UVM_ASSERT(gpu0->mem_info.numa.enabled);
UVM_ASSERT(gpu1->mem_info.numa.enabled);
status = uvm_pmm_gpu_indirect_peer_init(&gpu0->pmm, gpu1);
if (status != NV_OK)
@@ -2415,8 +2474,7 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
// Indirect peers are only supported when onlined as NUMA nodes, because
// we want to use vm_insert_page and dma_map_page.
if (p2p_caps_params.indirectAccess &&
(!gpu->parent->numa_info.enabled || !other_gpu->parent->numa_info.enabled))
if (p2p_caps_params.indirectAccess && (!gpu->mem_info.numa.enabled || !other_gpu->mem_info.numa.enabled))
continue;
status = enable_nvlink_peer_access(gpu, other_gpu, &p2p_caps_params);
@@ -2601,6 +2659,9 @@ uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu)
if (uvm_gpu_is_virt_mode_sriov_heavy(gpu))
return UVM_APERTURE_VID;
if (uvm_conf_computing_mode_enabled(gpu))
return UVM_APERTURE_VID;
return UVM_APERTURE_DEFAULT;
}