Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git, synced 2026-01-27 03:29:47 +00:00
580.65.06
@@ -42,8 +42,8 @@
#include "uvm_ats.h"
#include "uvm_test.h"
#include "uvm_conf_computing.h"

#include "uvm_linux.h"
#include "uvm_mmu.h"

#define UVM_PROC_GPUS_PEER_DIR_NAME "peers"
@@ -93,6 +93,8 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
    parent_gpu->system_bus.link = get_gpu_link_type(gpu_info->sysmemLink);
    UVM_ASSERT(parent_gpu->system_bus.link != UVM_GPU_LINK_INVALID);

    parent_gpu->ats_supported = gpu_info->atsSupport;

    parent_gpu->system_bus.link_rate_mbyte_per_s = gpu_info->sysmemLinkRateMBps;

    if (gpu_info->systemMemoryWindowSize > 0) {
@@ -114,6 +116,15 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
        parent_gpu->nvswitch_info.egm_fabric_memory_window_start = gpu_info->nvswitchEgmMemoryWindowStart;
    }

    parent_gpu->ats.non_pasid_ats_enabled = gpu_info->nonPasidAtsSupport;

    // Non-PASID ATS cannot be disabled if present, but it implies PASID ATS
    // support.
    // TODO: Bug 5161018: Include integrated GPUs in the assert once they
    // support PASID ATS.
    if (UVM_ATS_SUPPORTED() && parent_gpu->ats.non_pasid_ats_enabled && gpu_info->systemMemoryWindowSize)
        UVM_ASSERT(g_uvm_global.ats.supported);

    uvm_uuid_string(uuid_buffer, &parent_gpu->uuid);
    snprintf(parent_gpu->name,
             sizeof(parent_gpu->name),
@@ -139,9 +150,13 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
        gpu->mem_info.numa.enabled = true;
        gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
        gpu->mem_info.cdmm_enabled = false;
    }
    else {
        UVM_ASSERT(!uvm_parent_gpu_is_coherent(gpu->parent));
        // TODO: Bug 5273146: Use RM control call to detect CDMM mode.
        if (uvm_parent_gpu_is_coherent(gpu->parent))
            gpu->mem_info.cdmm_enabled = true;

        gpu->mem_info.numa.node_id = NUMA_NO_NODE;
    }
@@ -154,6 +169,10 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
// The buffer management thus remains the same: DMA mapped GPA addresses can
// be accessed by the GPU, while unmapped addresses can not and any access is
// blocked and potentially unrecoverable to the engine that made it.
//
// If non-PASID ATS is supported, then we can skip this because that will handle
// things for us instead without us having to fake it with PASID ATS. This is
// only to enable the feature on other ATS systems.
static int gpu_get_internal_pasid(const uvm_gpu_t *gpu)
{
#if UVM_ATS_SVA_SUPPORTED() && defined(NV_IOMMU_IS_DMA_DOMAIN_PRESENT)
@@ -175,13 +194,14 @@ static int gpu_get_internal_pasid(const uvm_gpu_t *gpu)
#define UVM_INTERNAL_PASID 0
#endif

    // Enable internal ATS only if ATS is enabled in general and we are using
    // 64kB base page size. The base page size limitation is needed to avoid
    // GH180 MMU behaviour which does not refetch invalid 4K ATS translations
    // on access (see bug 3949400). This also works in virtualized environments
    // because the entire 64kB guest page has to be mapped and pinned by the
    // hypervisor for device access.
    if (g_uvm_global.ats.enabled && PAGE_SIZE == UVM_PAGE_SIZE_64K) {
    // Enable internal ATS only if non-PASID ATS is disabled, but PASID ATS is
    // enabled, and we are using 64kB base page size. The base page size
    // limitation is needed to avoid GH180 MMU behaviour which does not refetch
    // invalid 4K ATS translations on access (see bug 3949400). This also works
    // in virtualized environments because the entire 64kB guest page has to be
    // mapped and pinned by the hypervisor for device access.
    if (!gpu->parent->ats.non_pasid_ats_enabled && g_uvm_global.ats.enabled &&
        uvm_parent_gpu_supports_ats(gpu->parent) && PAGE_SIZE == UVM_PAGE_SIZE_64K) {
        struct device *dev = &gpu->parent->pci_dev->dev;
        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
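Condensing the change above: the old condition enabled internal ATS whenever system-wide ATS was on and the base page size was 64kB, while the new condition additionally requires that the GPU has no non-PASID ATS and that it supports ATS at all. A minimal sketch of the new predicate, using only names that appear in this hunk (the helper name itself is hypothetical and not part of the source):

static bool gpu_should_use_internal_ats(const uvm_gpu_t *gpu)
{
    // Internal (PASID-based) ATS is only set up when the GPU cannot rely on
    // non-PASID ATS, ATS is enabled globally, the GPU supports ATS, and the
    // kernel base page size is 64kB (see bug 3949400 for why 4K base pages
    // are excluded).
    return !gpu->parent->ats.non_pasid_ats_enabled &&
           g_uvm_global.ats.enabled &&
           uvm_parent_gpu_supports_ats(gpu->parent) &&
           PAGE_SIZE == UVM_PAGE_SIZE_64K;
}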
@@ -252,6 +272,7 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
    gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
    gpu->mem_info.static_bar1_start = pci_bar1_addr + fb_info.staticBar1StartOffset;
    gpu->mem_info.static_bar1_size = fb_info.staticBar1Size;
    gpu->mem_info.static_bar1_write_combined = fb_info.bStaticBar1WriteCombined;

    return NV_OK;
}
@@ -1299,6 +1320,7 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
        return status;
    }

    // The aperture here refers to sysmem, which only uses UVM_APERTURE_SYS
    if (tree_alloc->addr.aperture == UVM_APERTURE_SYS)
        gpu->address_space_tree.pdb_rm_dma_address = uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_address);
@@ -1385,6 +1407,7 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu)
    return NV_OK;
}

static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
                                 const NvProcessorUuid *gpu_uuid,
                                 const UvmGpuInfo *gpu_info,
@@ -1451,6 +1474,12 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
    uvm_hal_init_properties(parent_gpu);

    // On SoCs which are not NUMA aware use the linux kernel's fake NUMA node.
    if (parent_gpu->is_integrated_gpu && parent_gpu->closest_cpu_numa_node == -1) {
        UVM_ASSERT(num_possible_nodes() == 1);
        parent_gpu->closest_cpu_numa_node = numa_mem_id();
    }

    UVM_ASSERT(!parent_gpu->rm_info.smcEnabled || parent_gpu->smc.supported);
    parent_gpu->smc.enabled = !!parent_gpu->rm_info.smcEnabled;
@@ -1568,12 +1597,6 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
        return status;
    }

    status = uvm_pmm_sysmem_mappings_init(gpu, &gpu->pmm_reverse_sysmem_mappings);
    if (status != NV_OK) {
        UVM_ERR_PRINT("CPU PMM MMIO initialization failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

    uvm_pmm_gpu_device_p2p_init(gpu);

    status = init_semaphore_pools(gpu);
@@ -1765,8 +1788,6 @@ static void deinit_gpu(uvm_gpu_t *gpu)
    uvm_pmm_gpu_device_p2p_deinit(gpu);

    uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);

    uvm_pmm_gpu_deinit(&gpu->pmm);

    if (gpu->rm_address_space != 0)
@@ -1863,7 +1884,9 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
    ++parent_gpu->stats.num_replayable_faults;
}

static void update_stats_fault_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
static void update_stats_fault_cb(uvm_va_space_t *va_space,
                                  uvm_perf_event_t event_id,
                                  uvm_perf_event_data_t *event_data)
{
    uvm_parent_gpu_t *parent_gpu;
    const uvm_fault_buffer_entry_t *fault_entry, *fault_instance;
@@ -1888,7 +1911,9 @@ static void update_stats_fault_cb(uvm_perf_event_t event_id, uvm_perf_event_data
        update_stats_parent_gpu_fault_instance(parent_gpu, fault_instance, event_data->fault.gpu.is_duplicate);
}

static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
static void update_stats_migration_cb(uvm_va_space_t *va_space,
                                      uvm_perf_event_t event_id,
                                      uvm_perf_event_data_t *event_data)
{
    uvm_gpu_t *gpu_dst = NULL;
    uvm_gpu_t *gpu_src = NULL;
@@ -1910,11 +1935,11 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
    // Page prefetching is also triggered by faults
    is_replayable_fault =
        event_data->migration.make_resident_context->cause == UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT;
        event_data->migration.cause == UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT;
    is_non_replayable_fault =
        event_data->migration.make_resident_context->cause == UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT;
        event_data->migration.cause == UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT;
    is_access_counter =
        event_data->migration.make_resident_context->cause == UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
        event_data->migration.cause == UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;

    pages = event_data->migration.bytes / PAGE_SIZE;
    UVM_ASSERT(event_data->migration.bytes % PAGE_SIZE == 0);
@@ -1929,7 +1954,7 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
            atomic64_add(pages, &gpu_dst->parent->fault_buffer.non_replayable.stats.num_pages_in);
        }
        else if (is_access_counter) {
            NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
            NvU32 index = event_data->migration.access_counters_buffer_index;
            atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
        }
    }
@@ -1942,7 +1967,7 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
            atomic64_add(pages, &gpu_src->parent->fault_buffer.non_replayable.stats.num_pages_out);
        }
        else if (is_access_counter) {
            NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
            NvU32 index = event_data->migration.access_counters_buffer_index;
            atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
        }
    }
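The two hunks above perform the same page-count bookkeeping, once for the destination GPU (pages in) and once for the source GPU (pages out). As a hedged illustration, a hypothetical helper that captures only the destination-side accounting (all field names are taken from the diff; the helper itself does not exist in the source):

static void account_access_counter_pages_in(uvm_gpu_t *gpu_dst, NvU64 bytes, NvU32 index)
{
    // Migrations are always a whole number of base pages; for example a 2 MiB
    // migration with a 64 KiB base page size accounts for 32 pages.
    NvU64 pages = bytes / PAGE_SIZE;

    UVM_ASSERT(bytes % PAGE_SIZE == 0);
    atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
}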
@@ -3169,6 +3194,7 @@ uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu)
    if (uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent) || g_uvm_global.conf_computing_enabled)
        return UVM_APERTURE_VID;

    // Sysmem is represented by UVM_APERTURE_SYS
    if (!gpu->mem_info.size)
        return UVM_APERTURE_SYS;
@@ -3637,41 +3663,50 @@ NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dm
    return gpu_addr;
}

static void *parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
static void *parent_gpu_dma_alloc(NvU64 size, uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
{
    NvU64 dma_addr;
    void *cpu_addr;

    cpu_addr = dma_alloc_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, &dma_addr, gfp_flags);
    cpu_addr = dma_alloc_coherent(&parent_gpu->pci_dev->dev, size, &dma_addr, gfp_flags);
    if (!cpu_addr)
        return cpu_addr;

    *dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);
    atomic64_add(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
    atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
    return cpu_addr;
}

NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out)
NV_STATUS uvm_gpu_dma_alloc(NvU64 size, uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out)
{
    void *cpu_addr = parent_gpu_dma_alloc_page(gpu->parent, gfp_flags, dma_address_out);
    NV_STATUS status;
    void *cpu_addr = parent_gpu_dma_alloc(size, gpu->parent, gfp_flags, dma_address_out);
    if (!cpu_addr)
        return NV_ERR_NO_MEMORY;

    // TODO: Bug 4868590: Issue GPA invalidate here
    // Some GPUs require TLB invalidation on IOMMU invalid -> valid transitions
    status = uvm_mmu_tlb_invalidate_phys(gpu);
    if (status != NV_OK) {
        uvm_parent_gpu_dma_free(size, gpu->parent, cpu_addr, *dma_address_out);
        return status;
    }

    *cpu_addr_out = cpu_addr;
    return NV_OK;
}

void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address)
void uvm_parent_gpu_dma_free(NvU64 size, uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address)
{
    dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
    dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, cpu_addr, dma_address);
    atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
    dma_free_coherent(&parent_gpu->pci_dev->dev, size, cpu_addr, dma_address);
    atomic64_sub(size, &parent_gpu->mapped_cpu_pages_size);
}

static NV_STATUS parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
NV_STATUS uvm_gpu_map_cpu_pages_no_invalidate(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
    // This function only actually needs the parent GPU, but it takes in the
    // sub GPU for API symmetry with uvm_gpu_map_cpu_pages().
    uvm_parent_gpu_t *parent_gpu = gpu->parent;
    NvU64 dma_addr;

    UVM_ASSERT(PAGE_ALIGNED(size));
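The hunk above turns the single-page DMA allocation helpers into size-based ones and adds a physical TLB invalidate after allocation. A minimal caller-side sketch, assuming only the new signatures shown in the diff (the wrapper function and the chosen size are illustrative, not part of the source):

static NV_STATUS dma_alloc_example(uvm_gpu_t *gpu)
{
    void *cpu_addr;
    NvU64 gpu_addr;
    NvU64 size = 2 * PAGE_SIZE;   // arbitrary size for illustration
    NV_STATUS status;

    // Allocates a DMA-coherent buffer and returns the GPU-visible address;
    // the invalidate-on-alloc handling shown above happens inside this call.
    status = uvm_gpu_dma_alloc(size, gpu, GFP_KERNEL, &cpu_addr, &gpu_addr);
    if (status != NV_OK)
        return status;

    // ... use cpu_addr from the CPU and gpu_addr from the GPU ...

    // The same size must be passed back so the mapped_cpu_pages_size
    // accounting in the helpers above stays balanced.
    uvm_parent_gpu_dma_free(size, gpu->parent, cpu_addr, gpu_addr);
    return NV_OK;
}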
@@ -3700,9 +3735,15 @@ static NV_STATUS parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct p
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
    NV_STATUS status = parent_gpu_map_cpu_pages(gpu->parent, page, size, dma_address_out);
    NV_STATUS status = uvm_gpu_map_cpu_pages_no_invalidate(gpu, page, size, dma_address_out);
    if (status != NV_OK)
        return status;

    // TODO: Bug 4868590: Issue GPA invalidate here
    // Some GPUs require TLB invalidation on IOMMU invalid -> valid
    // transitions.
    status = uvm_mmu_tlb_invalidate_phys(gpu);
    if (status != NV_OK)
        uvm_parent_gpu_unmap_cpu_pages(gpu->parent, *dma_address_out, size);

    return status;
}
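For completeness, a similar caller-side sketch for the map/unmap path, again using only functions visible in this diff (the wrapper is hypothetical):

static NV_STATUS map_cpu_page_example(uvm_gpu_t *gpu, struct page *page)
{
    NvU64 dma_address;
    NV_STATUS status;

    // uvm_gpu_map_cpu_pages() performs the TLB invalidate shown above; callers
    // that batch mappings could use uvm_gpu_map_cpu_pages_no_invalidate() and
    // invalidate once at the end instead.
    status = uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, &dma_address);
    if (status != NV_OK)
        return status;

    // ... the GPU accesses the page through dma_address ...

    uvm_parent_gpu_unmap_cpu_pages(gpu->parent, dma_address, PAGE_SIZE);
    return NV_OK;
}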
@@ -3800,7 +3841,7 @@ NV_STATUS uvm_api_pageable_mem_access_on_gpu(UVM_PAGEABLE_MEM_ACCESS_ON_GPU_PARA
        return NV_ERR_INVALID_DEVICE;
    }

    if (uvm_va_space_pageable_mem_access_supported(va_space) && gpu->parent->replayable_faults_supported)
    if (uvm_va_space_pageable_mem_access_enabled(va_space) && gpu->parent->replayable_faults_supported)
        params->pageableMemAccess = NV_TRUE;
    else
        params->pageableMemAccess = NV_FALSE;