570.86.15

Bernhard Stoeckner
2025-01-27 19:36:56 +01:00
parent 9d0b0414a5
commit 54d69484da
1166 changed files with 318863 additions and 182687 deletions


@@ -88,6 +88,15 @@ MODULE_PARM_DESC(uvm_exp_gpu_cache_sysmem,
"This is an experimental parameter that may cause correctness issues if used.");
static void block_add_eviction_mappings_entry(void *args);
static void block_unmap_cpu(uvm_va_block_t *block,
uvm_va_block_context_t *block_context,
uvm_va_block_region_t region,
const uvm_page_mask_t *unmap_pages);
static bool block_check_chunks(uvm_va_block_t *va_block);
static size_t block_gpu_chunk_index(uvm_va_block_t *block,
uvm_gpu_t *gpu,
uvm_page_index_t page_index,
uvm_chunk_size_t *out_chunk_size);
uvm_va_space_t *uvm_va_block_get_va_space_maybe_dead(uvm_va_block_t *va_block)
{
@@ -2817,6 +2826,32 @@ static bool block_check_processor_not_mapped(uvm_va_block_t *block,
return true;
}
// Check that the EGM peer GPU is EGM-enabled and that the EGM address
// is within the peer NUMA node address range.
static bool block_check_egm_peer(uvm_va_space_t *va_space, uvm_gpu_t *gpu, int nid, uvm_gpu_phys_address_t phys_addr)
{
uvm_egm_numa_node_info_t *remote_node_info;
uvm_parent_gpu_t *parent_gpu;
if (!uvm_aperture_is_peer(phys_addr.aperture))
return true;
remote_node_info = uvm_va_space_get_egm_numa_node_info(va_space, nid);
UVM_ASSERT(!uvm_parent_processor_mask_empty(&remote_node_info->parent_gpus));
for_each_parent_gpu_in_mask(parent_gpu, &remote_node_info->parent_gpus) {
UVM_ASSERT(parent_gpu->egm.enabled);
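// phys_addr.address is an offset from this parent GPU's EGM base, so adding
// the base back must land inside the peer NUMA node's physical address range,
// and the node's routing table entry for the accessing GPU's parent must
// select this parent GPU.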
if (phys_addr.address + parent_gpu->egm.base_address >= remote_node_info->node_start &&
phys_addr.address + parent_gpu->egm.base_address < remote_node_info->node_end &&
remote_node_info->routing_table[uvm_parent_id_gpu_index(gpu->parent->id)] == parent_gpu) {
return true;
}
}
return false;
}
// Zero all pages of the newly-populated chunk which are not resident anywhere
// else in the system, adding that work to the block's tracker. In all cases,
// this function adds a dependency on the passed-in tracker to the block's tracker.
@@ -3186,17 +3221,28 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
if (UVM_ID_IS_CPU(block_page.processor)) {
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, block_page.nid, block_page.page_index);
NvU64 dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
uvm_va_block_region_t chunk_region = uvm_va_block_chunk_region(block,
uvm_cpu_chunk_get_size(chunk),
block_page.page_index);
uvm_va_block_region_t chunk_region;
NvU64 phys_addr;
uvm_aperture_t aperture = UVM_APERTURE_SYS;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
uvm_parent_gpu_t *routing_gpu = uvm_va_space_get_egm_routing_gpu(va_space, gpu, block_page.nid);
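// When an EGM routing GPU exists for this page's NUMA node, return a
// peer-aperture physical address: the page's system physical address rebased
// against the routing GPU's EGM base. The page is also recorded in the
// accessing GPU's egm_pages mask.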
if (routing_gpu) {
struct page *page = uvm_cpu_chunk_get_cpu_page(block, chunk, block_page.page_index);
phys_addr = page_to_phys(page);
aperture = uvm_gpu_egm_peer_aperture(gpu->parent, routing_gpu);
uvm_page_mask_set(&accessing_gpu_state->egm_pages, block_page.page_index);
return uvm_gpu_phys_address(aperture, phys_addr - routing_gpu->egm.base_address);
}
// The page should be mapped for physical access already as we do that
// eagerly on CPU page population and GPU state alloc.
UVM_ASSERT(dma_addr != 0);
dma_addr += (block_page.page_index - chunk_region.first) * PAGE_SIZE;
phys_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
chunk_region = uvm_va_block_chunk_region(block, uvm_cpu_chunk_get_size(chunk), block_page.page_index);
UVM_ASSERT(phys_addr != 0);
phys_addr += (block_page.page_index - chunk_region.first) * PAGE_SIZE;
return uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr);
return uvm_gpu_phys_address(aperture, phys_addr);
}
chunk = block_phys_page_chunk(block, block_page, &chunk_offset);
@@ -3210,7 +3256,7 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
UVM_ASSERT(uvm_va_space_peer_enabled(va_space, gpu, owning_gpu));
phys_addr = uvm_pmm_gpu_peer_phys_address(&owning_gpu->pmm, chunk, gpu);
phys_addr = uvm_gpu_peer_phys_address(owning_gpu, chunk->address, gpu);
phys_addr.address += chunk_offset;
return phys_addr;
}
@@ -3238,8 +3284,17 @@ static uvm_gpu_address_t block_phys_page_copy_address(uvm_va_block_t *block,
// CPU and local GPU accesses can rely on block_phys_page_address, but the
// resulting physical address may need to be converted into virtual.
if (UVM_ID_IS_CPU(block_page.processor) || uvm_id_equal(block_page.processor, gpu->id))
return uvm_gpu_address_copy(gpu, block_phys_page_address(block, block_page, gpu));
if (UVM_ID_IS_CPU(block_page.processor) || uvm_id_equal(block_page.processor, gpu->id)) {
uvm_gpu_phys_address_t phys_addr = block_phys_page_address(block, block_page, gpu);
// EGM mappings use physical addresses with a PEER aperture.
if (uvm_aperture_is_peer(phys_addr.aperture)) {
UVM_ASSERT(block_check_egm_peer(uvm_va_block_get_va_space(block), gpu, block_page.nid, phys_addr));
return uvm_gpu_address_from_phys(phys_addr);
}
return uvm_gpu_address_copy(gpu, phys_addr);
}
va_space = uvm_va_block_get_va_space(block);
@@ -3250,7 +3305,7 @@ static uvm_gpu_address_t block_phys_page_copy_address(uvm_va_block_t *block,
UVM_ASSERT(uvm_va_space_peer_enabled(va_space, gpu, owning_gpu));
chunk = block_phys_page_chunk(block, block_page, &chunk_offset);
copy_addr = uvm_pmm_gpu_peer_copy_address(&owning_gpu->pmm, chunk, gpu);
copy_addr = uvm_gpu_peer_copy_address(owning_gpu, chunk->address, gpu);
copy_addr.address += chunk_offset;
return copy_addr;
}
@@ -3300,6 +3355,7 @@ typedef struct
block_copy_addr_t src;
block_copy_addr_t dst;
uvm_conf_computing_dma_buffer_t *dma_buffer;
// True if at least one CE transfer (such as a memcopy) has already been
// pushed to the GPU during the VA block copy thus far.
bool copy_pushed;
@@ -3910,6 +3966,75 @@ static NV_STATUS block_copy_pages(uvm_va_block_t *va_block,
return NV_OK;
}
static NV_STATUS zero_destination_mem_if_needed(uvm_va_block_t *block,
uvm_va_block_region_t region,
uvm_page_mask_t *copy_mask,
uvm_processor_id_t src_id,
uvm_processor_id_t dst_id)
{
uvm_push_t zero_push;
uvm_page_index_t page_index;
bool zero_push_started = false;
uvm_gpu_t *dst_gpu, *src_gpu;
NV_STATUS status = NV_OK;
if (UVM_ID_IS_CPU(src_id) || UVM_ID_IS_CPU(dst_id))
return NV_OK;
src_gpu = uvm_gpu_get(src_id);
dst_gpu = uvm_gpu_get(dst_id);
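// Destination zeroing is only needed for copies over links that can hit STO,
// i.e. NVLINK 5 or newer, or when an NVLINK error has been injected for
// testing.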
if ((!src_gpu->nvlink_status.enabled ||
(uvm_parent_gpu_peer_link_type(src_gpu->parent, dst_gpu->parent) < UVM_GPU_LINK_NVLINK_5)) &&
uvm_gpu_get_injected_nvlink_error(src_gpu) == NV_OK)
return NV_OK;
for_each_va_block_page_in_region_mask(page_index, copy_mask, region) {
block_phys_page_t dst_phys_page = block_phys_page(dst_id, NUMA_NO_NODE, page_index);
uvm_gpu_chunk_t *dst_chunk = block_phys_page_chunk(block, dst_phys_page, NULL);
uvm_gpu_address_t memset_addr;
if (dst_chunk->is_zero)
continue;
if (!zero_push_started) {
status = uvm_push_begin_acquire(dst_gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
&block->tracker,
&zero_push,
"Zero dest pages for copy from %s to %s",
uvm_processor_get_name(src_id),
uvm_processor_get_name(dst_id));
if (status != NV_OK)
return status;
zero_push_started = true;
}
// The address of the destination buffer relative to the destination GPU
// should be fast to retrieve and doesn't need to be cached.
memset_addr = block_phys_page_copy_address(block, dst_phys_page, dst_gpu);
// Pipeline the memsets since they never overlap with each other
uvm_push_set_flag(&zero_push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
// Ending the push will add membar for all operations
uvm_push_set_flag(&zero_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
dst_gpu->parent->ce_hal->memset_8(&zero_push, memset_addr, 0, PAGE_SIZE);
}
if (zero_push_started) {
uvm_push_end(&zero_push);
status = uvm_tracker_add_push(&block->tracker, &zero_push);
if (status != NV_OK)
return status;
}
return status;
}
// Copies pages resident on the src_id processor to the dst_id processor
//
// The function adds the pages that were successfully copied to the output
@@ -3974,6 +4099,12 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
copy_state.src.is_block_contig = is_block_phys_contig(block, src_id, copy_state.src.nid);
copy_state.dst.is_block_contig = is_block_phys_contig(block, dst_id, copy_state.dst.nid);
// Zero destination pages if copying over nvlink that can hit STO
status = zero_destination_mem_if_needed(block, region, copy_mask, src_id, dst_id);
if (status != NV_OK)
return status;
// uvm_range_group_range_iter_first should only be called when the va_space
// lock is held, which is always the case unless an eviction is taking
// place.
@@ -4043,11 +4174,11 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
if (block_copy_should_use_push(block, &copy_state)) {
if (!copying_gpu) {
status = block_copy_begin_push(block, &copy_state, &block->tracker, &push);
if (status != NV_OK)
break;
copying_gpu = uvm_push_get_gpu(&push);
UVM_ASSERT(UVM_ID_IS_CPU(src_id) || uvm_id_equal(copying_gpu->id, src_id));
// Ensure that there is GPU state that can be used for CPU-to-CPU copies
if (UVM_ID_IS_CPU(dst_id) && uvm_id_equal(src_id, dst_id)) {
@@ -4093,11 +4224,12 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
if (block_copy_should_use_push(block, &copy_state)) {
// When CC is enabled, transfers between GPU and CPU don't rely on
// any GPU mapping of CPU chunks, physical or virtual.
if (UVM_ID_IS_CPU(src_id) && g_uvm_global.conf_computing_enabled)
if (UVM_ID_IS_CPU(src_id) && g_uvm_global.conf_computing_enabled)
can_cache_src_phys_addr = false;
if (UVM_ID_IS_CPU(dst_id) && g_uvm_global.conf_computing_enabled)
if (UVM_ID_IS_CPU(dst_id) && g_uvm_global.conf_computing_enabled)
can_cache_dst_phys_addr = false;
// Computing the physical address is a non-trivial operation and
// seems to be a performance limiter on systems with 2 or more
// NVLINK links. Therefore, for physically-contiguous block
@@ -4690,6 +4822,34 @@ out:
return status == NV_OK ? tracker_status : status;
}
// Clean up chunks that were pinned (e.g. to move data residency) but are no
// longer needed (e.g. the copy to move data residency failed).
static void block_cleanup_temp_pinned_gpu_chunks(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id)
{
size_t i, num_chunks;
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu_id);
// The GPU state has to be present, otherwise there wouldn't be TEMP_PINNED
// chunks.
UVM_ASSERT(gpu_state);
num_chunks = block_num_gpu_chunks(va_block, gpu);
for (i = 0; i < num_chunks; ++i) {
uvm_gpu_chunk_t *chunk = gpu_state->chunks[i];
// Chunks that are TEMP_PINNED were newly populated in
// block_populate_pages above. Release them since the copy
// failed and they won't be mapped to userspace.
if (chunk && chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
uvm_mmu_chunk_unmap(chunk, &va_block->tracker);
uvm_pmm_gpu_free(&gpu->pmm, chunk, &va_block->tracker);
gpu_state->chunks[i] = NULL;
}
}
}
NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
@@ -4777,6 +4937,15 @@ NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
prefetch_page_mask,
UVM_VA_BLOCK_TRANSFER_MODE_MOVE);
if (status != NV_OK) {
if (UVM_ID_IS_GPU(dest_id))
block_cleanup_temp_pinned_gpu_chunks(va_block, dest_id);
// TODO: bug 1766110 [uvm8] Async free of CPU pages
// Free allocated CPU pages.
UVM_ASSERT(block_check_chunks(va_block));
}
out:
uvm_processor_mask_cache_free(unmap_processor_mask);
return status;
@@ -4983,6 +5152,8 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
uvm_page_mask_t *migrated_pages;
uvm_page_mask_t *staged_pages;
uvm_page_mask_t *scratch_residency_mask;
uvm_page_mask_t *resident_mask;
uvm_page_mask_t *preprocess_page_mask = &va_block_context->make_resident.page_mask;
// TODO: Bug 3660922: need to implement HMM read duplication support.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
@@ -5005,6 +5176,28 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
if (!scratch_residency_mask)
return NV_ERR_NO_MEMORY;
// We cannot read-duplicate on different CPU NUMA nodes since there is only one
// CPU page table. So, the page has to migrate from the source NUMA node to the
// destination one.
// In order to correctly map pages on the destination NUMA node, all pages
// resident on other NUMA nodes have to be unmapped. Otherwise, their WRITE
// permission will be revoked but they'll remain mapped on the source NUMA node.
if (uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) &&
UVM_ID_IS_CPU(va_block_context->make_resident.dest_id)) {
uvm_page_mask_t *dest_nid_resident = uvm_va_block_resident_mask_get(va_block,
UVM_ID_CPU,
va_block_context->make_resident.dest_nid);
resident_mask = uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE);
if (page_mask)
uvm_page_mask_and(preprocess_page_mask, page_mask, resident_mask);
else
uvm_page_mask_copy(preprocess_page_mask, resident_mask);
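// Unmap only the CPU pages that are resident outside the destination
// NUMA node.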
if (uvm_page_mask_andnot(preprocess_page_mask, preprocess_page_mask, dest_nid_resident))
block_unmap_cpu(va_block, va_block_context, region, preprocess_page_mask);
}
// For pages that are entering read-duplication we need to unmap remote
// mappings and revoke RW and higher access permissions.
//
@@ -5012,11 +5205,11 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
// - Unmaps pages from all processors but the one with the resident copy
// - Revokes write access from the processor with the resident copy
for_each_id_in_mask(src_id, &va_block->resident) {
resident_mask = uvm_va_block_resident_mask_get(va_block, src_id, NUMA_NO_NODE);
// Note that the below calls to block_populate_pages and
// block_copy_resident_pages also use
// va_block_context->make_resident.page_mask.
uvm_page_mask_t *preprocess_page_mask = &va_block_context->make_resident.page_mask;
const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, src_id, NUMA_NO_NODE);
UVM_ASSERT(!uvm_page_mask_empty(resident_mask));
if (page_mask)
@@ -5233,7 +5426,7 @@ static bool block_check_gpu_chunks(uvm_va_block_t *block, uvm_gpu_id_t id)
}
if (chunk->state != UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
UVM_ERR_PRINT("Invalid chunk state %s. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %lu chunk_size: llu\n",
UVM_ERR_PRINT("Invalid chunk state %s. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %lu chunk_size: %u\n",
uvm_pmm_gpu_chunk_state_string(chunk->state),
block->start,
block->end + 1,
@@ -8904,9 +9097,7 @@ static void block_destroy_gpu_state(uvm_va_block_t *block, uvm_va_block_context_
uvm_gpu_va_space_t *gpu_va_space;
uvm_gpu_t *gpu;
if (!gpu_state)
return;
UVM_ASSERT(gpu_state);
uvm_assert_mutex_locked(&block->lock);
// Unmap PTEs and free page tables
@@ -9065,6 +9256,62 @@ void uvm_va_block_remove_gpu_va_space(uvm_va_block_t *va_block,
UVM_ASSERT(block_check_mappings(va_block, block_context));
}
static void compute_egm_page_mask(uvm_va_block_t *block,
uvm_va_block_context_t *block_context,
uvm_gpu_t *local_gpu,
uvm_parent_gpu_t *routing_gpu,
uvm_page_mask_t *mask)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
uvm_egm_numa_node_info_t *node_info;
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, local_gpu->id);
int nid;
uvm_page_mask_zero(mask);
// The pages that are using EGM mappings are determined by looking at the
// pages resident on the CPU NUMA node closest to the peer GPU.
for_each_egm_numa_node_info_for_gpu(node_info, va_space, routing_gpu, nid) {
uvm_page_mask_t *residency_mask = uvm_va_block_resident_mask_get(block, UVM_ID_CPU, nid);
uvm_page_mask_or(mask, mask, residency_mask);
}
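// Restrict the result to pages that were actually mapped through EGM on
// this GPU.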
uvm_page_mask_and(mask, mask, &gpu_state->egm_pages);
}
// Unmap any EGM mappings. EGM mappings are torn down only when there is a
// single GPU instance under the parent.
static void block_unmap_gpu_egm_mappings(uvm_va_block_t *va_block,
uvm_gpu_t *gpu0,
uvm_gpu_t *gpu1,
uvm_tracker_t *tracker)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
uvm_va_block_gpu_state_t *gpu_state0 = uvm_va_block_gpu_state_get(va_block, gpu0->id);
if (gpu1->parent->egm.enabled && uvm_va_space_single_gpu_in_parent(va_space, gpu1->parent) && gpu_state0) {
uvm_page_mask_t *unmap_page_mask = &block_context->caller_page_mask;
const uvm_page_mask_t *resident0 = uvm_va_block_resident_mask_get(va_block, gpu0->id, NUMA_NO_NODE);
const uvm_page_mask_t *resident1 = uvm_va_block_resident_mask_get(va_block,
UVM_ID_CPU,
gpu1->parent->closest_cpu_numa_node);
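// Unmap from gpu0 the pages that are resident on gpu1's closest CPU NUMA
// node but not resident on gpu0, restricted to the pages gpu0 maps through
// EGM.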
if (uvm_page_mask_andnot(unmap_page_mask, resident1, resident0) &&
uvm_page_mask_and(unmap_page_mask, unmap_page_mask, &gpu_state0->egm_pages)) {
NV_STATUS status = block_unmap_gpu(va_block, block_context, gpu0, unmap_page_mask, tracker);
if (status != NV_OK) {
UVM_ASSERT_MSG(status == uvm_global_get_status(),
"Unmapping failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu0));
}
}
}
}
void uvm_va_block_disable_peer(uvm_va_block_t *va_block, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
@@ -9072,43 +9319,50 @@ void uvm_va_block_disable_peer(uvm_va_block_t *va_block, uvm_gpu_t *gpu0, uvm_gp
uvm_tracker_t tracker = UVM_TRACKER_INIT();
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
uvm_page_mask_t *unmap_page_mask = &block_context->caller_page_mask;
uvm_va_block_gpu_state_t *gpu_state0 = uvm_va_block_gpu_state_get(va_block, gpu0->id);
uvm_va_block_gpu_state_t *gpu_state1 = uvm_va_block_gpu_state_get(va_block, gpu1->id);
const uvm_page_mask_t *resident0;
const uvm_page_mask_t *resident1;
uvm_assert_mutex_locked(&va_block->lock);
// If either of the GPUs doesn't have GPU state then nothing could be mapped
// If neither of the GPUs has a GPU state then nothing could be mapped
// between them.
if (!uvm_va_block_gpu_state_get(va_block, gpu0->id) || !uvm_va_block_gpu_state_get(va_block, gpu1->id))
if (!gpu_state0 && !gpu_state1)
return;
resident0 = uvm_va_block_resident_mask_get(va_block, gpu0->id, NUMA_NO_NODE);
resident1 = uvm_va_block_resident_mask_get(va_block, gpu1->id, NUMA_NO_NODE);
if (gpu_state0 && gpu_state1) {
resident0 = uvm_va_block_resident_mask_get(va_block, gpu0->id, NUMA_NO_NODE);
resident1 = uvm_va_block_resident_mask_get(va_block, gpu1->id, NUMA_NO_NODE);
// Unmap all pages resident on gpu1, but not on gpu0, from gpu0
if (uvm_page_mask_andnot(unmap_page_mask, resident1, resident0)) {
status = block_unmap_gpu(va_block, block_context, gpu0, unmap_page_mask, &tracker);
if (status != NV_OK) {
// Since all PTEs unmapped by this call have the same aperture, page
// splits should never be required so any failure should be the
// result of a system-fatal error.
UVM_ASSERT_MSG(status == uvm_global_get_status(),
"Unmapping failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu0));
// Unmap all pages resident on gpu1, but not on gpu0, from gpu0
if (uvm_page_mask_andnot(unmap_page_mask, resident1, resident0)) {
status = block_unmap_gpu(va_block, block_context, gpu0, unmap_page_mask, &tracker);
if (status != NV_OK) {
// Since all PTEs unmapped by this call have the same aperture, page
// splits should never be required so any failure should be the
// result of a system-fatal error.
UVM_ASSERT_MSG(status == uvm_global_get_status(),
"Unmapping failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu0));
}
}
// Unmap all pages resident on gpu0, but not on gpu1, from gpu1
if (uvm_page_mask_andnot(unmap_page_mask, resident0, resident1)) {
status = block_unmap_gpu(va_block, block_context, gpu1, unmap_page_mask, &tracker);
if (status != NV_OK) {
UVM_ASSERT_MSG(status == uvm_global_get_status(),
"Unmapping failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu0));
}
}
}
// Unmap all pages resident on gpu0, but not on gpu1, from gpu1
if (uvm_page_mask_andnot(unmap_page_mask, resident0, resident1)) {
status = block_unmap_gpu(va_block, block_context, gpu1, unmap_page_mask, &tracker);
if (status != NV_OK) {
UVM_ASSERT_MSG(status == uvm_global_get_status(),
"Unmapping failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu0));
}
}
block_unmap_gpu_egm_mappings(va_block, gpu0, gpu1, &tracker);
block_unmap_gpu_egm_mappings(va_block, gpu1, gpu0, &tracker);
status = uvm_tracker_add_tracker_safe(&va_block->tracker, &tracker);
if (status != NV_OK)
@@ -9225,11 +9479,16 @@ void uvm_va_block_unregister_gpu_locked(uvm_va_block_t *va_block, uvm_gpu_t *gpu
uvm_global_set_fatal_error(status);
}
// This function will copy the block's tracker into each chunk then free the
// chunk to PMM. If we do this before waiting for the block tracker below
// we'll populate PMM's free chunks with tracker entries, which gives us
// better testing coverage of chunk synchronization on GPU unregister.
block_destroy_gpu_state(va_block, va_block_context, gpu->id);
// The block lock might've been dropped and re-taken, so we have to re-check
// that gpu_state still exists.
if (uvm_va_block_gpu_state_get(va_block, gpu->id)) {
// This function will copy the block's tracker into each chunk then free
// the chunk to PMM. If we do this before waiting for the block tracker
// below we'll populate PMM's free chunks with tracker entries, which
// gives us better testing coverage of chunk synchronization on GPU
// unregister.
block_destroy_gpu_state(va_block, va_block_context, gpu->id);
}
// Any time a GPU is unregistered we need to make sure that there are no
// pending (direct or indirect) tracker entries for that GPU left in the
@@ -9280,7 +9539,7 @@ static void block_kill(uvm_va_block_t *block)
uvm_page_index_t page_index;
uvm_page_index_t next_page_index;
int nid;
uvm_va_block_context_t *block_context;
uvm_va_block_context_t *block_context = NULL;
if (uvm_va_block_is_dead(block))
return;
@@ -9289,8 +9548,6 @@ static void block_kill(uvm_va_block_t *block)
event_data.block_destroy.block = block;
uvm_perf_event_notify(&va_space->perf_events, UVM_PERF_EVENT_BLOCK_DESTROY, &event_data);
block_context = uvm_va_space_block_context(va_space, NULL);
// Unmap all processors in parallel first. Unmapping the whole block won't
// cause a page table split, so this should only fail if we have a system-
// fatal error.
@@ -9306,6 +9563,9 @@ static void block_kill(uvm_va_block_t *block)
// We could only be killed with mapped GPU state by VA range free or VA
// space teardown, so it's safe to use the va_space's block_context
// because both of those have the VA space lock held in write mode.
if (!block_context)
block_context = uvm_va_space_block_context(va_space, NULL);
status = uvm_va_block_unmap_mask(block, block_context, &block->mapped, region, NULL);
UVM_ASSERT(status == uvm_global_get_status());
}
@@ -9313,13 +9573,21 @@ static void block_kill(uvm_va_block_t *block)
UVM_ASSERT(uvm_processor_mask_empty(&block->mapped));
// Free the GPU page tables and chunks
for_each_gpu_id(id)
block_destroy_gpu_state(block, block_context, id);
for_each_gpu_id(id) {
if (uvm_va_block_gpu_state_get(block, id)) {
if (!block_context)
block_context = uvm_va_space_block_context(va_space, NULL);
block_destroy_gpu_state(block, block_context, id);
}
}
// Wait for the GPU PTE unmaps before freeing CPU memory
uvm_tracker_wait_deinit(&block->tracker);
// No processor should have the CPU mapped at this point
// No processor should have the CPU mapped at this point. block_context will
// be valid if any processor is mapped from the check above. Otherwise it
// won't be used by this helper.
UVM_ASSERT(block_check_processor_not_mapped(block, block_context, UVM_ID_CPU));
// Free CPU pages
@@ -9401,9 +9669,11 @@ static void block_gpu_release_region(uvm_va_block_t *va_block,
uvm_va_block_region_t region)
{
uvm_page_index_t page_index;
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
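// GPU chunks can span multiple pages, so translate the page index into a
// chunk index before indexing the chunks array.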
uvm_gpu_chunk_t *gpu_chunk = gpu_state->chunks[page_index];
size_t chunk_index = block_gpu_chunk_index(va_block, gpu, page_index, NULL);
uvm_gpu_chunk_t *gpu_chunk = gpu_state->chunks[chunk_index];
if (!gpu_chunk)
continue;
@@ -11645,14 +11915,16 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
!uvm_processor_mask_empty(all_involved_processors)) {
uvm_gpu_t *gpu;
// Before checking for ECC errors, make sure all of the GPU work
// is finished. Creating mappings on the CPU would have to wait
// Before checking for ECC and NVLINK errors, make sure all of the GPU
// work is finished. Creating mappings on the CPU would have to wait
// for the tracker anyway so this shouldn't hurt performance.
status = uvm_tracker_wait(&va_block->tracker);
if (status != NV_OK)
return status;
for_each_va_space_gpu_in_mask(gpu, va_space, all_involved_processors) {
uvm_gpu_t *peer_gpu;
// We cannot call into RM here so use the no RM ECC check.
status = uvm_gpu_check_ecc_error_no_rm(gpu);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
@@ -11671,6 +11943,24 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
}
if (status != NV_OK)
return status;
// Same as above for nvlink errors. Check the source GPU as well
// as all its peers.
for_each_gpu_in_mask(peer_gpu, &gpu->peer_info.peer_gpu_mask) {
status = uvm_gpu_check_nvlink_error_no_rm(peer_gpu);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
uvm_processor_mask_set(&service_context->gpus_to_check_for_nvlink_errors, peer_gpu->id);
if (status != NV_OK)
return status;
}
status = uvm_gpu_check_nvlink_error_no_rm(gpu);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
uvm_processor_mask_set(&service_context->gpus_to_check_for_nvlink_errors, gpu->id);
if (status != NV_OK)
return status;
}
}
@@ -12170,6 +12460,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
return status;
uvm_processor_mask_zero(&service_context->cpu_fault.gpus_to_check_for_ecc);
uvm_processor_mask_zero(&service_context->gpus_to_check_for_nvlink_errors);
if (skip_cpu_fault_with_valid_permissions(va_block, page_index, fault_access_type))
return NV_OK;
@@ -12178,7 +12469,8 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
// Throttling is implemented by sleeping in the fault handler on the CPU
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
service_context->cpu_fault.wakeup_time_stamp = thrashing_hint.throttle.end_time_stamp;
return NV_WARN_MORE_PROCESSING_REQUIRED;
status = NV_WARN_MORE_PROCESSING_REQUIRED;
goto out;
}
service_context->read_duplicate_count = 0;
@@ -12226,6 +12518,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
status = uvm_va_block_service_locked(UVM_ID_CPU, va_block, va_block_retry, service_context);
UVM_ASSERT(status != NV_WARN_MISMATCHED_TARGET);
out:
++service_context->num_retries;
return status;
@@ -12245,7 +12538,6 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
else
fault_access_type = UVM_FAULT_ACCESS_TYPE_READ;
service_context->num_retries = 0;
service_context->cpu_fault.did_migrate = false;
// We have to use vm_insert_page instead of handing the page to the kernel
@@ -13258,6 +13550,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
continue;
uvm_processor_get_uuid(id, &params->mapped_on[count]);
params->is_egm_mapping[count] = false;
params->mapping_type[count] = g_uvm_prot_to_test_pte_mapping[block_page_prot(block, id, page_index)];
UVM_ASSERT(params->mapping_type[count] != UVM_TEST_PTE_MAPPING_INVALID);
@@ -13266,11 +13559,28 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
nid = block_get_page_node_residency(block, page_index);
block_page = block_phys_page(processor_to_map, nid, page_index);
if (!UVM_ID_IS_CPU(id)) {
uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, uvm_gpu_get(id));
uvm_gpu_t *gpu = uvm_gpu_get(id);
uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, gpu);
NvU64 phys_addr = gpu_phys_addr.address;
params->mapping_physical_address[count] = gpu_phys_addr.address;
if (UVM_ID_IS_CPU(block_page.processor)) {
uvm_parent_gpu_t *egm_routing_gpu = uvm_va_space_get_egm_routing_gpu(va_space, gpu, nid);
if (egm_routing_gpu) {
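// Determine whether this page is actually EGM-mapped on the GPU. If so,
// report the EGM offset (system physical address minus the routing GPU's
// EGM base) rather than the sysmem DMA address.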
uvm_gpu_t *egm_gpu = uvm_parent_gpu_find_first_valid_gpu(egm_routing_gpu);
compute_egm_page_mask(block, block_context, gpu, egm_gpu->parent, &block_context->caller_page_mask);
if (uvm_page_mask_test(&block_context->caller_page_mask, block_page.page_index)) {
struct page *page = block_page_get(block, block_page);
phys_addr = page_to_phys(page) - egm_routing_gpu->egm.base_address;
params->is_egm_mapping[count] = true;
}
}
}
params->mapping_physical_address[count] = phys_addr;
}
else {
struct page *page = block_page_get(block, block_page);