Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2026-02-19 06:29:58 +00:00)

Commit 550.40.65
@@ -725,8 +725,9 @@ bool uvm_va_block_cpu_is_region_resident_on(uvm_va_block_t *va_block, int nid, u
 }
 
 // Return the preferred NUMA node ID for the block's policy.
-// If the preferred node ID is NUMA_NO_NODE, the current NUMA node ID
-// is returned.
+// If the preferred node ID is NUMA_NO_NODE, the nearest NUMA node ID
+// with memory is returned. In most cases, this should be the current
+// NUMA node.
 static int uvm_va_block_context_get_node(uvm_va_block_context_t *va_block_context)
 {
     if (va_block_context->make_resident.dest_nid != NUMA_NO_NODE)
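The updated comment describes a simple fallback. As a reading aid, here is a hedged, kernel-style sketch of that selection; the body is a guess at the shape of uvm_va_block_context_get_node() (only the comment change above is part of this diff), and numa_mem_id()/NUMA_NO_NODE are the stock Linux NUMA helpers.

#include <linux/numa.h>      /* NUMA_NO_NODE */
#include <linux/topology.h>  /* numa_mem_id() */

/* Sketch only: prefer an explicitly requested destination node, otherwise
 * fall back to the nearest node that actually has memory. */
static int sketch_context_get_node(int dest_nid)
{
    if (dest_nid != NUMA_NO_NODE)
        return dest_nid;

    /* numa_mem_id() returns the nearest node with memory, which on most
     * systems is simply the current node. */
    return numa_mem_id();
}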
@@ -2070,6 +2071,7 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
     uvm_page_mask_t *allocated_mask;
     uvm_cpu_chunk_alloc_flags_t alloc_flags = UVM_CPU_CHUNK_ALLOC_FLAGS_NONE;
     uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
+    const uvm_va_policy_t *policy = uvm_va_policy_get_region(block, populate_region);
     uvm_page_index_t page_index;
     uvm_gpu_id_t id;
     int preferred_nid = block_context->make_resident.dest_nid;
@@ -2077,6 +2079,10 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
     if (block_test && block_test->cpu_chunk_allocation_target_id != NUMA_NO_NODE)
         preferred_nid = block_test->cpu_chunk_allocation_target_id;
 
+    // If the VA range has a preferred NUMA node, use it.
+    if (preferred_nid == NUMA_NO_NODE)
+        preferred_nid = policy->preferred_nid;
+
     // TODO: Bug 4158598: Using NUMA_NO_NODE for staging allocations is sub-optimal.
     if (preferred_nid != NUMA_NO_NODE) {
         uvm_va_block_cpu_node_state_t *node_state = block_node_state_get(block, preferred_nid);
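Purely as an illustration of the selection order this hunk sets up (the helper below and its parameters are invented, not driver code): a test-forced node wins, then the caller's make_resident.dest_nid, then the VA policy's preferred node, which may itself still be NUMA_NO_NODE and fall through to the staging-allocation TODO above.

#include <linux/numa.h>

/* Sketch of the preferred_nid precedence in block_populate_pages_cpu(). */
static int sketch_pick_cpu_alloc_nid(int test_override_nid, int dest_nid, int policy_nid)
{
    int nid = dest_nid;

    if (test_override_nid != NUMA_NO_NODE)  /* test-only override */
        nid = test_override_nid;

    if (nid == NUMA_NO_NODE)                /* fall back to the VA range policy */
        nid = policy_nid;

    return nid;                             /* may still be NUMA_NO_NODE */
}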
@@ -2127,13 +2133,12 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
         uvm_page_mask_t *node_pages_mask = &block_context->make_resident.node_pages_mask;
         uvm_chunk_sizes_mask_t allocation_sizes;
 
-        if (uvm_page_mask_test(allocated_mask, page_index)) {
+        if (uvm_page_mask_test(allocated_mask, page_index) ||
+            uvm_va_block_cpu_is_page_resident_on(block, preferred_nid, page_index)) {
             page_index = uvm_va_block_next_unset_page_in_mask(populate_region, allocated_mask, page_index) - 1;
             continue;
         }
 
-        UVM_ASSERT(!uvm_va_block_cpu_is_page_resident_on(block, preferred_nid, page_index));
-
         allocation_sizes = block_calculate_largest_alloc_size(block,
                                                               page_index,
                                                               allocated_mask,
@@ -3843,6 +3848,7 @@ static void conf_computing_block_copy_push_gpu_to_cpu(uvm_va_block_t *block,
     uvm_gpu_address_t staging_buffer = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
     uvm_gpu_address_t auth_tag_buffer = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
     uvm_gpu_address_t src_address = block_copy_get_address(block, &copy_state->src, page_index, gpu);
+    NvU32 key_version = uvm_channel_pool_key_version(push->channel->pool);
 
     UVM_ASSERT(UVM_ID_IS_GPU(copy_state->src.id));
     UVM_ASSERT(UVM_ID_IS_CPU(copy_state->dst.id));
@@ -3860,7 +3866,8 @@ static void conf_computing_block_copy_push_gpu_to_cpu(uvm_va_block_t *block,
     // crypto-operations and it only guarantees PAGE_SIZE contiguity, all
     // encryptions and decryptions must happen on a PAGE_SIZE basis.
     for_each_va_block_page_in_region(page_index, region) {
-        uvm_conf_computing_log_gpu_encryption(push->channel, &dma_buffer->decrypt_iv[page_index]);
+        uvm_conf_computing_log_gpu_encryption(push->channel, PAGE_SIZE, &dma_buffer->decrypt_iv[page_index]);
+        dma_buffer->key_version[page_index] = key_version;
 
         // All but the first encryption can be pipelined. The first encryption
         // uses the caller's pipelining settings.
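The extra line records which encryption key version each page's IV belongs to; the later CPU-side decrypt receives exactly that pair (see the conf_computing_copy_pages_finish() hunk below), presumably so decryption still uses the matching key if the channel keys rotate in between. A self-contained model of that bookkeeping is sketched here; the struct, sizes, and names are made up for illustration and only stand in for fields of the confidential-computing DMA buffer.

#include <stddef.h>
#include <stdint.h>

#define SKETCH_PAGES_PER_BUFFER 512
#define SKETCH_IV_BYTES         16

/* Model of the per-page metadata the DMA buffer carries: one decryption IV
 * plus the key version that was current when that page was encrypted. */
struct sketch_dma_buffer {
    uint8_t  decrypt_iv[SKETCH_PAGES_PER_BUFFER][SKETCH_IV_BYTES];
    uint32_t key_version[SKETCH_PAGES_PER_BUFFER];
};

/* Stamp the key version for every page in a copied region; the IVs are
 * filled in separately by the encryption engine. */
static void sketch_record_key_versions(struct sketch_dma_buffer *buf,
                                       size_t first_page,
                                       size_t num_pages,
                                       uint32_t current_key_version)
{
    size_t page_index;

    for (page_index = first_page; page_index < first_page + num_pages; page_index++)
        buf->key_version[page_index] = current_key_version;
}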
@@ -3919,7 +3926,8 @@ static NV_STATUS conf_computing_copy_pages_finish(uvm_va_block_t *block,
         status = uvm_conf_computing_cpu_decrypt(push->channel,
                                                 cpu_page_address,
                                                 staging_buffer,
-                                                &dma_buffer->decrypt_iv[page_index],
+                                                dma_buffer->decrypt_iv + page_index,
+                                                dma_buffer->key_version[page_index],
                                                 PAGE_SIZE,
                                                 auth_tag_buffer);
         kunmap(dst_page);
@@ -4037,7 +4045,7 @@ static NV_STATUS block_copy_pages(uvm_va_block_t *va_block,
 
         UVM_ASSERT(dst_chunk);
         UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) >= uvm_va_block_region_size(region));
-        UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) <= uvm_cpu_chunk_get_size(dst_chunk));
+        UVM_ASSERT(uvm_va_block_region_size(region) <= uvm_cpu_chunk_get_size(dst_chunk));
 
         // CPU-to-CPU copies using memcpy() don't have any inherent ordering with
         // copies using GPU CEs. So, we have to make sure that all previously
@@ -5132,7 +5140,7 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
     uvm_page_mask_t *dst_resident_mask;
     uvm_page_mask_t *migrated_pages;
     uvm_page_mask_t *staged_pages;
-    uvm_page_mask_t *first_touch_mask;
+    uvm_page_mask_t *scratch_residency_mask;
 
     // TODO: Bug 3660922: need to implement HMM read duplication support.
     UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
@@ -5151,6 +5159,10 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
     uvm_assert_mutex_locked(&va_block->lock);
     UVM_ASSERT(!uvm_va_block_is_dead(va_block));
 
+    scratch_residency_mask = kmem_cache_alloc(g_uvm_page_mask_cache, NV_UVM_GFP_FLAGS);
+    if (!scratch_residency_mask)
+        return NV_ERR_NO_MEMORY;
+
     // For pages that are entering read-duplication we need to unmap remote
     // mappings and revoke RW and higher access permissions.
     //
@@ -5177,12 +5189,12 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
 
         status = block_prep_read_duplicate_mapping(va_block, va_block_context, src_id, region, preprocess_page_mask);
         if (status != NV_OK)
-            return status;
+            goto out;
     }
 
     status = block_populate_pages(va_block, va_block_retry, va_block_context, dest_id, region, page_mask);
     if (status != NV_OK)
-        return status;
+        goto out;
 
     status = block_copy_resident_pages(va_block,
                                        va_block_context,
@@ -5192,22 +5204,17 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
                                        prefetch_page_mask,
                                        UVM_VA_BLOCK_TRANSFER_MODE_COPY);
     if (status != NV_OK)
-        return status;
+        goto out;
 
     // Pages that weren't resident anywhere else were populated at the
     // destination directly. Mark them as resident now, since there were no
     // errors from block_copy_resident_pages() above.
-    // Note that va_block_context->scratch_page_mask is passed to
-    // block_copy_set_first_touch_residency() which is generally unsafe but in
-    // this case, block_copy_set_first_touch_residency() copies page_mask
-    // before scratch_page_mask could be clobbered.
     migrated_pages = &va_block_context->make_resident.pages_migrated;
-    first_touch_mask = &va_block_context->scratch_page_mask;
-    uvm_page_mask_init_from_region(first_touch_mask, region, page_mask);
-    uvm_page_mask_andnot(first_touch_mask, first_touch_mask, migrated_pages);
+    uvm_page_mask_init_from_region(scratch_residency_mask, region, page_mask);
+    uvm_page_mask_andnot(scratch_residency_mask, scratch_residency_mask, migrated_pages);
 
-    if (!uvm_page_mask_empty(first_touch_mask))
-        block_copy_set_first_touch_residency(va_block, va_block_context, dest_id, region, first_touch_mask);
+    if (!uvm_page_mask_empty(scratch_residency_mask))
+        block_copy_set_first_touch_residency(va_block, va_block_context, dest_id, region, scratch_residency_mask);
 
     staged_pages = &va_block_context->make_resident.pages_staged;
     if (!UVM_ID_IS_CPU(dest_id) && !uvm_page_mask_empty(staged_pages)) {
@@ -5219,6 +5226,18 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
 
     if (!uvm_page_mask_empty(migrated_pages)) {
         if (UVM_ID_IS_CPU(dest_id)) {
+            // Check if the CPU is already in the resident set of processors.
+            // We need to do this since we can't have multiple NUMA nodes with
+            // resident pages.
+            // If any of the migrate pages were already resident on the CPU, the
+            // residency has to be switched to the destination NUMA node.
+            if (uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) &&
+                uvm_page_mask_and(scratch_residency_mask,
+                                  uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE),
+                                  migrated_pages)) {
+                uvm_va_block_cpu_clear_resident_all_chunks(va_block, va_block_context, scratch_residency_mask);
+            }
+
             uvm_va_block_cpu_set_resident_all_chunks(va_block, va_block_context, migrated_pages);
         }
         else {
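The heart of the added block is one mask intersection: pages that both migrated and were already CPU-resident on some other NUMA node have to be cleared there before the destination node is marked, because a page can be CPU-resident on only one node at a time. A hedged, self-contained model of that arithmetic, with plain 64-bit masks standing in for uvm_page_mask_t:

#include <stdint.h>

/* Model: up to 64 pages per block, one bit per page. */
typedef uint64_t sketch_page_mask_t;

static void sketch_switch_cpu_residency(sketch_page_mask_t *old_node_resident,
                                        sketch_page_mask_t *dest_node_resident,
                                        sketch_page_mask_t migrated_pages)
{
    /* Pages that migrated but were already resident on another CPU node. */
    sketch_page_mask_t to_switch = *old_node_resident & migrated_pages;

    *old_node_resident &= ~to_switch;       /* clear the stale residency first */
    *dest_node_resident |= migrated_pages;  /* then account them to the target node */
}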
@@ -5247,7 +5266,9 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
     // Check state of all chunks after residency change.
     // TODO: Bug 4207783: Check both CPU and GPU chunks.
     UVM_ASSERT(block_check_cpu_chunks(va_block));
-    return NV_OK;
+
+out:
+    kmem_cache_free(g_uvm_page_mask_cache, scratch_residency_mask);
+    return status;
 }
 
 // Looks up the current CPU mapping state of page from the
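With the earlier kmem_cache_alloc() hunk and this new out: label, the function now follows the usual kernel allocate/goto-out/free shape, so the scratch mask is released on every path. A generic sketch of that shape, with placeholder work steps rather than the driver's actual call sequence:

#include <linux/slab.h>

/* Sketch of the allocate / goto-out / free pattern introduced above; the
 * two callbacks stand in for the populate and copy stages. */
static int sketch_with_scratch_mask(struct kmem_cache *cache,
                                    int (*populate)(void *scratch),
                                    int (*copy)(void *scratch))
{
    int status;
    void *scratch = kmem_cache_alloc(cache, GFP_KERNEL);

    if (!scratch)
        return -ENOMEM;

    status = populate(scratch);
    if (status != 0)
        goto out;                    /* any later failure also lands here */

    status = copy(scratch);

out:
    kmem_cache_free(cache, scratch); /* released on success and on error */
    return status;
}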
@@ -5532,13 +5553,15 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
                    *block->read_duplicated_pages.bitmap);
 
     // Test read_duplicated_pages mask
-    UVM_ASSERT_MSG((uvm_processor_mask_get_count(resident_processors) <= 1 &&
-                    !uvm_page_mask_test(&block->read_duplicated_pages, page_index)) ||
-                   (uvm_processor_mask_get_count(resident_processors) > 1 &&
-                    uvm_page_mask_test(&block->read_duplicated_pages, page_index)),
+    UVM_ASSERT_MSG((!uvm_page_mask_test(&block->read_duplicated_pages, page_index) &&
+                    uvm_processor_mask_get_count(resident_processors) <= 1) ||
+                   (uvm_page_mask_test(&block->read_duplicated_pages, page_index) &&
+                    uvm_processor_mask_get_count(resident_processors) >= 1),
                    "Resident: 0x%lx - Mappings R: 0x%lx W: 0x%lx A: 0x%lx - SWA: 0x%lx - RD: 0x%lx\n",
                    *resident_processors->bitmap,
-                   *read_mappings->bitmap, *write_mappings->bitmap, *atomic_mappings->bitmap,
+                   *read_mappings->bitmap,
+                   *write_mappings->bitmap,
+                   *atomic_mappings->bitmap,
                    *va_space->system_wide_atomics_enabled_processors.bitmap,
                    *block->read_duplicated_pages.bitmap);
 
@@ -6022,7 +6045,7 @@ static bool block_has_remote_mapping_gpu(uvm_va_block_t *block,
     if (uvm_page_mask_empty(mapped_pages))
         return false;
 
-    return !uvm_id_equal(uvm_va_range_get_policy(block->va_range)->preferred_location, gpu_id);
+    return !uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(block->va_range), gpu_id, NUMA_NO_NODE);
 }
 
 // Remote pages are pages which are mapped but not resident locally
@@ -8365,6 +8388,7 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
                                   uvm_va_block_context_t *block_context,
                                   uvm_gpu_t *gpu,
                                   uvm_processor_id_t resident_id,
+                                  int resident_nid,
                                   uvm_page_mask_t *map_page_mask,
                                   uvm_prot_t new_prot,
                                   uvm_tracker_t *out_tracker)
@@ -8374,7 +8398,7 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
     uvm_push_t push;
     NV_STATUS status;
     uvm_page_mask_t *pages_to_map = &block_context->mapping.page_mask;
-    const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, NUMA_NO_NODE);
+    const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, resident_nid);
     uvm_pte_bits_gpu_t pte_bit;
     uvm_pte_bits_gpu_t prot_pte_bit = get_gpu_pte_bit_index(new_prot);
     uvm_va_block_new_pte_state_t *new_pte_state = &block_context->mapping.new_pte_state;
@@ -8383,8 +8407,10 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
     UVM_ASSERT(map_page_mask);
     UVM_ASSERT(uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(resident_id)], gpu->id));
 
-    if (uvm_processor_mask_test(block_get_uvm_lite_gpus(va_block), gpu->id))
-        UVM_ASSERT(uvm_id_equal(resident_id, uvm_va_range_get_policy(va_block->va_range)->preferred_location));
+    if (uvm_processor_mask_test(block_get_uvm_lite_gpus(va_block), gpu->id)) {
+        uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
+        UVM_ASSERT(uvm_va_policy_preferred_location_equal(policy, resident_id, policy->preferred_nid));
+    }
 
     UVM_ASSERT(!uvm_page_mask_and(&block_context->scratch_page_mask,
                                   map_page_mask,
@@ -8486,18 +8512,27 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
     return uvm_tracker_add_push_safe(out_tracker, &push);
 }
 
+// allowed_nid_mask is only valid if the CPU is set in allowed_mask.
 static void map_get_allowed_destinations(uvm_va_block_t *block,
                                          uvm_va_block_context_t *va_block_context,
                                          const uvm_va_policy_t *policy,
                                          uvm_processor_id_t id,
-                                         uvm_processor_mask_t *allowed_mask)
+                                         uvm_processor_mask_t *allowed_mask,
+                                         nodemask_t *allowed_nid_mask)
 {
     uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
 
+    *allowed_nid_mask = node_possible_map;
+
     if (uvm_processor_mask_test(block_get_uvm_lite_gpus(block), id)) {
         // UVM-Lite can only map resident pages on the preferred location
         uvm_processor_mask_zero(allowed_mask);
         uvm_processor_mask_set(allowed_mask, policy->preferred_location);
+        if (UVM_ID_IS_CPU(policy->preferred_location) &&
+            !uvm_va_policy_preferred_location_equal(policy, UVM_ID_CPU, NUMA_NO_NODE)) {
+            nodes_clear(*allowed_nid_mask);
+            node_set(policy->preferred_nid, *allowed_nid_mask);
+        }
     }
     else if ((uvm_va_policy_is_read_duplicate(policy, va_space) ||
              (uvm_id_equal(policy->preferred_location, id) &&
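The new allowed_nid_mask parameter is built on the standard Linux nodemask API, the same calls visible in the hunk (node_possible_map, nodes_clear(), node_set()) plus the for_each_node_mask() iterator used later in uvm_va_block_map(). A small standalone refresher, separate from the driver code:

#include <linux/nodemask.h>
#include <linux/numa.h>
#include <linux/printk.h>

/* Sketch: start from every possible node, optionally restrict the mask to a
 * single preferred node, then walk it the way the mapping loop does. */
static void sketch_nodemask_usage(int preferred_nid)
{
    nodemask_t allowed = node_possible_map;
    int nid;

    if (preferred_nid != NUMA_NO_NODE) {
        nodes_clear(allowed);              /* empty the mask ...           */
        node_set(preferred_nid, allowed);  /* ... then allow just one node */
    }

    for_each_node_mask(nid, allowed)
        pr_debug("would map CPU pages resident on node %d\n", nid);
}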
@@ -8540,6 +8575,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
     uvm_page_mask_t *running_page_mask = &va_block_context->mapping.map_running_page_mask;
     NV_STATUS status = NV_OK;
     const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
+    nodemask_t *allowed_nid_destinations;
 
     va_block_context->mapping.cause = cause;
 
@@ -8589,10 +8625,20 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
     if (!allowed_destinations)
         return NV_ERR_NO_MEMORY;
 
+    allowed_nid_destinations = uvm_kvmalloc(sizeof(*allowed_nid_destinations));
+    if (!allowed_nid_destinations) {
+        uvm_processor_mask_cache_free(allowed_destinations);
+        return NV_ERR_NO_MEMORY;
+    }
+
     // Map per resident location so we can more easily detect physically-
     // contiguous mappings.
-    map_get_allowed_destinations(va_block, va_block_context, policy, id, allowed_destinations);
-
+    map_get_allowed_destinations(va_block,
+                                 va_block_context,
+                                 policy,
+                                 id,
+                                 allowed_destinations,
+                                 allowed_nid_destinations);
     for_each_closest_id(resident_id, allowed_destinations, id, va_space) {
         if (UVM_ID_IS_CPU(id)) {
             status = block_map_cpu_to(va_block,
@@ -8603,11 +8649,30 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
                                       new_prot,
                                       out_tracker);
         }
+        else if (UVM_ID_IS_CPU(resident_id)) {
+            int nid;
+
+            // map_get_allowed_distinations() will set the mask of CPU NUMA
+            // nodes that should be mapped.
+            for_each_node_mask(nid, *allowed_nid_destinations) {
+                status = block_map_gpu_to(va_block,
+                                          va_block_context,
+                                          gpu,
+                                          resident_id,
+                                          nid,
+                                          running_page_mask,
+                                          new_prot,
+                                          out_tracker);
+                if (status != NV_OK)
+                    break;
+            }
+        }
         else {
             status = block_map_gpu_to(va_block,
                                       va_block_context,
                                       gpu,
                                       resident_id,
+                                      NUMA_NO_NODE,
                                       running_page_mask,
                                       new_prot,
                                       out_tracker);
@@ -8622,6 +8687,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
     }
 
     uvm_processor_mask_cache_free(allowed_destinations);
+    uvm_kvfree(allowed_nid_destinations);
 
     return status;
 }
@@ -11175,8 +11241,8 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
     // so uvm_va_block_map will be a no-op.
     uvm_processor_mask_and(map_uvm_lite_gpus, map_other_processors, block_get_uvm_lite_gpus(va_block));
     if (!uvm_processor_mask_empty(map_uvm_lite_gpus) &&
-        uvm_id_equal(new_residency, preferred_location)) {
-        for_each_id_in_mask(map_processor_id, map_uvm_lite_gpus) {
+        uvm_va_policy_preferred_location_equal(policy, new_residency, va_block_context->make_resident.dest_nid)) {
+        for_each_id_in_mask (map_processor_id, map_uvm_lite_gpus) {
             status = uvm_va_block_map(va_block,
                                       va_block_context,
                                       map_processor_id,
@@ -11637,6 +11703,10 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
     // For GPU faults, the bottom half is pinned to CPUs closest to their GPU.
     // Therefore, in both cases, we can use numa_mem_id() to get the NUMA node
     // ID of the faulting processor.
+    // Note that numa_mem_id() returns the nearest node with memory. In most
+    // cases, this will be the current NUMA node. However, in the case that the
+    // current node does not have any memory, we probably want the nearest node
+    // with memory, anyway.
     int current_nid = numa_mem_id();
     bool may_read_duplicate = can_read_duplicate(va_block, page_index, policy, thrashing_hint);
 
@@ -11660,7 +11730,12 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
     // If read duplication is enabled and the page is also resident on the CPU,
     // keep its current NUMA node residency.
     if (may_read_duplicate && uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index))
-        return block_get_page_node_residency(va_block, page_index);
+        return NUMA_NO_NODE;
 
+    // The new_residency processor is the CPU and the preferred location is not
+    // the CPU. If the page is resident on the CPU, keep its current residency.
+    if (uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index))
+        return NUMA_NO_NODE;
+
     return current_nid;
 }
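Read together, the two hunks in block_select_node_residency() reduce to a short decision ladder. The sketch below condenses it into booleans for illustration; it is not the function itself, and in the real code other policy and thrashing checks sit between these tests.

#include <linux/numa.h>
#include <linux/types.h>

/* Condensed sketch of the residency-node choice after this change. */
static int sketch_select_node_residency(bool may_read_duplicate,
                                        bool page_resident_on_cpu,
                                        int current_nid)
{
    /* Read-duplicated pages already resident on the CPU keep whatever node
     * they are on: NUMA_NO_NODE means "do not force a node". */
    if (may_read_duplicate && page_resident_on_cpu)
        return NUMA_NO_NODE;

    /* Likewise when the page is simply already CPU-resident. */
    if (page_resident_on_cpu)
        return NUMA_NO_NODE;

    /* Otherwise allocate on the faulting processor's nearest node with
     * memory (numa_mem_id() at the top of the function). */
    return current_nid;
}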
@@ -12564,125 +12639,6 @@ NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
     return uvm_hmm_va_block_find_create(va_space, addr, hmm_vma, out_block);
 }
 
-// Launch a synchronous, encrypted copy between GPU and CPU.
-//
-// The copy entails a GPU-side encryption (relying on the Copy Engine), and a
-// CPU-side decryption step, such that the destination CPU buffer pointed by
-// dst_plain will contain the unencrypted (plain text) contents. The destination
-// buffer can be in protected or unprotected sysmem, while the source buffer
-// must be in protected vidmem.
-//
-// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
-//
-// The input tracker, if not NULL, is internally acquired by the push
-// responsible for the encrypted copy.
-__attribute__ ((format(printf, 6, 7)))
-static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
-                                              void *dst_plain,
-                                              uvm_gpu_address_t src_gpu_address,
-                                              size_t size,
-                                              uvm_tracker_t *tracker,
-                                              const char *format,
-                                              ...)
-{
-    NV_STATUS status;
-    UvmCslIv decrypt_iv;
-    uvm_push_t push;
-    uvm_conf_computing_dma_buffer_t *dma_buffer;
-    uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
-    void *src_cipher, *auth_tag;
-    va_list args;
-
-    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
-    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
-
-    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
-    if (status != NV_OK)
-        return status;
-
-    va_start(args, format);
-    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
-    va_end(args);
-
-    if (status != NV_OK)
-        goto out;
-
-    uvm_conf_computing_log_gpu_encryption(push.channel, &decrypt_iv);
-
-    dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
-    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
-    gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
-
-    status = uvm_push_end_and_wait(&push);
-    if (status != NV_OK)
-        goto out;
-
-    src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
-    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
-    status = uvm_conf_computing_cpu_decrypt(push.channel, dst_plain, src_cipher, &decrypt_iv, size, auth_tag);
-
-out:
-    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
-    return status;
-}
-
-// Launch a synchronous, encrypted copy between CPU and GPU.
-//
-// The source CPU buffer pointed by src_plain contains the unencrypted (plain
-// text) contents; the function internally performs a CPU-side encryption step
-// before launching the GPU-side CE decryption. The source buffer can be in
-// protected or unprotected sysmem, while the destination buffer must be in
-// protected vidmem.
-//
-// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
-//
-// The input tracker, if not NULL, is internally acquired by the push
-// responsible for the encrypted copy.
-__attribute__ ((format(printf, 6, 7)))
-static NV_STATUS encrypted_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
-                                              uvm_gpu_address_t dst_gpu_address,
-                                              void *src_plain,
-                                              size_t size,
-                                              uvm_tracker_t *tracker,
-                                              const char *format,
-                                              ...)
-{
-    NV_STATUS status;
-    uvm_push_t push;
-    uvm_conf_computing_dma_buffer_t *dma_buffer;
-    uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
-    void *dst_cipher, *auth_tag;
-    va_list args;
-
-    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
-    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
-
-    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
-    if (status != NV_OK)
-        return status;
-
-    va_start(args, format);
-    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
-    va_end(args);
-
-    if (status != NV_OK)
-        goto out;
-
-    dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
-    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
-    uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);
-
-    src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
-    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
-    gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
-
-    status = uvm_push_end_and_wait(&push);
-
-out:
-    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
-    return status;
-}
-
 static NV_STATUS va_block_write_cpu_to_gpu(uvm_va_block_t *va_block,
                                            uvm_gpu_t *gpu,
                                            uvm_gpu_address_t dst_gpu_address,
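The two helpers deleted here performed a synchronous CE-encrypt / CPU-decrypt round trip through a staging DMA buffer; the call sites in the next two hunks switch to uvm_conf_computing_util_memcopy_{gpu_to_cpu,cpu_to_gpu}(), which presumably carry the same sequence in shared code. As a reading aid, the GPU-to-CPU flow of the deleted helper condenses to the sketch below; it reuses only calls from the removed body, drops the printf/va_list plumbing, and compiles only inside the UVM driver.

// Condensed from the removed encrypted_memcopy_gpu_to_cpu(); illustrative only.
static NV_STATUS sketch_encrypted_gpu_to_cpu(uvm_gpu_t *gpu,
                                             void *dst_plain,
                                             uvm_gpu_address_t src_gpu_address,
                                             size_t size,
                                             uvm_tracker_t *tracker)
{
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_push_t push;
    UvmCslIv decrypt_iv;
    NV_STATUS status;

    // 1) Grab a staging DMA buffer and start a GPU-to-CPU push.
    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, "sketch");
    if (status != NV_OK)
        goto out;

    // 2) Log the IV, CE-encrypt protected vidmem into the staging buffer, then wait.
    uvm_conf_computing_log_gpu_encryption(push.channel, &decrypt_iv);
    gpu->parent->ce_hal->encrypt(&push,
                                 uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu),
                                 src_gpu_address,
                                 size,
                                 uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu));
    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK)
        goto out;

    // 3) CPU-decrypt the staging buffer into the caller's plain-text buffer.
    status = uvm_conf_computing_cpu_decrypt(push.channel,
                                            dst_plain,
                                            uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc),
                                            &decrypt_iv,
                                            size,
                                            uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag));

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}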
@@ -12695,14 +12651,14 @@ static NV_STATUS va_block_write_cpu_to_gpu(uvm_va_block_t *va_block,
     uvm_gpu_address_t src_gpu_address;
 
     if (g_uvm_global.conf_computing_enabled) {
-        return encrypted_memcopy_cpu_to_gpu(gpu,
-                                            dst_gpu_address,
-                                            uvm_mem_get_cpu_addr_kernel(src_mem),
-                                            size,
-                                            &va_block->tracker,
-                                            "Encrypted write to [0x%llx, 0x%llx)",
-                                            dst,
-                                            dst + size);
+        return uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
+                                                          dst_gpu_address,
+                                                          uvm_mem_get_cpu_addr_kernel(src_mem),
+                                                          size,
+                                                          &va_block->tracker,
+                                                          "Encrypted write to [0x%llx, 0x%llx)",
+                                                          dst,
+                                                          dst + size);
     }
 
     status = uvm_push_begin_acquire(gpu->channel_manager,
@@ -12799,14 +12755,14 @@ static NV_STATUS va_block_read_gpu_to_cpu(uvm_va_block_t *va_block,
     uvm_gpu_address_t dst_gpu_address;
 
     if (g_uvm_global.conf_computing_enabled) {
-        return encrypted_memcopy_gpu_to_cpu(gpu,
-                                            uvm_mem_get_cpu_addr_kernel(dst_mem),
-                                            src_gpu_address,
-                                            size,
-                                            &va_block->tracker,
-                                            "Encrypted read from [0x%llx, 0x%llx)",
-                                            src,
-                                            src + size);
+        return uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
+                                                          uvm_mem_get_cpu_addr_kernel(dst_mem),
+                                                          src_gpu_address,
+                                                          size,
+                                                          &va_block->tracker,
+                                                          "Encrypted read from [0x%llx, 0x%llx)",
+                                                          src,
+                                                          src + size);
     }
 
     status = uvm_push_begin_acquire(gpu->channel_manager,