535.43.02

Andy Ritger
2023-05-30 10:11:36 -07:00
parent 6dd092ddb7
commit eb5c7665a1
1403 changed files with 295367 additions and 86235 deletions


@@ -26,6 +26,7 @@
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
#include "uvm_conf_computing.h"
#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
@@ -44,6 +45,9 @@ struct uvm_gpu_semaphore_pool_struct
// List of all the semaphore pages belonging to the pool
struct list_head pages;
// Aperture of the pool's pages.
uvm_aperture_t aperture;
// Count of free semaphores among all the pages
NvU32 free_semaphores_count;
@@ -66,11 +70,24 @@ struct uvm_gpu_semaphore_pool_page_struct
DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
};
static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
{
return uvm_conf_computing_mode_enabled(pool->gpu) && (pool->aperture == UVM_APERTURE_VID);
}
static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
{
return gpu_semaphore_pool_is_secure(semaphore->page->pool);
}
static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
{
NvU32 offset;
NvU32 index;
if (gpu_semaphore_is_secure(semaphore))
return semaphore->conf_computing.index;
UVM_ASSERT(semaphore->payload != NULL);
UVM_ASSERT(semaphore->page != NULL);
@@ -118,6 +135,14 @@ static bool is_canary(NvU32 val)
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}
static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
{
// A pool allocated in the CPR (Compute Protected Region) of vidmem cannot be
// read/written from the CPU.
return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
}
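For context, the canary scheme referenced above tags free semaphore slots with a recognizable bit pattern so release-after-free writes can be caught in debug builds. Below is a minimal sketch of a plausible make_canary() counterpart to the is_canary() check earlier in this file; the BASE/MASK values are illustrative assumptions, not the driver's definitions.

// Hypothetical values for illustration only; the driver defines its own.
#define UVM_SEMAPHORE_CANARY_BASE 0x58800000u
#define UVM_SEMAPHORE_CANARY_MASK 0x000fffffu

static NvU32 make_canary(NvU32 payload)
{
    // Keep the low payload bits and stamp the canary base on the high bits,
    // so is_canary() can later verify the slot was not overwritten.
    return UVM_SEMAPHORE_CANARY_BASE | (payload & UVM_SEMAPHORE_CANARY_MASK);
}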
// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
// pool?
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
@@ -125,13 +150,34 @@ static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}
// Secure semaphore pools are allocated in the CPR of vidmem and mapped only to
// the owning GPU, since no other processor has access to them.
static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
uvm_gpu_semaphore_pool_page_t *pool_page,
uvm_rm_mem_type_t memory_type)
{
NV_STATUS status;
UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
status = uvm_rm_mem_alloc(pool->gpu,
memory_type,
UVM_SEMAPHORE_PAGE_SIZE,
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&pool_page->memory);
if (status != NV_OK)
return status;
return NV_OK;
}
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
NV_STATUS status;
uvm_gpu_semaphore_pool_page_t *pool_page;
NvU32 *payloads;
size_t i;
- uvm_rm_mem_type_t rm_mem_type = UVM_RM_MEM_TYPE_SYS;
+ uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
uvm_assert_mutex_locked(&pool->mutex);
@@ -142,13 +188,24 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
pool_page->pool = pool;
// Whenever the Confidential Computing feature is enabled, engines can
// access semaphores only in the CPR of vidmem. Mapping to other GPUs is
// also disabled.
if (gpu_semaphore_pool_is_secure(pool)) {
status = pool_alloc_secure_page(pool, pool_page, memory_type);
if (status != NV_OK)
goto error;
}
else {
status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
- rm_mem_type,
+ memory_type,
UVM_SEMAPHORE_PAGE_SIZE,
0,
&pool_page->memory);
if (status != NV_OK)
goto error;
}
// Verify the GPU can access the semaphore pool.
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
@@ -159,8 +216,7 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
list_add(&pool_page->all_pages_node, &pool->pages);
pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;
// Initialize the semaphore payloads to known values
- if (UVM_IS_DEBUG()) {
+ if (semaphore_uses_canary(pool)) {
payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
payloads[i] = make_canary(0);
@@ -176,8 +232,6 @@ error:
static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
{
uvm_gpu_semaphore_pool_t *pool;
- NvU32 *payloads;
- size_t i;
UVM_ASSERT(page);
pool = page->pool;
@@ -190,9 +244,9 @@ static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
"count: %u\n",
pool->free_semaphores_count);
// Check for semaphore release-after-free
- if (UVM_IS_DEBUG()) {
- payloads = uvm_rm_mem_get_cpu_va(page->memory);
+ if (semaphore_uses_canary(pool)) {
+ size_t i;
+ NvU32 *payloads = uvm_rm_mem_get_cpu_va(page->memory);
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
UVM_ASSERT(is_canary(payloads[i]));
}
@@ -223,11 +277,18 @@ NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaph
if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
continue;
- semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) + semaphore_index * UVM_SEMAPHORE_SIZE);
+ if (gpu_semaphore_pool_is_secure(pool)) {
+ semaphore->conf_computing.index = semaphore_index;
+ }
+ else {
+ semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
+ semaphore_index * UVM_SEMAPHORE_SIZE);
+ }
semaphore->page = page;
// Check for semaphore release-after-free
- UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
+ if (semaphore_uses_canary(pool))
+ UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
uvm_gpu_semaphore_set_payload(semaphore, 0);
@@ -266,7 +327,7 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
// Write a known value lower than the current payload in an attempt to catch
// release-after-free and acquire-after-free.
- if (UVM_IS_DEBUG())
+ if (semaphore_uses_canary(pool))
uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));
uvm_mutex_lock(&pool->mutex);
@@ -294,12 +355,26 @@ NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t
pool->free_semaphores_count = 0;
pool->gpu = gpu;
pool->aperture = UVM_APERTURE_SYS;
*pool_out = pool;
return NV_OK;
}
NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
NV_STATUS status;
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
if (status == NV_OK)
(*pool_out)->aperture = UVM_APERTURE_VID;
return status;
}
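As a usage sketch (illustrative, not part of this commit), a caller can pick the pool flavor from the GPU's Confidential Computing mode using only the two constructors above:

// Illustrative helper: secure pools only apply when Confidential Computing
// is enabled; otherwise fall back to the regular pool in sysmem.
static NV_STATUS semaphore_pool_create_auto(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    if (uvm_conf_computing_mode_enabled(gpu))
        return uvm_gpu_semaphore_secure_pool_create(gpu, pool_out);

    return uvm_gpu_semaphore_pool_create(gpu, pool_out);
}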
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
{
uvm_gpu_semaphore_pool_page_t *page;
@@ -375,13 +450,16 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
NvU32 index = get_index(semaphore);
- NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space);
+ NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;
return base_va + UVM_SEMAPHORE_SIZE * index;
}
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
if (gpu_semaphore_is_secure(semaphore))
return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);
return UVM_GPU_READ_ONCE(*semaphore->payload);
}
@@ -398,6 +476,10 @@ void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload
// being optimized out on non-SMP configs (we need them for interacting with
// the GPU correctly even on non-SMP).
mb();
if (gpu_semaphore_is_secure(semaphore))
UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
else
UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
}
@@ -425,9 +507,22 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
return true;
}
static bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu;
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
if (uvm_conf_computing_mode_enabled(gpu))
return true;
return false;
}
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
NV_STATUS status;
uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;
memset(tracking_sem, 0, sizeof(*tracking_sem));
@@ -437,7 +532,14 @@ NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_g
UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);
- uvm_spin_lock_init(&tracking_sem->lock, UVM_LOCK_ORDER_LEAF);
+ if (uvm_conf_computing_mode_enabled(pool->gpu))
+ order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;
+ if (tracking_semaphore_uses_mutex(tracking_sem))
+ uvm_mutex_init(&tracking_sem->m_lock, order);
+ else
+ uvm_spin_lock_init(&tracking_sem->s_lock, order);
atomic64_set(&tracking_sem->completed_value, 0);
tracking_sem->queued_value = 0;
@@ -449,15 +551,117 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}
static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
{
// Skip when there is no new value, or when the GPU is still writing the new
// encrypted material: reading mid-write would return corrupted data even if
// the value itself had not changed.
return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
}
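The odd/even notifier implements a sequence-lock style protocol: the GPU bumps the notifier to an odd value before rewriting the encrypted payload and to the next even value when done, so a CPU snapshot is consistent only if the same even value is observed before and after the copy. A generic sketch of that reader pattern (illustrative; memory barriers omitted here, whereas the function below uses smp_mb__after_atomic()/smp_mb__before_atomic()):

// Minimal seqlock-style reader: returns true iff 'out' holds a snapshot of
// 'data' taken between two reads of the same even sequence value.
static bool snapshot_if_stable(const volatile NvU32 *seq, const volatile NvU32 *data, NvU32 *out)
{
    NvU32 before = *seq;

    if (before % 2)
        return false;       // Writer is mid-update; caller should retry.

    *out = *data;           // Copy the payload.

    return *seq == before;  // Unchanged even sequence => consistent snapshot.
}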
static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
UvmCslIv local_iv;
NvU32 local_payload;
NvU32 new_sem_value;
NvU32 gpu_notifier;
NvU32 last_observed_notifier;
NvU32 new_gpu_notifier = 0;
NvU32 iv_index = 0;
// A channel can have multiple entries pending, and the tracking semaphore
// update of each entry can race with this function. Since the semaphore
// must be updated to release a used entry, we never need more than
// 'num_gpfifo_entries' retries.
unsigned tries_left = channel->num_gpfifo_entries;
NV_STATUS status = NV_OK;
NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
UVM_ASSERT(last_observed_notifier <= gpu_notifier);
if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
return;
do {
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
// Odd notifier value means there's an update in progress.
if (gpu_notifier % 2)
continue;
// Make sure no memory accesses happen before we read the notifier
smp_mb__after_atomic();
iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
local_payload = UVM_READ_ONCE(*payload_cpu_addr);
memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
// Make sure the second read of notifier happens after
// all memory accesses.
smp_mb__before_atomic();
new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
tries_left--;
} while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));
if (!tries_left) {
status = NV_ERR_INVALID_STATE;
goto error;
}
if (gpu_notifier == new_gpu_notifier) {
status = uvm_conf_computing_cpu_decrypt(channel,
&new_sem_value,
&local_payload,
&local_iv,
sizeof(new_sem_value),
&local_auth_tag);
if (status != NV_OK)
goto error;
uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
}
return;
error:
// Decryption failure is a fatal error, as is running out of retries. In
// testing, every decryption succeeded on the first try; needing this many
// retries would indicate active tampering with the data structures.
uvm_global_set_fatal_error(status);
}
static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
// The semaphore value is the bottom 32 bits of completed_value
NvU32 old_sem_value = (NvU32)old_value;
- NvU32 new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
+ NvU32 new_sem_value;
NvU64 new_value;
- uvm_assert_spinlock_locked(&tracking_semaphore->lock);
+ if (tracking_semaphore_uses_mutex(tracking_semaphore))
+ uvm_assert_mutex_locked(&tracking_semaphore->m_lock);
+ else
+ uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);
if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
// mechanism to all semaphores
uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
}
new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
// The following logic to update the completed value is very subtle; it
// helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
@@ -466,7 +670,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
if (old_sem_value == new_sem_value) {
// No progress since the last update.
// No additional memory barrier required in this case as completed_value
- // is always updated under the spinlock that this thread just acquired.
+ // is always updated under the lock that this thread just acquired.
// That guarantees full ordering with all the accesses the thread that
// updated completed_value did under the lock including the GPU
// semaphore read.
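To make the 32-to-64-bit extension concrete, here is a sketch of the wraparound handling the surrounding logic performs (illustrative, not the driver's exact code; it assumes, per the uvm_channel.h include comment above, that payload jumps are bounded so at most one 32-bit wrap can occur between updates):

// Extend a wrapping 32-bit payload into the monotonic 64-bit completed value.
static NvU64 extend_completed_value(NvU64 old_value, NvU32 new_sem_value)
{
    NvU64 new_value = (old_value & ~(NvU64)0xffffffff) | new_sem_value;

    // If the 32-bit payload moved backwards, it wrapped around, so carry
    // into the upper 32 bits.
    if (new_sem_value < (NvU32)old_value)
        new_value += 1ULL << 32;

    return new_value;
}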
@@ -493,7 +697,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
old_value, new_value);
- // Use an atomic write even though the spinlock is held so that the value can
+ // Use an atomic write even though the lock is held so that the value can
// be (carefully) read atomically outside of the lock.
//
// atomic64_set() on its own doesn't imply any memory barriers and we need
@@ -521,9 +725,9 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
// guarantees that no accesses will be ordered above the atomic (and hence
// the GPU semaphore read).
//
- // Notably the soon following uvm_spin_unlock() is a release barrier that
- // allows later memory accesses to be reordered above it and hence doesn't
- // provide the necessary ordering with the GPU semaphore read.
+ // Notably the soon following unlock is a release barrier that allows later
+ // memory accesses to be reordered above it and hence doesn't provide the
+ // necessary ordering with the GPU semaphore read.
//
// Also notably this would still need to be handled if we ever switch to
// atomic64_set_release() and atomic64_read_acquire() for accessing
@@ -540,11 +744,17 @@ NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semapho
// Check that the GPU which owns the semaphore is still present
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
- uvm_spin_lock(&tracking_semaphore->lock);
+ if (tracking_semaphore_uses_mutex(tracking_semaphore))
+ uvm_mutex_lock(&tracking_semaphore->m_lock);
+ else
+ uvm_spin_lock(&tracking_semaphore->s_lock);
completed = update_completed_value_locked(tracking_semaphore);
- uvm_spin_unlock(&tracking_semaphore->lock);
+ if (tracking_semaphore_uses_mutex(tracking_semaphore))
+ uvm_mutex_unlock(&tracking_semaphore->m_lock);
+ else
+ uvm_spin_unlock(&tracking_semaphore->s_lock);
return completed;
}
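Finally, as a hypothetical usage pattern (not from this commit), a waiter can poll the tracking semaphore until a target value completes, letting the function above select the correct lock internally:

// Illustrative busy-wait; real callers would sleep or yield between polls.
static void wait_for_completed_value(uvm_gpu_tracking_semaphore_t *sem, NvU64 value)
{
    while (uvm_gpu_tracking_semaphore_update_completed_value(sem) < value)
        cpu_relax();
}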