535.43.02

Andy Ritger
2023-05-30 10:11:36 -07:00
parent 6dd092ddb7
commit eb5c7665a1
1403 changed files with 295367 additions and 86235 deletions


@@ -26,6 +26,7 @@
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
#include "uvm_conf_computing.h"
#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
@@ -44,6 +45,9 @@ struct uvm_gpu_semaphore_pool_struct
// List of all the semaphore pages belonging to the pool
struct list_head pages;
// Aperture of the pool's pages.
uvm_aperture_t aperture;
// Count of free semaphores among all the pages
NvU32 free_semaphores_count;
@@ -66,11 +70,24 @@ struct uvm_gpu_semaphore_pool_page_struct
DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
};
static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
{
return uvm_conf_computing_mode_enabled(pool->gpu) && (pool->aperture == UVM_APERTURE_VID);
}
static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
{
return gpu_semaphore_pool_is_secure(semaphore->page->pool);
}
static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
{
NvU32 offset;
NvU32 index;
if (gpu_semaphore_is_secure(semaphore))
return semaphore->conf_computing.index;
UVM_ASSERT(semaphore->payload != NULL);
UVM_ASSERT(semaphore->page != NULL);
@@ -118,6 +135,14 @@ static bool is_canary(NvU32 val)
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}
static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
{
// A pool allocated in the CPR (Compute Protected Region) of vidmem cannot be
// read/written from the CPU.
return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
}
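For context, the canary scheme referenced above tags free semaphore slots with a recognizable bit pattern so release-after-free writes can be caught in debug builds. Below is a minimal sketch of a plausible make_canary() counterpart to the is_canary() check earlier in this file; the BASE/MASK values are illustrative assumptions, not the driver's definitions.

// Hypothetical values for illustration only; the driver defines its own.
#define UVM_SEMAPHORE_CANARY_BASE 0x58800000u
#define UVM_SEMAPHORE_CANARY_MASK 0x000fffffu

static NvU32 make_canary(NvU32 payload)
{
    // Keep the low payload bits and stamp the canary base on the high bits,
    // so is_canary() can later verify the slot was not overwritten.
    return UVM_SEMAPHORE_CANARY_BASE | (payload & UVM_SEMAPHORE_CANARY_MASK);
}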
// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
// pool?
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
@@ -125,13 +150,34 @@ static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}
// Secure semaphore pools are allocated in the CPR of vidmem and mapped only to
// the owning GPU, since no other processor has access to them.
static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
uvm_gpu_semaphore_pool_page_t *pool_page,
uvm_rm_mem_type_t memory_type)
{
NV_STATUS status;
UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
status = uvm_rm_mem_alloc(pool->gpu,
memory_type,
UVM_SEMAPHORE_PAGE_SIZE,
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&pool_page->memory);
if (status != NV_OK)
return status;
return NV_OK;
}
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
NV_STATUS status;
uvm_gpu_semaphore_pool_page_t *pool_page;
NvU32 *payloads;
size_t i;
- uvm_rm_mem_type_t rm_mem_type = UVM_RM_MEM_TYPE_SYS;
+ uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
uvm_assert_mutex_locked(&pool->mutex);
@@ -142,13 +188,24 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
pool_page->pool = pool;
// Whenever the Confidential Computing feature is enabled, engines can
// access semaphores only in the CPR of vidmem. Mapping to other GPUs is
// also disabled.
if (gpu_semaphore_pool_is_secure(pool)) {
status = pool_alloc_secure_page(pool, pool_page, memory_type);
if (status != NV_OK)
goto error;
}
else {
status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
- rm_mem_type,
+ memory_type,
UVM_SEMAPHORE_PAGE_SIZE,
0,
&pool_page->memory);
if (status != NV_OK)
goto error;
}
// Verify the GPU can access the semaphore pool.
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
@@ -159,8 +216,7 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
list_add(&pool_page->all_pages_node, &pool->pages);
pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;
// Initialize the semaphore payloads to known values
- if (UVM_IS_DEBUG()) {
+ if (semaphore_uses_canary(pool)) {
payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
payloads[i] = make_canary(0);
@@ -176,8 +232,6 @@ error:
static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
{
uvm_gpu_semaphore_pool_t *pool;
- NvU32 *payloads;
- size_t i;
UVM_ASSERT(page);
pool = page->pool;
@@ -190,9 +244,9 @@ static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
"count: %u\n",
pool->free_semaphores_count);
// Check for semaphore release-after-free
- if (UVM_IS_DEBUG()) {
- payloads = uvm_rm_mem_get_cpu_va(page->memory);
+ if (semaphore_uses_canary(pool)) {
+ size_t i;
+ NvU32 *payloads = uvm_rm_mem_get_cpu_va(page->memory);
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
UVM_ASSERT(is_canary(payloads[i]));
}
@@ -223,11 +277,18 @@ NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaph
if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
continue;
- semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) + semaphore_index * UVM_SEMAPHORE_SIZE);
+ if (gpu_semaphore_pool_is_secure(pool)) {
+ semaphore->conf_computing.index = semaphore_index;
+ }
+ else {
+ semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
+ semaphore_index * UVM_SEMAPHORE_SIZE);
+ }
semaphore->page = page;
// Check for semaphore release-after-free
- UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
+ if (semaphore_uses_canary(pool))
+ UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
uvm_gpu_semaphore_set_payload(semaphore, 0);
@@ -266,7 +327,7 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
// Write a known value lower than the current payload in an attempt to catch
// release-after-free and acquire-after-free.
- if (UVM_IS_DEBUG())
+ if (semaphore_uses_canary(pool))
uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));
uvm_mutex_lock(&pool->mutex);
@@ -294,12 +355,26 @@ NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t
pool->free_semaphores_count = 0;
pool->gpu = gpu;
pool->aperture = UVM_APERTURE_SYS;
*pool_out = pool;
return NV_OK;
}
NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
NV_STATUS status;
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
if (status == NV_OK)
(*pool_out)->aperture = UVM_APERTURE_VID;
return status;
}
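As a usage sketch (illustrative, not part of this commit), a caller can pick the pool flavor from the GPU's Confidential Computing mode using only the two constructors above:

// Illustrative helper: secure pools only apply when Confidential Computing
// is enabled; otherwise fall back to the regular pool in sysmem.
static NV_STATUS semaphore_pool_create_auto(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    if (uvm_conf_computing_mode_enabled(gpu))
        return uvm_gpu_semaphore_secure_pool_create(gpu, pool_out);

    return uvm_gpu_semaphore_pool_create(gpu, pool_out);
}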
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
{
uvm_gpu_semaphore_pool_page_t *page;
@@ -375,13 +450,16 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
NvU32 index = get_index(semaphore);
- NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space);
+ NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;
return base_va + UVM_SEMAPHORE_SIZE * index;
}
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
if (gpu_semaphore_is_secure(semaphore))
return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);
return UVM_GPU_READ_ONCE(*semaphore->payload);
}
@@ -398,6 +476,10 @@ void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload
// being optimized out on non-SMP configs (we need them for interacting with
// the GPU correctly even on non-SMP).
mb();
if (gpu_semaphore_is_secure(semaphore))
UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
else
UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
}
@@ -425,9 +507,22 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
return true;
}
static bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu;
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
if (uvm_conf_computing_mode_enabled(gpu))
return true;
return false;
}
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
NV_STATUS status;
uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;
memset(tracking_sem, 0, sizeof(*tracking_sem));
@@ -437,7 +532,14 @@ NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_g
UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);
- uvm_spin_lock_init(&tracking_sem->lock, UVM_LOCK_ORDER_LEAF);
+ if (uvm_conf_computing_mode_enabled(pool->gpu))
+ order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;
+ if (tracking_semaphore_uses_mutex(tracking_sem))
+ uvm_mutex_init(&tracking_sem->m_lock, order);
+ else
+ uvm_spin_lock_init(&tracking_sem->s_lock, order);
atomic64_set(&tracking_sem->completed_value, 0);
tracking_sem->queued_value = 0;
@@ -449,15 +551,117 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}
static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
{
// Skip when there is no new value, or when the GPU is still writing the new
// encrypted material: reading mid-write would return corrupted data even if
// the value itself had not changed.
return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
}
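The odd/even notifier implements a sequence-lock style protocol: the GPU bumps the notifier to an odd value before rewriting the encrypted payload and to the next even value when done, so a CPU snapshot is consistent only if the same even value is observed before and after the copy. A generic sketch of that reader pattern (illustrative; memory barriers omitted here, whereas the function below uses smp_mb__after_atomic()/smp_mb__before_atomic()):

// Minimal seqlock-style reader: returns true iff 'out' holds a snapshot of
// 'data' taken between two reads of the same even sequence value.
static bool snapshot_if_stable(const volatile NvU32 *seq, const volatile NvU32 *data, NvU32 *out)
{
    NvU32 before = *seq;

    if (before % 2)
        return false;       // Writer is mid-update; caller should retry.

    *out = *data;           // Copy the payload.

    return *seq == before;  // Unchanged even sequence => consistent snapshot.
}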
static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
UvmCslIv local_iv;
NvU32 local_payload;
NvU32 new_sem_value;
NvU32 gpu_notifier;
NvU32 last_observed_notifier;
NvU32 new_gpu_notifier = 0;
NvU32 iv_index = 0;
// A channel can have multiple entries pending, and the tracking semaphore
// update of each entry can race with this function. Since the semaphore
// must be updated to release a used entry, we never need more than
// 'num_gpfifo_entries' retries.
unsigned tries_left = channel->num_gpfifo_entries;
NV_STATUS status = NV_OK;
NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
UVM_ASSERT(last_observed_notifier <= gpu_notifier);
if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
return;
do {
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
// Odd notifier value means there's an update in progress.
if (gpu_notifier % 2)
continue;
// Make sure no memory accesses happen before we read the notifier
smp_mb__after_atomic();
iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
local_payload = UVM_READ_ONCE(*payload_cpu_addr);
memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
// Make sure the second read of notifier happens after
// all memory accesses.
smp_mb__before_atomic();
new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
tries_left--;
} while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));
if (!tries_left) {
status = NV_ERR_INVALID_STATE;
goto error;
}
if (gpu_notifier == new_gpu_notifier) {
status = uvm_conf_computing_cpu_decrypt(channel,
&new_sem_value,
&local_payload,
&local_iv,
sizeof(new_sem_value),
&local_auth_tag);
if (status != NV_OK)
goto error;
uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
}
return;
error:
// Decryption failure is a fatal error, as is running out of retries. In
// testing, every decryption succeeded on the first try; needing this many
// retries would indicate active tampering with the data structures.
uvm_global_set_fatal_error(status);
}
static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
// The semaphore value is the bottom 32 bits of completed_value
NvU32 old_sem_value = (NvU32)old_value;
- NvU32 new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
+ NvU32 new_sem_value;
NvU64 new_value;
- uvm_assert_spinlock_locked(&tracking_semaphore->lock);
+ if (tracking_semaphore_uses_mutex(tracking_semaphore))
+ uvm_assert_mutex_locked(&tracking_semaphore->m_lock);
+ else
+ uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);
if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
// mechanism to all semaphores
uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
}
new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
// The following logic to update the completed value is very subtle; it
// helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
@@ -466,7 +670,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
if (old_sem_value == new_sem_value) {
// No progress since the last update.
// No additional memory barrier required in this case as completed_value
- // is always updated under the spinlock that this thread just acquired.
+ // is always updated under the lock that this thread just acquired.
// That guarantees full ordering with all the accesses the thread that
// updated completed_value did under the lock including the GPU
// semaphore read.
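To make the 32-to-64-bit extension concrete, here is a sketch of the wraparound handling the surrounding logic performs (illustrative, not the driver's exact code; it assumes, per the uvm_channel.h include comment above, that payload jumps are bounded so at most one 32-bit wrap can occur between updates):

// Extend a wrapping 32-bit payload into the monotonic 64-bit completed value.
static NvU64 extend_completed_value(NvU64 old_value, NvU32 new_sem_value)
{
    NvU64 new_value = (old_value & ~(NvU64)0xffffffff) | new_sem_value;

    // If the 32-bit payload moved backwards, it wrapped around, so carry
    // into the upper 32 bits.
    if (new_sem_value < (NvU32)old_value)
        new_value += 1ULL << 32;

    return new_value;
}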
@@ -493,7 +697,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
old_value, new_value);
- // Use an atomic write even though the spinlock is held so that the value can
+ // Use an atomic write even though the lock is held so that the value can
// be (carefully) read atomically outside of the lock.
//
// atomic64_set() on its own doesn't imply any memory barriers and we need
@@ -521,9 +725,9 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
// guarantees that no accesses will be ordered above the atomic (and hence
// the GPU semaphore read).
//
- // Notably the soon following uvm_spin_unlock() is a release barrier that
- // allows later memory accesses to be reordered above it and hence doesn't
- // provide the necessary ordering with the GPU semaphore read.
+ // Notably the soon following unlock is a release barrier that allows later
+ // memory accesses to be reordered above it and hence doesn't provide the
+ // necessary ordering with the GPU semaphore read.
//
// Also notably this would still need to be handled if we ever switch to
// atomic64_set_release() and atomic64_read_acquire() for accessing
@@ -540,11 +744,17 @@ NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semapho
// Check that the GPU which owns the semaphore is still present
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
- uvm_spin_lock(&tracking_semaphore->lock);
+ if (tracking_semaphore_uses_mutex(tracking_semaphore))
+ uvm_mutex_lock(&tracking_semaphore->m_lock);
+ else
+ uvm_spin_lock(&tracking_semaphore->s_lock);
completed = update_completed_value_locked(tracking_semaphore);
- uvm_spin_unlock(&tracking_semaphore->lock);
+ if (tracking_semaphore_uses_mutex(tracking_semaphore))
+ uvm_mutex_unlock(&tracking_semaphore->m_lock);
+ else
+ uvm_spin_unlock(&tracking_semaphore->s_lock);
return completed;
}
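Finally, as a hypothetical usage pattern (not from this commit), a waiter can poll the tracking semaphore until a target value completes, letting the function above select the correct lock internally:

// Illustrative busy-wait; real callers would sleep or yield between polls.
static void wait_for_completed_value(uvm_gpu_tracking_semaphore_t *sem, NvU64 value)
{
    while (uvm_gpu_tracking_semaphore_update_completed_value(sem) < value)
        cpu_relax();
}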