535.43.02

2026-02-07 00:29:58 +00:00 · 2023-05-30 10:11:36 -07:00
parent 6dd092ddb7
commit eb5c7665a1
1403 changed files with 295367 additions and 86235 deletions
--- a/kernel-open/nvidia-uvm/uvm_push_test.c
+++ b/kernel-open/nvidia-uvm/uvm_push_test.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,10 +38,78 @@

 #define TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES 2

-static NvU32 get_push_end_size(uvm_channel_t *channel)
+static NvU32 get_push_begin_size(uvm_channel_t *channel)
 {
-    if (uvm_channel_is_ce(channel))
-        return UVM_PUSH_CE_END_SIZE;
+    if (uvm_channel_is_sec2(channel)) {
+        // SEC2 channels allocate CSL signature buffer at the beginning.
+        return UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE + UVM_METHOD_SIZE;
+    }
+
+    return 0;
+}
+
+// This is the storage required by a semaphore release.
+static NvU32 get_push_end_min_size(uvm_channel_t *channel)
+{
+    if (uvm_channel_is_ce(channel)) {
+        if (uvm_channel_is_wlc(channel)) {
+            // Space (in bytes) used by uvm_push_end() on a Secure CE channel.
+            // Note that Secure CE semaphore release pushes two memset and one
+            // encryption method on top of the regular release.
+            // Memset size
+            // -------------
+            // PUSH_2U (SET_REMAP)              :   3 Words
+            // PUSH_2U (OFFSET_OUT)             :   3 Words
+            // PUSH_1U (LINE_LENGTH_IN)         :   2 Words
+            // PUSH_1U (LAUNCH_DMA)             :   2 Words
+            // Total 10 * UVM_METHOD_SIZE       :  40 Bytes
+            //
+            // Encrypt size
+            // -------------
+            // PUSH_1U (SET_SECURE_COPY_MODE)   :   2 Words
+            // PUSH_4U (ENCRYPT_AUTH_TAG + IV)  :   5 Words
+            // PUSH_4U (OFFSET_IN_OUT)          :   5 Words
+            // PUSH_1U (LINE_LENGTH_IN)         :   2 Words
+            // PUSH_1U (LAUNCH_DMA)             :   2 Words
+            // Total 16 * UVM_METHOD_SIZE       :  64 Bytes
+            //
+            // TOTAL (2 * 40 + 64)              : 144 Bytes
+
+            // Same as CE + LCIC GPPut update + LCIC doorbell
+            return 24 + 144 + 24 + 24;
+        }
+        else if (uvm_channel_is_secure_ce(channel)) {
+            return 24 + 144;
+        }
+        // Space (in bytes) used by uvm_push_end() on a CE channel.
+        return 24;
+    }
+    else if (uvm_channel_is_sec2(channel)) {
+        // A perfectly aligned inline buffer in SEC2 semaphore release.
+        // We add UVM_METHOD_SIZE because of the NOP method to reserve
+        // UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES (the inline buffer.)
+        return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_METHOD_SIZE;
+    }
+
+    return 0;
+}
+
+static NvU32 get_push_end_max_size(uvm_channel_t *channel)
+{
+    if (uvm_channel_is_ce(channel)) {
+        if (uvm_channel_is_wlc(channel)) {
+            // WLC pushes are always padded to UVM_MAX_WLC_PUSH_SIZE
+            return UVM_MAX_WLC_PUSH_SIZE;
+        }
+        // Space (in bytes) used by uvm_push_end() on a CE channel.
+        return get_push_end_min_size(channel);
+    }
+    else if (uvm_channel_is_sec2(channel)) {
+        // Space (in bytes) used by uvm_push_end() on a SEC2 channel.
+        // Note that SEC2 semaphore release uses an inline buffer with alignment
+        // requirements. This is the "worst" case semaphore_release storage.
+        return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT;
+    }

    return 0;
 }
@@ -56,27 +124,41 @@ static NV_STATUS test_push_end_size(uvm_va_space_t *va_space)
        for (type = 0; type < UVM_CHANNEL_TYPE_COUNT; ++type) {
            uvm_push_t push;
            NvU32 push_size_before;
-            NvU32 push_end_size_observed, push_end_size_expected;
+            NvU32 push_end_size_observed;
+            NvU32 push_end_size_expected[2];

+            // SEC2 is only available when Confidential Computing is enabled
+            if ((type == UVM_CHANNEL_TYPE_SEC2) && !uvm_conf_computing_mode_enabled(gpu))
+                continue;
+
+            // WLC is only available when Confidential Computing is enabled
+            if ((type == UVM_CHANNEL_TYPE_WLC) && !uvm_conf_computing_mode_enabled(gpu))
+                continue;
+
+            // LCIC doesn't accept pushes
+            if (type == UVM_CHANNEL_TYPE_LCIC)
+                continue;
            TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager,
                                             type,
                                             &push,
-                                             "type %s\n",
+                                             "type %s",
                                             uvm_channel_type_to_string(type)));

            push_size_before = uvm_push_get_size(&push);
            uvm_push_end(&push);
-
-            push_end_size_expected = get_push_end_size(push.channel);
            push_end_size_observed = uvm_push_get_size(&push) - push_size_before;

-            if (push_end_size_observed != push_end_size_expected) {
-                UVM_TEST_PRINT("push_end_size incorrect, %u instead of %u on channel type %s for GPU %s\n",
+            push_end_size_expected[0] = get_push_end_min_size(push.channel);
+            push_end_size_expected[1] = get_push_end_max_size(push.channel);
+
+            if (push_end_size_observed < push_end_size_expected[0] ||
+                push_end_size_observed > push_end_size_expected[1]) {
+                UVM_TEST_PRINT("push_end_size incorrect, %u instead of [%u:%u] on channel type %s for GPU %s\n",
                               push_end_size_observed,
-                               push_end_size_expected,
+                               push_end_size_expected[0],
+                               push_end_size_expected[1],
                               uvm_channel_type_to_string(type),
                               uvm_gpu_name(gpu));
-
                // The size mismatch error gets precedence over a wait error
                (void) uvm_push_wait(&push);

@@ -107,6 +189,11 @@ static NV_STATUS test_push_inline_data_gpu(uvm_gpu_t *gpu)
    uvm_mem_t *mem = NULL;
    char *verif;

+    // TODO: Bug 3839176: test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(UVM_PUSH_INLINE_DATA_MAX_SIZE, current->mm, &mem);
    TEST_CHECK_GOTO(status == NV_OK, done);

@@ -152,7 +239,10 @@ static NV_STATUS test_push_inline_data_gpu(uvm_gpu_t *gpu)
                        inline_buf[j] = 1 + i + j;
                    break;
                case TEST_INLINE_SINGLE_BUFFER:
-                    inline_buf = (char*)uvm_push_get_single_inline_buffer(&push, test_size, &data_gpu_address);
+                    inline_buf = (char*)uvm_push_get_single_inline_buffer(&push,
+                                                                          test_size,
+                                                                          UVM_METHOD_SIZE,
+                                                                          &data_gpu_address);
                    inline_data_size = test_size;
                    for (j = 0; j < test_size; ++j)
                        inline_buf[j] = 1 + i + j;
@@ -221,6 +311,12 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)

    for_each_va_space_gpu(gpu, va_space) {

+        // A secure channel reserved at the start of a push cannot be reserved
+        // again until that push ends. The test would block indefinitely
+        // if secure pools are not skipped, because the number of pushes started
+        // per pool exceeds the number of channels in the pool.
+        if (uvm_channel_type_requires_secure_pool(gpu, channel_type))
+            goto done;
        for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {
            uvm_push_t *push = &pushes[i];
            status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
@@ -278,6 +374,11 @@ static NV_STATUS test_push_interleaving_on_gpu(uvm_gpu_t* gpu)
    uvm_rm_mem_t *mem = NULL;
    atomic_t on_complete_counter = ATOMIC_INIT(0);

+    // TODO: Bug 3839176: test is waived on Confidential Computing because
+    // it assumes that GPU can access system memory without using encryption.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    // This test issues virtual memcopies/memsets, which in SR-IOV heavy cannot
    // be pushed to a proxy channel. Pushing to a UVM internal CE channel works
    // in all scenarios.
@@ -294,7 +395,7 @@ static NV_STATUS test_push_interleaving_on_gpu(uvm_gpu_t* gpu)
    num_non_paused_pushes = channel->num_gpfifo_entries;

    // The UVM driver only allows push interleaving across separate threads, but
-    // it is hard to consistenly replicate the interleaving. Instead, we
+    // it is hard to consistently replicate the interleaving. Instead, we
    // temporarily disable lock tracking, so we can interleave pushes from a
    // single thread.
    uvm_thread_context_lock_disable_tracking();
@@ -302,7 +403,7 @@ static NV_STATUS test_push_interleaving_on_gpu(uvm_gpu_t* gpu)
    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &mem);
    TEST_CHECK_GOTO(status == NV_OK, done);
    host_va = (NvU32*)uvm_rm_mem_get_cpu_va(mem);
-    gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(channel));
+    gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(channel)).address;
    memset(host_va, 0, size);

    // Begin a few pushes on the channel, but do not end them yet.
@@ -434,14 +535,14 @@ static NV_STATUS test_push_exactly_max_push(uvm_gpu_t *gpu,
    if (status != NV_OK)
        return status;

-    TEST_CHECK_RET(uvm_push_has_space(push, UVM_MAX_PUSH_SIZE));
-    TEST_CHECK_RET(!uvm_push_has_space(push, UVM_MAX_PUSH_SIZE + 1));
+    TEST_CHECK_RET(uvm_push_has_space(push, UVM_MAX_PUSH_SIZE - get_push_begin_size(push->channel)));
+    TEST_CHECK_RET(!uvm_push_has_space(push, UVM_MAX_PUSH_SIZE - get_push_begin_size(push->channel) + 1));

    semaphore_gpu_va = uvm_gpu_semaphore_get_gpu_va(sema_to_acquire, gpu, uvm_channel_is_proxy(push->channel));
    gpu->parent->host_hal->semaphore_acquire(push, semaphore_gpu_va, value);

    // Push a noop leaving just push_end_size in the pushbuffer.
-    push_end_size = get_push_end_size(push->channel);
+    push_end_size = get_push_end_max_size(push->channel);
    gpu->parent->host_hal->noop(push, UVM_MAX_PUSH_SIZE - uvm_push_get_size(push) - push_end_size);

    TEST_CHECK_RET(uvm_push_has_space(push, push_end_size));
@@ -476,7 +577,7 @@ static NvU32 test_count_available_chunks(uvm_pushbuffer_t *pushbuffer)

 // Test doing pushes of exactly UVM_MAX_PUSH_SIZE size and only allowing them to
 // complete one by one.
-static NV_STATUS test_max_pushes_on_gpu_and_channel_type(uvm_gpu_t *gpu, uvm_channel_type_t channel_type)
+static NV_STATUS test_max_pushes_on_gpu(uvm_gpu_t *gpu)
 {
    NV_STATUS status;

@@ -485,6 +586,7 @@ static NV_STATUS test_max_pushes_on_gpu_and_channel_type(uvm_gpu_t *gpu, uvm_cha
    NvU32 total_push_size = 0;
    NvU32 push_count = 0;
    NvU32 i;
+    uvm_channel_type_t channel_type = UVM_CHANNEL_TYPE_GPU_INTERNAL;

    uvm_tracker_init(&tracker);

@@ -492,6 +594,13 @@ static NV_STATUS test_max_pushes_on_gpu_and_channel_type(uvm_gpu_t *gpu, uvm_cha
    TEST_CHECK_GOTO(status == NV_OK, done);

    uvm_gpu_semaphore_set_payload(&sema, 0);
+    if (uvm_conf_computing_mode_enabled(gpu)) {
+        // Use SEC2 channel when Confidential Compute is enabled
+        // since all other channel types need extra space for
+        // work launch, and the channel type really doesn't
+        // matter for this test.
+        channel_type = UVM_CHANNEL_TYPE_SEC2;
+    }

    // Need to wait for all channels to completely idle so that the pushbuffer
    // is in completely idle state when we begin.
@@ -553,14 +662,6 @@ done:
    return status;
 }

-static NV_STATUS test_max_pushes_on_gpu(uvm_gpu_t *gpu)
-{
-
-    TEST_NV_CHECK_RET(test_max_pushes_on_gpu_and_channel_type(gpu, UVM_CHANNEL_TYPE_GPU_INTERNAL));
-
-    return NV_OK;
-}
-
 // Test doing UVM_PUSHBUFFER_CHUNKS independent pushes expecting each one to use
 // a different chunk in the pushbuffer.
 static NV_STATUS test_idle_chunks_on_gpu(uvm_gpu_t *gpu)
@@ -570,6 +671,15 @@ static NV_STATUS test_idle_chunks_on_gpu(uvm_gpu_t *gpu)
    uvm_gpu_semaphore_t sema;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    NvU32 i;
+    uvm_channel_type_t channel_type = UVM_CHANNEL_TYPE_GPU_INTERNAL;
+
+    if (uvm_conf_computing_mode_enabled(gpu)) {
+        // Use SEC2 channel when Confidential Compute is enabled
+        // since all other channel types need extra space for
+        // work launch, and the channel type really doesn't
+        // matter for this test.
+        channel_type = UVM_CHANNEL_TYPE_SEC2;
+    }

    uvm_tracker_init(&tracker);

@@ -587,7 +697,7 @@ static NV_STATUS test_idle_chunks_on_gpu(uvm_gpu_t *gpu)
        NvU64 semaphore_gpu_va;
        uvm_push_t push;

-        status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Push using chunk %u", i);
+        status = uvm_push_begin(gpu->channel_manager, channel_type, &push, "Push using chunk %u", i);
        TEST_CHECK_GOTO(status == NV_OK, done);

        semaphore_gpu_va = uvm_gpu_semaphore_get_gpu_va(&sema, gpu, uvm_channel_is_proxy(push.channel));
@@ -666,6 +776,15 @@ static NV_STATUS test_timestamp_on_gpu(uvm_gpu_t *gpu)
    NvU32 i;
    NvU64 last_stamp = 0;

+    // TODO: Bug 3988992: [UVM][HCC] RFE - Support encrypted semaphore for secure CE channels
+    // This test is waived when Confidential Computing is enabled because it
+    // assumes that CPU can directly read the result of a semaphore timestamp
+    // operation. Instead the operation needs to be followed up by an encrypt
+    // -decrypt trip to be accessible to CPU. This will be cleaner and simpler
+    // once encrypted semaphores are available.
+    if (uvm_conf_computing_mode_enabled(gpu))
+        return NV_OK;
+
    for (i = 0; i < 10; ++i) {
        status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Releasing a timestamp");
        if (status != NV_OK)
@@ -769,6 +888,10 @@ static NV_STATUS test_push_gpu_to_gpu(uvm_va_space_t *va_space)

    for_each_va_space_gpu(gpu_a, va_space) {

+        // TODO: Bug 3839176: the test is waived on Confidential Computing because
+        // it assumes that GPU can access system memory without using encryption.
+        if (uvm_conf_computing_mode_enabled(gpu_a))
+            return NV_OK;
        for_each_va_space_gpu(gpu_b, va_space) {
            if (can_do_peer_copies(va_space, gpu_a, gpu_b)) {
                waive = false;