535.43.02

This commit is contained in:
Andy Ritger
2023-05-30 10:11:36 -07:00
parent 6dd092ddb7
commit eb5c7665a1
1403 changed files with 295367 additions and 86235 deletions

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -38,10 +38,78 @@
#define TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES 2
// Size, in bytes, accounted for at the beginning of a push on the given
// channel. For a SEC2 channel this is the CSL signature buffer
// (UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE) plus one UVM_METHOD_SIZE; the final
// fallthrough returns 0.
//
// NOTE(review): this span looks like a rendered diff hunk in which removed and
// added lines are interleaved. The get_push_end_size() signature and the CE
// branch returning UVM_PUSH_CE_END_SIZE presumably belong to the helper that
// was removed, not to get_push_begin_size() - confirm against the real file.
static NvU32 get_push_end_size(uvm_channel_t *channel)
static NvU32 get_push_begin_size(uvm_channel_t *channel)
{
if (uvm_channel_is_ce(channel))
return UVM_PUSH_CE_END_SIZE;
if (uvm_channel_is_sec2(channel)) {
// SEC2 channels allocate CSL signature buffer at the beginning.
return UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE + UVM_METHOD_SIZE;
}
return 0;
}
// Smallest amount of pushbuffer space, in bytes, that ending a push on the
// given channel is expected to take: the storage required by a semaphore
// release. Channels that are neither CE nor SEC2 report 0.
static NvU32 get_push_end_min_size(uvm_channel_t *channel)
{
    // Space (in bytes) used by uvm_push_end() on a CE channel.
    const NvU32 ce_end_size = 24;

    // Extra space (in bytes) used by uvm_push_end() on a Secure CE channel.
    // The Secure CE semaphore release pushes two memsets and one encryption
    // on top of the regular release.
    //
    // Memset size (pushed twice)
    // --------------------------
    // PUSH_2U (SET_REMAP)             :   3 Words
    // PUSH_2U (OFFSET_OUT)            :   3 Words
    // PUSH_1U (LINE_LENGTH_IN)        :   2 Words
    // PUSH_1U (LAUNCH_DMA)            :   2 Words
    // Total 10 * UVM_METHOD_SIZE      :  40 Bytes
    //
    // Encrypt size
    // ------------
    // PUSH_1U (SET_SECURE_COPY_MODE)  :   2 Words
    // PUSH_4U (ENCRYPT_AUTH_TAG + IV) :   5 Words
    // PUSH_4U (OFFSET_IN_OUT)         :   5 Words
    // PUSH_2U (LINE_LENGTH_IN)        :   2 Words
    // PUSH_2U (LAUNCH_DMA)            :   2 Words
    // Total 16 * UVM_METHOD_SIZE      :  64 Bytes
    //
    // TOTAL (2 * 40 + 64)             : 144 Bytes
    //
    // NOTE(review): the last two encrypt rows are labelled PUSH_2U but are
    // counted as 2 words each, which is what PUSH_1U costs in the memset
    // table - the labels are presumably meant to be PUSH_1U; confirm.
    const NvU32 secure_ce_extra_size = 144;

    // On WLC channels the release is the same as on Secure CE, followed by an
    // LCIC GPPut update and an LCIC doorbell.
    const NvU32 lcic_gpput_update_size = 24;
    const NvU32 lcic_doorbell_size = 24;

    if (!uvm_channel_is_ce(channel)) {
        if (uvm_channel_is_sec2(channel)) {
            // A perfectly aligned inline buffer in SEC2 semaphore release.
            // UVM_METHOD_SIZE accounts for the NOP method that reserves the
            // UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES inline buffer.
            return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_METHOD_SIZE;
        }

        return 0;
    }

    if (uvm_channel_is_wlc(channel))
        return ce_end_size + secure_ce_extra_size + lcic_gpput_update_size + lcic_doorbell_size;

    if (uvm_channel_is_secure_ce(channel))
        return ce_end_size + secure_ce_extra_size;

    return ce_end_size;
}
// Largest amount of pushbuffer space, in bytes, that ending a push on the
// given channel is expected to take. Channels that are neither CE nor SEC2
// report 0.
static NvU32 get_push_end_max_size(uvm_channel_t *channel)
{
    if (!uvm_channel_is_ce(channel)) {
        if (!uvm_channel_is_sec2(channel))
            return 0;

        // Space (in bytes) used by uvm_push_end() on a SEC2 channel. The SEC2
        // semaphore release uses an inline buffer with alignment
        // requirements, so this is the "worst" case semaphore_release
        // storage.
        return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT;
    }

    // WLC pushes are always padded to UVM_MAX_WLC_PUSH_SIZE
    if (uvm_channel_is_wlc(channel))
        return UVM_MAX_WLC_PUSH_SIZE;

    // Every other CE channel uses the same space as the minimum.
    return get_push_end_min_size(channel);
}
@@ -56,27 +124,41 @@ static NV_STATUS test_push_end_size(uvm_va_space_t *va_space)
for (type = 0; type < UVM_CHANNEL_TYPE_COUNT; ++type) {
uvm_push_t push;
NvU32 push_size_before;
NvU32 push_end_size_observed, push_end_size_expected;
NvU32 push_end_size_observed;
NvU32 push_end_size_expected[2];
// SEC2 is only available when Confidential Computing is enabled
if ((type == UVM_CHANNEL_TYPE_SEC2) && !uvm_conf_computing_mode_enabled(gpu))
continue;
// WLC is only available when Confidential Computing is enabled
if ((type == UVM_CHANNEL_TYPE_WLC) && !uvm_conf_computing_mode_enabled(gpu))
continue;
// LCIC doesn't accept pushes
if (type == UVM_CHANNEL_TYPE_LCIC)
continue;
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager,
type,
&push,
"type %s\n",
"type %s",
uvm_channel_type_to_string(type)));
push_size_before = uvm_push_get_size(&push);
uvm_push_end(&push);
push_end_size_expected = get_push_end_size(push.channel);
push_end_size_observed = uvm_push_get_size(&push) - push_size_before;
if (push_end_size_observed != push_end_size_expected) {
UVM_TEST_PRINT("push_end_size incorrect, %u instead of %u on channel type %s for GPU %s\n",
push_end_size_expected[0] = get_push_end_min_size(push.channel);
push_end_size_expected[1] = get_push_end_max_size(push.channel);
if (push_end_size_observed < push_end_size_expected[0] ||
push_end_size_observed > push_end_size_expected[1]) {
UVM_TEST_PRINT("push_end_size incorrect, %u instead of [%u:%u] on channel type %s for GPU %s\n",
push_end_size_observed,
push_end_size_expected,
push_end_size_expected[0],
push_end_size_expected[1],
uvm_channel_type_to_string(type),
uvm_gpu_name(gpu));
// The size mismatch error gets precedence over a wait error
(void) uvm_push_wait(&push);
@@ -107,6 +189,11 @@ static NV_STATUS test_push_inline_data_gpu(uvm_gpu_t *gpu)
uvm_mem_t *mem = NULL;
char *verif;
// TODO: Bug 3839176: test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(UVM_PUSH_INLINE_DATA_MAX_SIZE, current->mm, &mem);
TEST_CHECK_GOTO(status == NV_OK, done);
@@ -152,7 +239,10 @@ static NV_STATUS test_push_inline_data_gpu(uvm_gpu_t *gpu)
inline_buf[j] = 1 + i + j;
break;
case TEST_INLINE_SINGLE_BUFFER:
inline_buf = (char*)uvm_push_get_single_inline_buffer(&push, test_size, &data_gpu_address);
inline_buf = (char*)uvm_push_get_single_inline_buffer(&push,
test_size,
UVM_METHOD_SIZE,
&data_gpu_address);
inline_data_size = test_size;
for (j = 0; j < test_size; ++j)
inline_buf[j] = 1 + i + j;
@@ -221,6 +311,12 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
for_each_va_space_gpu(gpu, va_space) {
// A secure channel reserved at the start of a push cannot be reserved
// again until that push ends. The test would block indefinitely
// if secure pools are not skipped, because the number of pushes started
// per pool exceeds the number of channels in the pool.
if (uvm_channel_type_requires_secure_pool(gpu, channel_type))
goto done;
for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {
uvm_push_t *push = &pushes[i];
status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
@@ -278,6 +374,11 @@ static NV_STATUS test_push_interleaving_on_gpu(uvm_gpu_t* gpu)
uvm_rm_mem_t *mem = NULL;
atomic_t on_complete_counter = ATOMIC_INIT(0);
// TODO: Bug 3839176: test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
// This test issues virtual memcopies/memsets, which in SR-IOV heavy cannot
// be pushed to a proxy channel. Pushing to a UVM internal CE channel works
// in all scenarios.
@@ -294,7 +395,7 @@ static NV_STATUS test_push_interleaving_on_gpu(uvm_gpu_t* gpu)
num_non_paused_pushes = channel->num_gpfifo_entries;
// The UVM driver only allows push interleaving across separate threads, but
// it is hard to consistenly replicate the interleaving. Instead, we
// it is hard to consistently replicate the interleaving. Instead, we
// temporarily disable lock tracking, so we can interleave pushes from a
// single thread.
uvm_thread_context_lock_disable_tracking();
@@ -302,7 +403,7 @@ static NV_STATUS test_push_interleaving_on_gpu(uvm_gpu_t* gpu)
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_va = (NvU32*)uvm_rm_mem_get_cpu_va(mem);
gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(channel));
gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(channel)).address;
memset(host_va, 0, size);
// Begin a few pushes on the channel, but do not end them yet.
@@ -434,14 +535,14 @@ static NV_STATUS test_push_exactly_max_push(uvm_gpu_t *gpu,
if (status != NV_OK)
return status;
TEST_CHECK_RET(uvm_push_has_space(push, UVM_MAX_PUSH_SIZE));
TEST_CHECK_RET(!uvm_push_has_space(push, UVM_MAX_PUSH_SIZE + 1));
TEST_CHECK_RET(uvm_push_has_space(push, UVM_MAX_PUSH_SIZE - get_push_begin_size(push->channel)));
TEST_CHECK_RET(!uvm_push_has_space(push, UVM_MAX_PUSH_SIZE - get_push_begin_size(push->channel) + 1));
semaphore_gpu_va = uvm_gpu_semaphore_get_gpu_va(sema_to_acquire, gpu, uvm_channel_is_proxy(push->channel));
gpu->parent->host_hal->semaphore_acquire(push, semaphore_gpu_va, value);
// Push a noop leaving just push_end_size in the pushbuffer.
push_end_size = get_push_end_size(push->channel);
push_end_size = get_push_end_max_size(push->channel);
gpu->parent->host_hal->noop(push, UVM_MAX_PUSH_SIZE - uvm_push_get_size(push) - push_end_size);
TEST_CHECK_RET(uvm_push_has_space(push, push_end_size));
@@ -476,7 +577,7 @@ static NvU32 test_count_available_chunks(uvm_pushbuffer_t *pushbuffer)
// Test doing pushes of exactly UVM_MAX_PUSH_SIZE size and only allowing them to
// complete one by one.
static NV_STATUS test_max_pushes_on_gpu_and_channel_type(uvm_gpu_t *gpu, uvm_channel_type_t channel_type)
static NV_STATUS test_max_pushes_on_gpu(uvm_gpu_t *gpu)
{
NV_STATUS status;
@@ -485,6 +586,7 @@ static NV_STATUS test_max_pushes_on_gpu_and_channel_type(uvm_gpu_t *gpu, uvm_cha
NvU32 total_push_size = 0;
NvU32 push_count = 0;
NvU32 i;
uvm_channel_type_t channel_type = UVM_CHANNEL_TYPE_GPU_INTERNAL;
uvm_tracker_init(&tracker);
@@ -492,6 +594,13 @@ static NV_STATUS test_max_pushes_on_gpu_and_channel_type(uvm_gpu_t *gpu, uvm_cha
TEST_CHECK_GOTO(status == NV_OK, done);
uvm_gpu_semaphore_set_payload(&sema, 0);
if (uvm_conf_computing_mode_enabled(gpu)) {
// Use SEC2 channel when Confidential Compute is enabled
// since all other channel types need extra space for
// work launch, and the channel type really doesn't
// matter for this test.
channel_type = UVM_CHANNEL_TYPE_SEC2;
}
// Need to wait for all channels to completely idle so that the pushbuffer
// is in completely idle state when we begin.
@@ -553,14 +662,6 @@ done:
return status;
}
static NV_STATUS test_max_pushes_on_gpu(uvm_gpu_t *gpu)
{
TEST_NV_CHECK_RET(test_max_pushes_on_gpu_and_channel_type(gpu, UVM_CHANNEL_TYPE_GPU_INTERNAL));
return NV_OK;
}
// Test doing UVM_PUSHBUFFER_CHUNKS independent pushes expecting each one to use
// a different chunk in the pushbuffer.
static NV_STATUS test_idle_chunks_on_gpu(uvm_gpu_t *gpu)
@@ -570,6 +671,15 @@ static NV_STATUS test_idle_chunks_on_gpu(uvm_gpu_t *gpu)
uvm_gpu_semaphore_t sema;
uvm_tracker_t tracker = UVM_TRACKER_INIT();
NvU32 i;
uvm_channel_type_t channel_type = UVM_CHANNEL_TYPE_GPU_INTERNAL;
if (uvm_conf_computing_mode_enabled(gpu)) {
// Use SEC2 channel when Confidential Compute is enabled
// since all other channel types need extra space for
// work launch, and the channel type really doesn't
// matter for this test.
channel_type = UVM_CHANNEL_TYPE_SEC2;
}
uvm_tracker_init(&tracker);
@@ -587,7 +697,7 @@ static NV_STATUS test_idle_chunks_on_gpu(uvm_gpu_t *gpu)
NvU64 semaphore_gpu_va;
uvm_push_t push;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Push using chunk %u", i);
status = uvm_push_begin(gpu->channel_manager, channel_type, &push, "Push using chunk %u", i);
TEST_CHECK_GOTO(status == NV_OK, done);
semaphore_gpu_va = uvm_gpu_semaphore_get_gpu_va(&sema, gpu, uvm_channel_is_proxy(push.channel));
@@ -666,6 +776,15 @@ static NV_STATUS test_timestamp_on_gpu(uvm_gpu_t *gpu)
NvU32 i;
NvU64 last_stamp = 0;
// TODO: Bug 3988992: [UVM][HCC] RFE - Support encrypted semaphore for secure CE channels
// This test is waived when Confidential Computing is enabled because it
// assumes that CPU can directly read the result of a semaphore timestamp
// operation. Instead the operation needs to be followed up by an encrypt
// -decrypt trip to be accessible to CPU. This will be cleaner and simpler
// once encrypted semaphores are available.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
for (i = 0; i < 10; ++i) {
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Releasing a timestamp");
if (status != NV_OK)
@@ -769,6 +888,10 @@ static NV_STATUS test_push_gpu_to_gpu(uvm_va_space_t *va_space)
for_each_va_space_gpu(gpu_a, va_space) {
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu_a))
return NV_OK;
for_each_va_space_gpu(gpu_b, va_space) {
if (can_do_peer_copies(va_space, gpu_a, gpu_b)) {
waive = false;