This commit is contained in:
Bernhard Stoeckner
2023-08-08 18:26:28 +02:00
parent 29f830f1bb
commit 12c0739352
85 changed files with 33144 additions and 32566 deletions

View File

@@ -272,19 +272,26 @@ static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static void unlock_channel_for_push(uvm_channel_t *channel)
{
if (uvm_channel_is_secure(channel)) {
NvU32 index = uvm_channel_index_in_pool(channel);
NvU32 index;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(test_bit(index, channel->pool->push_locks));
__clear_bit(index, channel->pool->push_locks);
uvm_up_out_of_order(&channel->pool->push_sem);
}
if (!uvm_conf_computing_mode_enabled(gpu))
return;
index = uvm_channel_index_in_pool(channel);
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(test_bit(index, channel->pool->push_locks));
__clear_bit(index, channel->pool->push_locks);
uvm_up_out_of_order(&channel->pool->push_sem);
}
static bool is_channel_locked_for_push(uvm_channel_t *channel)
{
if (uvm_channel_is_secure(channel))
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_conf_computing_mode_enabled(gpu))
return test_bit(uvm_channel_index_in_pool(channel), channel->pool->push_locks);
// For CE and proxy channels, we always return that the channel is locked,
@@ -295,25 +302,25 @@ static bool is_channel_locked_for_push(uvm_channel_t *channel)
static void lock_channel_for_push(uvm_channel_t *channel)
{
if (uvm_channel_is_secure(channel)) {
NvU32 index = uvm_channel_index_in_pool(channel);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 index = uvm_channel_index_in_pool(channel);
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(!test_bit(index, channel->pool->push_locks));
UVM_ASSERT(!test_bit(index, channel->pool->push_locks));
__set_bit(index, channel->pool->push_locks);
}
__set_bit(index, channel->pool->push_locks);
}
static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 index = uvm_channel_index_in_pool(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
uvm_channel_pool_assert_locked(channel->pool);
if (uvm_channel_is_secure(channel) &&
!test_bit(index, channel->pool->push_locks) &&
try_claim_channel_locked(channel, num_gpfifo_entries)) {
if (!test_bit(index, channel->pool->push_locks) && try_claim_channel_locked(channel, num_gpfifo_entries)) {
lock_channel_for_push(channel);
return true;
}
@@ -321,57 +328,15 @@ static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo
return false;
}
// Reserve a channel in the specified CE pool
static NV_STATUS channel_reserve_in_ce_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
    uvm_channel_t *candidate;
    uvm_spin_loop_t spin_state;

    UVM_ASSERT(pool);
    UVM_ASSERT(uvm_channel_pool_is_ce(pool));

    // Fast path: claim the first channel with a free GPFIFO slot, without
    // waiting.
    uvm_for_each_channel_in_pool(candidate, pool) {
        // TODO: Bug 1764953: Prefer idle/less busy channels
        if (try_claim_channel(candidate, 1)) {
            *channel_out = candidate;
            return NV_OK;
        }
    }

    // Slow path: spin over the pool until a channel frees up, surfacing any
    // channel error encountered along the way.
    uvm_spin_loop_init(&spin_state);
    for (;;) {
        uvm_for_each_channel_in_pool(candidate, pool) {
            NV_STATUS status;

            uvm_channel_update_progress(candidate);

            if (try_claim_channel(candidate, 1)) {
                *channel_out = candidate;
                return NV_OK;
            }

            status = uvm_channel_check_errors(candidate);
            if (status != NV_OK)
                return status;

            UVM_SPIN_LOOP(&spin_state);
        }
    }

    // Unreachable: the loop above only exits via return.
    UVM_ASSERT_MSG(0, "Cannot get here?!\n");
    return NV_ERR_GENERIC;
}
// Reserve a channel in the specified secure pool
static NV_STATUS channel_reserve_in_secure_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
// Reserve a channel in the specified pool. The channel is locked until the push
// ends
static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
uvm_channel_t *channel;
uvm_spin_loop_t spin;
NvU32 index;
UVM_ASSERT(pool);
UVM_ASSERT(pool->secure);
UVM_ASSERT(uvm_conf_computing_mode_enabled(pool->manager->gpu));
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
@@ -426,6 +391,51 @@ done:
return NV_OK;
}
// Reserve a channel in the specified pool
static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
    uvm_channel_t *candidate;
    uvm_spin_loop_t spin_state;

    UVM_ASSERT(pool);

    // When Confidential Computing is enabled the reserved channel must also
    // stay locked until the push ends, which is handled by the dedicated
    // reserve-and-lock path.
    if (uvm_conf_computing_mode_enabled(pool->manager->gpu))
        return channel_reserve_and_lock_in_pool(pool, channel_out);

    // Fast path: claim the first channel with a free GPFIFO slot, without
    // waiting.
    uvm_for_each_channel_in_pool(candidate, pool) {
        // TODO: Bug 1764953: Prefer idle/less busy channels
        if (try_claim_channel(candidate, 1)) {
            *channel_out = candidate;
            return NV_OK;
        }
    }

    // Slow path: spin over the pool until a channel frees up, surfacing any
    // channel error encountered along the way.
    uvm_spin_loop_init(&spin_state);
    for (;;) {
        uvm_for_each_channel_in_pool(candidate, pool) {
            NV_STATUS status;

            uvm_channel_update_progress(candidate);

            if (try_claim_channel(candidate, 1)) {
                *channel_out = candidate;
                return NV_OK;
            }

            status = uvm_channel_check_errors(candidate);
            if (status != NV_OK)
                return status;

            UVM_SPIN_LOOP(&spin_state);
        }
    }

    // Unreachable: the loop above only exits via return.
    UVM_ASSERT_MSG(0, "Cannot get here?!\n");
    return NV_ERR_GENERIC;
}
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
{
uvm_channel_pool_t *pool = manager->pool_to_use.default_for_type[type];
@@ -433,10 +443,7 @@ NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_t
UVM_ASSERT(pool != NULL);
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
if (pool->secure)
return channel_reserve_in_secure_pool(pool, channel_out);
return channel_reserve_in_ce_pool(pool, channel_out);
return channel_reserve_in_pool(pool, channel_out);
}
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
@@ -452,10 +459,7 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
if (pool->secure)
return channel_reserve_in_secure_pool(pool, channel_out);
return channel_reserve_in_ce_pool(pool, channel_out);
return channel_reserve_in_pool(pool, channel_out);
}
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
@@ -491,7 +495,7 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
return push_info - channel->push_infos;
}
static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, uvm_channel_t *channel, NvU64 semaphore_va)
static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, NvU64 semaphore_va)
{
NvU32 iv_index;
uvm_gpu_address_t notifier_gpu_va;
@@ -499,12 +503,14 @@ static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, uvm_channel_
uvm_gpu_address_t semaphore_gpu_va;
uvm_gpu_address_t encrypted_payload_gpu_va;
uvm_gpu_t *gpu = push->gpu;
uvm_channel_t *channel = push->channel;
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
UvmCslIv *iv_cpu_addr = semaphore->conf_computing.ivs;
NvU32 payload_size = sizeof(*semaphore->payload);
NvU32 *last_pushed_notifier = &semaphore->conf_computing.last_pushed_notifier;
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
encrypted_payload_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.encrypted_payload, gpu, false);
notifier_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.notifier, gpu, false);
@@ -538,19 +544,21 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
{
NV_STATUS status;
uvm_channel_manager_t *manager;
uvm_gpu_t *gpu;
UVM_ASSERT(channel);
UVM_ASSERT(push);
manager = channel->pool->manager;
gpu = uvm_channel_get_gpu(channel);
// Only SEC2 and WLC with set up fixed schedule can use direct push
// submission. All other cases (including WLC pre-schedule) need to
// reserve a launch channel that will be used to submit this push
// indirectly.
if (uvm_conf_computing_mode_enabled(uvm_channel_get_gpu(channel)) &&
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager)) &&
!uvm_channel_is_sec2(channel)) {
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel) &&
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager))) {
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(manager) ?
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
@@ -559,9 +567,9 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
return status;
}
// For secure channels, channel's lock should have been acquired in
// uvm_channel_reserve() or channel_reserve_in_secure_pool() before
// reaching here.
// When the Confidential Computing feature is enabled, the channel's lock
// should have already been acquired in uvm_channel_reserve() or
// channel_reserve_and_lock_in_pool().
UVM_ASSERT(is_channel_locked_for_push(channel));
push->channel = channel;
@@ -586,9 +594,8 @@ static void internal_channel_submit_work(uvm_push_t *push, NvU32 push_size, NvU3
NvU64 *gpfifo_entry;
NvU64 pushbuffer_va;
uvm_channel_t *channel = push->channel;
uvm_channel_manager_t *channel_manager = channel->pool->manager;
uvm_pushbuffer_t *pushbuffer = channel_manager->pushbuffer;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
BUILD_BUG_ON(sizeof(*gpfifo_entry) != NVB06F_GP_ENTRY__SIZE);
UVM_ASSERT(!uvm_channel_is_proxy(channel));
@@ -644,12 +651,11 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
static void do_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, NvU32 new_payload)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_channel_is_ce(push->channel))
gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
else if (uvm_channel_is_sec2(push->channel))
gpu->parent->sec2_hal->semaphore_release(push, semaphore_va, new_payload);
else
UVM_ASSERT_MSG(0, "Semaphore release on an unsupported channel.\n");
gpu->parent->sec2_hal->semaphore_release(push, semaphore_va, new_payload);
}
static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, NvU32 new_payload)
@@ -668,8 +674,8 @@ static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semap
// needs to be scheduled to get an encrypted shadow copy in unprotected
// sysmem. This allows UVM to later decrypt it and observe the new
// semaphore value.
if (uvm_channel_is_secure_ce(push->channel))
channel_semaphore_gpu_encrypt_payload(push, push->channel, semaphore_va);
if (uvm_conf_computing_mode_enabled(push->gpu) && uvm_channel_is_ce(push->channel))
channel_semaphore_gpu_encrypt_payload(push, semaphore_va);
}
static uvm_channel_t *get_paired_channel(uvm_channel_t *channel)
@@ -746,15 +752,12 @@ static void internal_channel_submit_work_wlc(uvm_push_t *push)
wmb();
// Ring the WLC doorbell to start processing the above push
UVM_GPU_WRITE_ONCE(*wlc_channel->channel_info.workSubmissionOffset,
wlc_channel->channel_info.workSubmissionToken);
UVM_GPU_WRITE_ONCE(*wlc_channel->channel_info.workSubmissionOffset, wlc_channel->channel_info.workSubmissionToken);
}
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push, NvU32 old_cpu_put, NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(push->channel);
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
uvm_push_t indirect_push;
@@ -767,7 +770,7 @@ static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
uvm_gpu_address_t push_enc_auth_tag_gpu;
NvU64 gpfifo_gpu_va = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_ce(push->channel));
UVM_ASSERT(uvm_channel_is_wlc(push->launch_channel));
// WLC submissions are done under channel lock, so there should be no
@@ -848,8 +851,6 @@ static void update_gpput_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&gpput_auth_tag_gpu);
// Update GPPUT. The update needs 4B write to specific offset,
// however we can only do 16B aligned decrypt writes.
// A poison value is written to all other locations, this is ignored in
@@ -922,7 +923,7 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
gpfifo_scratchpad[0] = previous_gpfifo->control_value;
}
else {
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
NvU64 prev_pb_va = uvm_pushbuffer_get_gpu_va_base(pushbuffer) + previous_gpfifo->pushbuffer_offset;
// Reconstruct the previous gpfifo entry. UVM_GPFIFO_SYNC_WAIT is
@@ -951,11 +952,9 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
gpfifo_auth_tag_gpu.address);
}
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push, NvU32 old_cpu_put, NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(push->channel);
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
uvm_push_t indirect_push;
@@ -968,7 +967,7 @@ static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
uvm_gpu_address_t push_auth_tag_gpu;
uvm_spin_loop_t spin;
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_ce(push->channel));
UVM_ASSERT(uvm_channel_is_sec2(push->launch_channel));
// If the old_cpu_put is not equal to the last gpu put, other pushes are
@@ -1051,7 +1050,7 @@ static void encrypt_push(uvm_push_t *push)
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 push_size = uvm_push_get_size(push);
uvm_push_info_t *push_info = uvm_push_info_from_push(push);
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
unsigned auth_tag_offset = UVM_CONF_COMPUTING_AUTH_TAG_SIZE * push->push_info_index;
if (!uvm_conf_computing_mode_enabled(gpu))
@@ -1098,6 +1097,7 @@ void uvm_channel_end_push(uvm_push_t *push)
NvU32 push_size;
NvU32 cpu_put;
NvU32 new_cpu_put;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
bool needs_sec2_work_submit = false;
channel_pool_lock(channel->pool);
@@ -1112,7 +1112,7 @@ void uvm_channel_end_push(uvm_push_t *push)
if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
uvm_channel_t *paired_lcic = wlc_get_paired_lcic(channel);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
gpu->parent->ce_hal->semaphore_reduction_inc(push,
paired_lcic->channel_info.gpPutGpuVa,
paired_lcic->num_gpfifo_entries - 1);
@@ -1126,7 +1126,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// pushes. However, direct pushes to WLC can be smaller than this
// size. This is used e.g. by indirect submission of control
// gpfifo entries.
channel_manager->gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
}
}
@@ -1144,7 +1144,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// Indirect submission via SEC2/WLC needs pushes to be aligned for
// encryption/decryption. The pushbuffer_size of this push
// influences starting address of the next push.
if (uvm_conf_computing_mode_enabled(uvm_channel_get_gpu(channel)))
if (uvm_conf_computing_mode_enabled(gpu))
entry->pushbuffer_size = UVM_ALIGN_UP(push_size, UVM_CONF_COMPUTING_BUF_ALIGNMENT);
entry->push_info = &channel->push_infos[push->push_info_index];
entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;
@@ -1158,12 +1158,13 @@ void uvm_channel_end_push(uvm_push_t *push)
else if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
internal_channel_submit_work_wlc(push);
}
else if (uvm_conf_computing_mode_enabled(channel_manager->gpu) && !uvm_channel_is_sec2(channel)) {
else if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel)) {
if (uvm_channel_manager_is_wlc_ready(channel_manager)) {
internal_channel_submit_work_indirect_wlc(push, cpu_put, new_cpu_put);
}
else {
// submitting via SEC2 starts a push, postpone until this push is ended
// submitting via SEC2 starts a push, postpone until this push is
// ended
needs_sec2_work_submit = true;
}
}
@@ -1202,12 +1203,13 @@ void uvm_channel_end_push(uvm_push_t *push)
static void submit_ctrl_gpfifo(uvm_channel_t *channel, uvm_gpfifo_entry_t *entry, NvU32 new_cpu_put)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 cpu_put = channel->cpu_put;
NvU64 *gpfifo_entry;
UVM_ASSERT(entry == &channel->gpfifo_entries[cpu_put]);
if (uvm_conf_computing_mode_enabled(gpu) && !uvm_channel_is_sec2(channel))
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
return;
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
@@ -1234,7 +1236,7 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
UVM_ASSERT(!uvm_channel_is_sec2(channel));
UVM_ASSERT(uvm_channel_is_ce(channel));
// If the old_cpu_put is not equal to the last gpu put,
// Another push(es) is pending that needs to be submitted.
@@ -1290,6 +1292,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
NvU32 cpu_put;
NvU32 new_cpu_put;
bool needs_indirect_submit = false;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
channel_pool_lock(channel->pool);
@@ -1312,7 +1315,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
--channel->current_gpfifo_count;
submit_ctrl_gpfifo(channel, entry, new_cpu_put);
if (uvm_conf_computing_mode_enabled(channel->pool->manager->gpu) && !uvm_channel_is_sec2(channel))
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
needs_indirect_submit = true;
channel->cpu_put = new_cpu_put;
@@ -1385,16 +1388,15 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
return NV_OK;
}
static NV_STATUS uvm_channel_reserve_secure(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static NV_STATUS channel_reserve_and_lock(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
uvm_spin_loop_t spin;
NV_STATUS status = NV_OK;
uvm_channel_pool_t *pool = channel->pool;
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
// uvm_channel_end_push() routine. Note that different than in
// channel_reserve_in_secure_pool, we cannot pick an unlocked channel from
// the secure pool, even when there is one available and *channel is locked.
// channel_reserve_and_lock_in_pool, we cannot pick an unlocked channel from
// the pool, even when there is one available and *channel is locked.
// Not a concern given that uvm_channel_reserve() is not the common-case for
// channel reservation, and only used for channel initialization, GPFIFO
// control work submission, and testing.
@@ -1409,6 +1411,8 @@ static NV_STATUS uvm_channel_reserve_secure(uvm_channel_t *channel, NvU32 num_gp
uvm_spin_loop_init(&spin);
while (1) {
NV_STATUS status;
uvm_channel_update_progress(channel);
channel_pool_lock(pool);
@@ -1436,9 +1440,10 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
NV_STATUS status = NV_OK;
uvm_spin_loop_t spin;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_channel_is_secure(channel))
return uvm_channel_reserve_secure(channel, num_gpfifo_entries);
if (uvm_conf_computing_mode_enabled(gpu))
return channel_reserve_and_lock(channel, num_gpfifo_entries);
if (try_claim_channel(channel, num_gpfifo_entries))
return NV_OK;
@@ -1578,8 +1583,10 @@ NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
static NV_STATUS csl_init(uvm_channel_t *channel)
{
NV_STATUS status;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_secure(channel));
uvm_mutex_init(&channel->csl.ctx_lock, UVM_LOCK_ORDER_LEAF);
status = uvm_rm_locked_call(nvUvmInterfaceCslInitContext(&channel->csl.ctx, channel->handle));
@@ -1589,7 +1596,7 @@ static NV_STATUS csl_init(uvm_channel_t *channel)
else {
UVM_DBG_PRINT("nvUvmInterfaceCslInitContext() failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(channel->pool->manager->gpu));
uvm_gpu_name(gpu));
}
return status;
@@ -1609,7 +1616,10 @@ static void csl_destroy(uvm_channel_t *channel)
static void free_conf_computing_buffers(uvm_channel_t *channel)
{
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
uvm_rm_mem_free(channel->conf_computing.static_pb_protected_vidmem);
uvm_rm_mem_free(channel->conf_computing.static_pb_unprotected_sysmem);
@@ -1637,10 +1647,12 @@ static void free_conf_computing_buffers(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
{
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NV_STATUS status;
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
sizeof(semaphore->conf_computing.last_pushed_notifier),
@@ -1679,7 +1691,7 @@ static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
size_t aligned_wlc_push_size = UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
@@ -1723,7 +1735,7 @@ static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
const size_t notifier_size = sizeof(*channel->conf_computing.static_notifier_entry_unprotected_sysmem_cpu);
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
@@ -1758,8 +1770,10 @@ static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
{
NV_STATUS status;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
status = alloc_conf_computing_buffers_semaphore(channel);
if (status != NV_OK)
@@ -1772,7 +1786,6 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
status = alloc_conf_computing_buffers_lcic(channel);
}
else {
uvm_gpu_t *gpu = channel->pool->manager->gpu;
void *push_crypto_bundles = uvm_kvmalloc_zero(sizeof(*channel->conf_computing.push_crypto_bundles) *
channel->num_gpfifo_entries);
@@ -1793,6 +1806,8 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(pool->num_channels > 0);
if (channel->tracking_sem.queued_value > 0) {
@@ -1816,9 +1831,10 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
uvm_kvfree(channel->gpfifo_entries);
if (uvm_channel_is_secure(channel)) {
if (uvm_conf_computing_mode_enabled(gpu)) {
csl_destroy(channel);
if (uvm_channel_is_secure_ce(channel))
if (uvm_channel_is_ce(channel))
free_conf_computing_buffers(channel);
}
@@ -1905,8 +1921,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
channel_alloc_params.gpPutLoc = UVM_BUFFER_LOCATION_SYS;
}
channel_alloc_params.secure = channel->pool->secure;
status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(channel_get_tsg(channel),
&channel_alloc_params,
&channel->handle,
@@ -1928,8 +1942,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
channel_info->hwChannelId,
uvm_channel_is_sec2(channel) ? "SEC2" :
uvm_channel_is_wlc(channel) ? "WLC" :
uvm_channel_is_lcic(channel) ? "LCIC" :
uvm_channel_is_secure(channel) ? "CE (secure)" : "CE",
uvm_channel_is_lcic(channel) ? "LCIC" : "CE",
channel->pool->engine_index);
return NV_OK;
@@ -1981,7 +1994,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
channel->tools.pending_event_count = 0;
INIT_LIST_HEAD(&channel->tools.channel_list_node);
if (uvm_conf_computing_mode_enabled(gpu) && !uvm_channel_is_sec2(channel))
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
semaphore_pool = gpu->secure_semaphore_pool;
status = uvm_gpu_tracking_semaphore_alloc(semaphore_pool, &channel->tracking_sem);
@@ -2007,7 +2020,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
goto error;
}
if (uvm_channel_is_secure(channel)) {
if (uvm_conf_computing_mode_enabled(gpu)) {
status = csl_init(channel);
if (status != NV_OK)
goto error;
@@ -2075,7 +2088,7 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
if (uvm_gpu_has_pushbuffer_segments(gpu)) {
NvU64 gpfifo_entry;
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
NvU64 pb_base = uvm_pushbuffer_get_gpu_va_base(pushbuffer);
if (uvm_channel_is_sec2(channel))
@@ -2095,10 +2108,8 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
if (uvm_channel_is_ce(channel))
gpu->parent->ce_hal->init(&push);
else if (uvm_channel_is_sec2(channel))
gpu->parent->sec2_hal->init(&push);
else
UVM_ASSERT_MSG(0, "Unknown channel type!");
gpu->parent->sec2_hal->init(&push);
gpu->parent->host_hal->init(&push);
@@ -2153,11 +2164,6 @@ static unsigned channel_pool_type_num_tsgs(uvm_channel_pool_type_t pool_type)
return 1;
}
// A pool type is valid when it is exactly one flag bit below the pool type
// mask.
static bool pool_type_is_valid(uvm_channel_pool_type_t pool_type)
{
    if (!is_power_of_2(pool_type))
        return false;

    return pool_type < UVM_CHANNEL_POOL_TYPE_MASK;
}
static UVM_GPU_CHANNEL_ENGINE_TYPE pool_type_to_engine_type(uvm_channel_pool_type_t pool_type)
{
if (pool_type == UVM_CHANNEL_POOL_TYPE_SEC2)
@@ -2229,7 +2235,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
unsigned num_tsgs;
uvm_channel_pool_t *pool;
UVM_ASSERT(pool_type_is_valid(pool_type));
UVM_ASSERT(uvm_pool_type_is_valid(pool_type));
pool = channel_manager->channel_pools + channel_manager->num_channel_pools;
channel_manager->num_channel_pools++;
@@ -2260,10 +2266,10 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
num_channels = channel_pool_type_num_channels(pool_type);
UVM_ASSERT(num_channels <= UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
if (pool->secure) {
if (uvm_conf_computing_mode_enabled(channel_manager->gpu)) {
// Use different order lock for SEC2 and WLC channels.
// This allows reserving a SEC2 or WLC channel for indirect work
// submission while holding a reservation for a secure channel.
// submission while holding a reservation for a channel.
uvm_lock_order_t order = uvm_channel_pool_is_sec2(pool) ? UVM_LOCK_ORDER_CSL_SEC2_PUSH :
(uvm_channel_pool_is_wlc(pool) ? UVM_LOCK_ORDER_CSL_WLC_PUSH :
UVM_LOCK_ORDER_CSL_PUSH);
@@ -2297,23 +2303,6 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
return status;
}
// Add a channel pool like channel_pool_add(), but flag it as secure first.
//
// The next free slot in the manager's pool array is the one channel_pool_add()
// will initialize, so the secure flag is set on it before delegating.
static NV_STATUS channel_pool_add_secure(uvm_channel_manager_t *channel_manager,
                                         uvm_channel_pool_type_t pool_type,
                                         unsigned engine_index,
                                         uvm_channel_pool_t **pool_out)
{
    uvm_channel_pool_t *next_pool = channel_manager->channel_pools + channel_manager->num_channel_pools;

    next_pool->secure = true;

    return channel_pool_add(channel_manager, pool_type, engine_index, pool_out);
}
// Report whether the given channel type must be served from a secure pool.
bool uvm_channel_type_requires_secure_pool(uvm_gpu_t *gpu, uvm_channel_type_t channel_type)
{
    // For now, all channels are secure channels
    return true;
}
static bool ce_usable_for_channel_type(uvm_channel_type_t type, const UvmGpuCopyEngineCaps *cap)
{
if (!cap->supported || cap->grce)
@@ -2461,13 +2450,6 @@ static NV_STATUS pick_ce_for_channel_type(uvm_channel_manager_t *manager,
if (!ce_usable_for_channel_type(type, cap))
continue;
if (uvm_conf_computing_mode_is_hcc(manager->gpu)) {
// All usable CEs are secure
UVM_ASSERT(cap->secure);
// Multi-PCE LCEs are disallowed
UVM_ASSERT(hweight32(cap->cePceMask) == 1);
}
__set_bit(i, manager->ce_mask);
if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
@@ -2523,7 +2505,7 @@ out:
return status;
}
// Return the non-secure pool corresponding to the given CE index
// Return the pool corresponding to the given CE index
//
// This function cannot be used to access the proxy pool in SR-IOV heavy.
static uvm_channel_pool_t *channel_manager_ce_pool(uvm_channel_manager_t *manager, NvU32 ce)
@@ -2734,24 +2716,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
{
unsigned num_channel_pools;
unsigned num_used_ce = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
// Create one CE channel pool per usable CE
num_channel_pools = num_used_ce;
num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
// CE proxy channel pool.
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
num_channel_pools++;
if (uvm_conf_computing_mode_enabled(manager->gpu)) {
// Create one CE secure channel pool per usable CE
if (uvm_conf_computing_mode_is_hcc(manager->gpu))
num_channel_pools += num_used_ce;
// SEC2 pool, WLC pool, LCIC pool
// SEC2 pool, WLC pool, LCIC pool
if (uvm_conf_computing_mode_enabled(manager->gpu))
num_channel_pools += 3;
}
return num_channel_pools;
}
@@ -2783,38 +2758,6 @@ static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager,
return NV_OK;
}
// Create one secure CE channel pool per usable copy engine.
//
// Each new pool also becomes the default pool for every channel type whose
// preferred CE matches and that requires a secure pool. No-op outside of
// Hopper Confidential Computing mode.
static NV_STATUS channel_manager_create_ce_secure_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
{
    unsigned ce;

    if (!uvm_conf_computing_mode_is_hcc(manager->gpu))
        return NV_OK;

    for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
        NV_STATUS status;
        unsigned type;
        uvm_channel_pool_t *secure_pool = NULL;

        status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &secure_pool);
        if (status != NV_OK)
            return status;

        // Route every channel type that prefers this CE (and needs a secure
        // pool) to the pool just created.
        for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
            if (preferred_ce[type] != ce)
                continue;

            if (uvm_channel_type_requires_secure_pool(manager->gpu, type)) {
                UVM_ASSERT(manager->pool_to_use.default_for_type[type] == NULL);
                manager->pool_to_use.default_for_type[type] = secure_pool;
            }
        }
    }

    return NV_OK;
}
static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(wlc);
@@ -3142,6 +3085,64 @@ static NV_STATUS channel_manager_setup_wlc_lcic(uvm_channel_pool_t *wlc_pool, uv
return NV_OK;
}
// Create the channel pools needed by the Confidential Computing feature: one
// SEC2 pool plus the paired WLC/LCIC pools. No-op when the feature is
// disabled. The creation order below is load-bearing and must be preserved.
static NV_STATUS channel_manager_create_conf_computing_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
{
    NV_STATUS status;
    unsigned wlc_lcic_ce;
    uvm_channel_pool_t *pool_sec2 = NULL;
    uvm_channel_pool_t *pool_wlc = NULL;
    uvm_channel_pool_t *pool_lcic = NULL;

    if (!uvm_conf_computing_mode_enabled(manager->gpu))
        return NV_OK;

    // Backing sysmem for the GPU-wide IV, needed by the pools created below.
    status = uvm_rm_mem_alloc(manager->gpu,
                              UVM_RM_MEM_TYPE_SYS,
                              sizeof(UvmCslIv),
                              UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                              &manager->gpu->conf_computing.iv_rm_mem);
    if (status != NV_OK)
        return status;

    // SEC2 comes first: initialization of the remaining channels depends on
    // it.
    status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_SEC2, 0, &pool_sec2);
    if (status != NV_OK)
        return status;

    manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = pool_sec2;

    // WLC and LCIC must share a CE for the fixed schedule to work; reuse the
    // CE preferred for CPU-to-GPU transfers.
    // TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
    // work launch
    // Find a metric to select the best CE to use
    wlc_lcic_ce = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];

    // WLC/LCIC are created early because CE channels rely on them for secure
    // launch, and WLC must exist before LCIC.
    status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_WLC, wlc_lcic_ce, &pool_wlc);
    if (status != NV_OK)
        return status;

    manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] = pool_wlc;

    status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_LCIC, wlc_lcic_ce, &pool_lcic);
    if (status != NV_OK)
        return status;

    status = channel_manager_setup_wlc_lcic(pool_wlc, pool_lcic);
    if (status != NV_OK)
        return status;

    // Publishing the LCIC default must happen only after
    // channel_manager_setup_wlc_lcic(): that assignment signals WLC/LCIC are
    // ready for secure work submission.
    manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] = pool_lcic;

    return NV_OK;
}
static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
{
NV_STATUS status;
@@ -3162,62 +3163,11 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
if (!manager->channel_pools)
return NV_ERR_NO_MEMORY;
if (uvm_conf_computing_mode_enabled(manager->gpu)) {
uvm_channel_pool_t *sec2_pool = NULL;
uvm_channel_pool_t *wlc_pool = NULL;
uvm_channel_pool_t *lcic_pool = NULL;
unsigned wlc_lcic_ce_index;
status = uvm_rm_mem_alloc(manager->gpu,
UVM_RM_MEM_TYPE_SYS,
sizeof(UvmCslIv),
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&manager->gpu->conf_computing.iv_rm_mem);
if (status != NV_OK)
return status;
// Create SEC2 pool. This needs to be done first, initialization of
// other channels needs SEC2.
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_SEC2, 0, &sec2_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = sec2_pool;
// Use the same CE as CPU TO GPU channels for WLC/LCIC
// Both need to use the same engine for the fixed schedule to work.
// TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
// work launch
// Find a metric to select the best CE to use
wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];
// Create WLC/LCIC pools. This should be done early, CE channels use
// them for secure launch. The WLC pool must be created before the LCIC.
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_WLC, wlc_lcic_ce_index, &wlc_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] = wlc_pool;
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_LCIC, wlc_lcic_ce_index, &lcic_pool);
if (status != NV_OK)
return status;
status = channel_manager_setup_wlc_lcic(wlc_pool, lcic_pool);
if (status != NV_OK)
return status;
// The LCIC pool must be assigned after the call to
// channel_manager_setup_wlc_lcic(). It determines WLC and LCIC channels
// are ready to be used for secure work submission.
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] = lcic_pool;
status = channel_manager_create_ce_secure_pools(manager, preferred_ce);
}
else {
status = channel_manager_create_ce_pools(manager, preferred_ce);
}
status = channel_manager_create_conf_computing_pools(manager, preferred_ce);
if (status != NV_OK)
return status;
status = channel_manager_create_ce_pools(manager, preferred_ce);
if (status != NV_OK)
return status;