This commit is contained in:
Andy Ritger
2022-11-10 08:39:33 -08:00
parent 7c345b838b
commit 758b4ee818
1323 changed files with 262135 additions and 60754 deletions

View File

@@ -24,6 +24,7 @@
#include "uvm_channel.h"
#include "uvm_api.h"
#include "uvm_common.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_procfs.h"
@@ -68,6 +69,38 @@ typedef enum
UVM_CHANNEL_UPDATE_MODE_FORCE_ALL
} uvm_channel_update_mode_t;
// Initialize the lock protecting the given channel pool. Proxy pools get a
// mutex, every other pool gets a spinlock; both are registered at
// UVM_LOCK_ORDER_CHANNEL in the lock-order hierarchy.
static void channel_pool_lock_init(uvm_channel_pool_t *pool)
{
    if (!uvm_channel_pool_is_proxy(pool)) {
        uvm_spin_lock_init(&pool->spinlock, UVM_LOCK_ORDER_CHANNEL);
        return;
    }

    uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_CHANNEL);
}
// Acquire the pool's lock, dispatching on the pool flavor: proxy pools are
// protected by a mutex, all other pools by a spinlock.
void uvm_channel_pool_lock(uvm_channel_pool_t *pool)
{
    if (!uvm_channel_pool_is_proxy(pool)) {
        uvm_spin_lock(&pool->spinlock);
        return;
    }

    uvm_mutex_lock(&pool->mutex);
}
// Release the pool's lock, using the primitive that matches the pool flavor
// (mutex for proxy pools, spinlock otherwise). Must pair with a prior
// uvm_channel_pool_lock() on the same pool.
void uvm_channel_pool_unlock(uvm_channel_pool_t *pool)
{
    if (!uvm_channel_pool_is_proxy(pool)) {
        uvm_spin_unlock(&pool->spinlock);
        return;
    }

    uvm_mutex_unlock(&pool->mutex);
}
// Debug helper: assert that the current thread holds the pool's lock,
// checking whichever primitive (mutex or spinlock) the pool actually uses.
void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool)
{
    if (!uvm_channel_pool_is_proxy(pool)) {
        uvm_assert_spinlock_locked(&pool->spinlock);
        return;
    }

    uvm_assert_mutex_locked(&pool->mutex);
}
// Update channel progress, completing up to max_to_complete entries
static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
NvU32 max_to_complete,
@@ -80,7 +113,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
NvU64 completed_value = uvm_channel_update_completed_value(channel);
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
// Completed value should never exceed the queued value
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
@@ -108,7 +141,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
channel->gpu_get = gpu_get;
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
if (cpu_put >= gpu_get)
pending_gpfifos = cpu_put - gpu_get;
@@ -157,7 +190,7 @@ static bool channel_is_available(uvm_channel_t *channel, NvU32 num_gpfifo_entrie
{
NvU32 pending_entries;
uvm_assert_spinlock_locked(&channel->pool->lock);
uvm_channel_pool_assert_locked(channel->pool);
if (channel->cpu_put >= channel->gpu_get)
pending_entries = channel->cpu_put - channel->gpu_get;
@@ -174,14 +207,14 @@ static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
UVM_ASSERT(num_gpfifo_entries > 0);
UVM_ASSERT(num_gpfifo_entries < channel->num_gpfifo_entries);
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
if (channel_is_available(channel, num_gpfifo_entries)) {
channel->current_gpfifo_count += num_gpfifo_entries;
claimed = true;
}
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
return claimed;
}
@@ -248,7 +281,8 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
// Reserve a channel of the given type, returned in channel_out.
//
// The channel is taken from the pool the manager designates as the default
// for this channel type. Returns the status propagated from
// channel_reserve_in_pool().
//
// Fix: the range assertion on 'type' was duplicated verbatim; keep a single
// copy.
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
{
    UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);

    return channel_reserve_in_pool(manager->pool_to_use.default_for_type[type], channel_out);
}
@@ -289,14 +323,14 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
{
uvm_push_info_t *push_info;
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
push_info = list_first_entry_or_null(&channel->available_push_infos, uvm_push_info_t, available_list_node);
UVM_ASSERT(push_info != NULL);
UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
list_del(&push_info->available_list_node);
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
return push_info - channel->push_infos;
}
@@ -355,10 +389,6 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
UVM_ASSERT(uvm_channel_is_proxy(channel));
// nvUvmInterfacePagingChannelPushStream should not sleep, because a
// spinlock is currently held.
uvm_assert_spinlock_locked(&channel->pool->lock);
status = nvUvmInterfacePagingChannelPushStream(channel->proxy.handle, (char *) push->begin, push_size);
if (status != NV_OK) {
@@ -409,7 +439,7 @@ void uvm_channel_end_push(uvm_push_t *push)
NvU32 cpu_put;
NvU32 new_cpu_put;
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
new_tracking_value = ++channel->tracking_sem.queued_value;
new_payload = (NvU32)new_tracking_value;
@@ -446,7 +476,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// may notice the GPU work to be completed and hence all state tracking the
// push must be updated before that. Notably uvm_pushbuffer_end_push() has
// to be called first.
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
unlock_push(channel);
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
@@ -470,7 +500,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
NvU32 new_cpu_put;
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
cpu_put = channel->cpu_put;
new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;
@@ -505,7 +535,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
// The moment the channel is unlocked uvm_channel_update_progress_with_max()
// may notice the GPU work to be completed and hence all state tracking the
// push must be updated before that.
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
unlock_push(channel);
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
@@ -591,12 +621,12 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
if (pending_count == 0)
return NULL;
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
if (channel->gpu_get != channel->cpu_put)
entry = &channel->gpfifo_entries[channel->gpu_get];
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
return entry;
}
@@ -720,9 +750,9 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
channel_update_progress_all(channel, UVM_CHANNEL_UPDATE_MODE_FORCE_ALL);
}
uvm_procfs_destroy_entry(channel->procfs.pushes);
uvm_procfs_destroy_entry(channel->procfs.info);
uvm_procfs_destroy_entry(channel->procfs.dir);
proc_remove(channel->procfs.pushes);
proc_remove(channel->procfs.info);
proc_remove(channel->procfs.dir);
uvm_kvfree(channel->push_acquire_infos);
uvm_kvfree(channel->push_infos);
@@ -977,7 +1007,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
pool->engine_index = engine_index;
pool->pool_type = pool_type;
uvm_spin_lock_init(&pool->lock, UVM_LOCK_ORDER_CHANNEL);
channel_pool_lock_init(pool);
num_channels = channel_pool_type_num_channels(pool_type);
@@ -1482,11 +1512,11 @@ void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager)
if (channel_manager == NULL)
return;
uvm_procfs_destroy_entry(channel_manager->procfs.pending_pushes);
proc_remove(channel_manager->procfs.pending_pushes);
channel_manager_destroy_pools(channel_manager);
uvm_procfs_destroy_entry(channel_manager->procfs.channels_dir);
proc_remove(channel_manager->procfs.channels_dir);
uvm_pushbuffer_destroy(channel_manager->pushbuffer);
@@ -1583,7 +1613,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
uvm_channel_manager_t *manager = channel->pool->manager;
UVM_SEQ_OR_DBG_PRINT(s, "Channel %s\n", channel->name);
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
UVM_SEQ_OR_DBG_PRINT(s, "completed %llu\n", uvm_channel_update_completed_value(channel));
UVM_SEQ_OR_DBG_PRINT(s, "queued %llu\n", channel->tracking_sem.queued_value);
@@ -1595,7 +1625,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
}
static void channel_print_push_acquires(uvm_push_acquire_info_t *push_acquire_info, struct seq_file *seq)
@@ -1639,7 +1669,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
NvU64 completed_value = uvm_channel_update_completed_value(channel);
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
cpu_put = channel->cpu_put;
@@ -1687,7 +1717,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
channel_print_push_acquires(push_acquire_info, seq);
}
}
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
}
void uvm_channel_print_pending_pushes(uvm_channel_t *channel)