560.28.03

Gaurav Juvekar
2024-07-19 15:45:15 -07:00
parent 5fdf5032fb
commit 448d5cc656
859 changed files with 165424 additions and 91129 deletions


@@ -361,7 +361,6 @@ static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_
     NV_STATUS status;

     uvm_channel_update_progress(channel);

     index = uvm_channel_index_in_pool(channel);
     channel_pool_lock(pool);
@@ -493,25 +492,20 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
 static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, NvU64 semaphore_va)
 {
     NvU32 iv_index;
-    uvm_gpu_address_t notifier_gpu_va;
-    uvm_gpu_address_t auth_tag_gpu_va;
-    uvm_gpu_address_t semaphore_gpu_va;
-    uvm_gpu_address_t encrypted_payload_gpu_va;
     uvm_gpu_t *gpu = push->gpu;
     uvm_channel_t *channel = push->channel;
     uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
+    uvm_gpu_address_t notifier_gpu_va = uvm_gpu_semaphore_get_notifier_gpu_va(semaphore);
+    uvm_gpu_address_t auth_tag_gpu_va = uvm_gpu_semaphore_get_auth_tag_gpu_va(semaphore);
+    uvm_gpu_address_t encrypted_payload_gpu_va = uvm_gpu_semaphore_get_encrypted_payload_gpu_va(semaphore);
+    uvm_gpu_address_t semaphore_gpu_va = uvm_gpu_address_virtual(semaphore_va);
     UvmCslIv *iv_cpu_addr = semaphore->conf_computing.ivs;
-    NvU32 payload_size = sizeof(*semaphore->payload);
+    NvU32 payload_size = sizeof(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
     NvU32 *last_pushed_notifier = &semaphore->conf_computing.last_pushed_notifier;

     UVM_ASSERT(g_uvm_global.conf_computing_enabled);
     UVM_ASSERT(uvm_channel_is_ce(channel));

-    encrypted_payload_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.encrypted_payload, gpu, false);
-    notifier_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.notifier, gpu, false);
-    auth_tag_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.auth_tag, gpu, false);
-    semaphore_gpu_va = uvm_gpu_address_virtual(semaphore_va);
-
     iv_index = ((*last_pushed_notifier + 2) / 2) % channel->num_gpfifo_entries;

     uvm_conf_computing_log_gpu_encryption(channel, &iv_cpu_addr[iv_index]);
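
Note on the iv_index arithmetic above: the driver code that follows this hunk (not shown here) bumps last_pushed_notifier twice per tracking-semaphore release, once before and once after the encrypted payload write, so ((notifier + 2) / 2) names the slot of the upcoming release, wrapped over num_gpfifo_entries. A standalone sketch, assuming that two-increments-per-release scheme:

#include <stdio.h>

/* Sketch: map the notifier value onto an IV slot the way the hunk does,
 * assuming each release advances the notifier by 2. */
int main(void)
{
    unsigned num_gpfifo_entries = 4;
    unsigned last_pushed_notifier = 0;
    unsigned release;

    for (release = 0; release < 6; ++release) {
        unsigned iv_index = ((last_pushed_notifier + 2) / 2) % num_gpfifo_entries;

        printf("notifier %2u -> iv_index %u\n", last_pushed_notifier, iv_index);
        last_pushed_notifier += 2; /* one bump before, one after the payload write */
    }
    return 0;
}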
@@ -1710,59 +1704,24 @@ static void free_conf_computing_buffers(uvm_channel_t *channel)
     channel->conf_computing.static_pb_protected_sysmem = NULL;
     channel->conf_computing.push_crypto_bundles = NULL;

-    uvm_rm_mem_free(channel->tracking_sem.semaphore.conf_computing.encrypted_payload);
-    uvm_rm_mem_free(channel->tracking_sem.semaphore.conf_computing.notifier);
-    uvm_rm_mem_free(channel->tracking_sem.semaphore.conf_computing.auth_tag);
     uvm_kvfree(channel->tracking_sem.semaphore.conf_computing.ivs);
-    channel->tracking_sem.semaphore.conf_computing.encrypted_payload = NULL;
-    channel->tracking_sem.semaphore.conf_computing.notifier = NULL;
-    channel->tracking_sem.semaphore.conf_computing.auth_tag = NULL;
     channel->tracking_sem.semaphore.conf_computing.ivs = NULL;
 }

 static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
 {
     uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
-    uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
-    NV_STATUS status;

     UVM_ASSERT(g_uvm_global.conf_computing_enabled);
     UVM_ASSERT(uvm_channel_is_ce(channel));

-    status = uvm_rm_mem_alloc_and_map_cpu(gpu,
-                                          UVM_RM_MEM_TYPE_SYS,
-                                          sizeof(semaphore->conf_computing.last_pushed_notifier),
-                                          UVM_CONF_COMPUTING_BUF_ALIGNMENT,
-                                          &semaphore->conf_computing.notifier);
-    if (status != NV_OK)
-        return status;
-
-    status = uvm_rm_mem_alloc_and_map_cpu(gpu,
-                                          UVM_RM_MEM_TYPE_SYS,
-                                          sizeof(*channel->tracking_sem.semaphore.payload),
-                                          UVM_CONF_COMPUTING_BUF_ALIGNMENT,
-                                          &semaphore->conf_computing.encrypted_payload);
-    if (status != NV_OK)
-        return status;
-
-    status = uvm_rm_mem_alloc_and_map_cpu(gpu,
-                                          UVM_RM_MEM_TYPE_SYS,
-                                          UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
-                                          UVM_CONF_COMPUTING_BUF_ALIGNMENT,
-                                          &semaphore->conf_computing.auth_tag);
-    if (status != NV_OK)
-        return status;
-
     semaphore->conf_computing.ivs = uvm_kvmalloc_zero(sizeof(*semaphore->conf_computing.ivs)
-                                                      * channel->num_gpfifo_entries);
+                                                       * channel->num_gpfifo_entries);

     if (!semaphore->conf_computing.ivs)
         return NV_ERR_NO_MEMORY;

-    return status;
+    return NV_OK;
 }

 static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
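
The hunk above shrinks the channel-side setup and teardown to just the IV array; the notifier, encrypted payload, and auth tag buffers now travel with the GPU semaphore itself. The surviving pattern, early return on each failed allocation plus a single NULL-safe teardown, is sketched below with hypothetical helpers (bufs_alloc/bufs_free are illustrative, not driver API):

#include <stdlib.h>

struct bufs {
    void *ivs;
    void *auth_tag;
};

static void bufs_free(struct bufs *b)
{
    free(b->ivs);       /* free(NULL) is a no-op, so partial setups are fine */
    free(b->auth_tag);
    b->ivs = NULL;
    b->auth_tag = NULL;
}

static int bufs_alloc(struct bufs *b, size_t n)
{
    b->ivs = calloc(n, 16);
    if (!b->ivs)
        return -1;

    b->auth_tag = calloc(1, 32);
    if (!b->auth_tag)
        return -1; /* caller is expected to invoke bufs_free() on failure */

    return 0;
}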
@@ -2380,24 +2339,41 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
     return status;
 }

-static bool ce_usable_for_channel_type(uvm_channel_type_t type, const UvmGpuCopyEngineCaps *cap)
-{
-    if (!cap->supported || cap->grce)
-        return false;
-
-    switch (type) {
-        case UVM_CHANNEL_TYPE_CPU_TO_GPU:
-        case UVM_CHANNEL_TYPE_GPU_TO_CPU:
-            return cap->sysmem;
-        case UVM_CHANNEL_TYPE_GPU_INTERNAL:
-        case UVM_CHANNEL_TYPE_MEMOPS:
-            return true;
-        case UVM_CHANNEL_TYPE_GPU_TO_GPU:
-            return cap->p2p;
-        default:
-            UVM_ASSERT_MSG(false, "Unexpected channel type 0x%x\n", type);
-            return false;
-    }
-}
+static bool ce_is_usable(const UvmGpuCopyEngineCaps *cap)
+{
+    return cap->supported && !cap->grce;
+}
+
+// Check that all asynchronous CEs are usable, and that there is at least one
+// such CE.
+static NV_STATUS ces_validate(uvm_channel_manager_t *manager, const UvmGpuCopyEngineCaps *ces_caps)
+{
+    unsigned ce;
+    bool found_usable_ce = false;
+
+    for (ce = 0; ce < UVM_COPY_ENGINE_COUNT_MAX; ++ce) {
+        const UvmGpuCopyEngineCaps *ce_caps = ces_caps + ce;
+
+        if (!ce_is_usable(ce_caps))
+            continue;
+
+        found_usable_ce = true;
+
+        // All channels may need to release their semaphore to sysmem.
+        // All CEs are expected to have the sysmem flag set.
+        if (!ce_caps->sysmem)
+            return NV_ERR_NOT_SUPPORTED;
+
+        // While P2P capabilities are only required for transfers between GPUs,
+        // in practice all CEs are expected to have the corresponding flag set.
+        if (!ce_caps->p2p)
+            return NV_ERR_NOT_SUPPORTED;
+    }
+
+    if (!found_usable_ce)
+        return NV_ERR_NOT_SUPPORTED;
+
+    return NV_OK;
+}

 static unsigned ce_usage_count(NvU32 ce, const unsigned *preferred_ce)
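
ces_validate replaces the per-channel-type capability switch with a global rule: every usable (supported, non-GRCE) copy engine must advertise both sysmem and p2p, and at least one such engine must exist. A standalone illustration of that rule with simplified stand-in types (struct caps, usable, and validate are hypothetical, not the driver's):

#include <stdbool.h>
#include <stdio.h>

struct caps { bool supported, grce, sysmem, p2p; };

static bool usable(const struct caps *c) { return c->supported && !c->grce; }

static bool validate(const struct caps *caps, unsigned n)
{
    bool found = false;
    unsigned i;

    for (i = 0; i < n; ++i) {
        if (!usable(&caps[i]))
            continue;           /* GRCEs and unsupported engines are skipped */
        found = true;
        if (!caps[i].sysmem || !caps[i].p2p)
            return false;       /* a usable CE missing either flag fails all */
    }
    return found;               /* at least one usable CE must exist */
}

int main(void)
{
    struct caps ces[] = {
        {true, true,  true, true},  /* GRCE: skipped */
        {true, false, true, true},  /* usable and fully capable */
    };
    printf("valid: %d\n", validate(ces, 2)); /* prints "valid: 1" */
    return 0;
}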
@@ -2426,15 +2402,13 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
     const UvmGpuCopyEngineCaps *cap0 = ce_caps + ce_index0;
     const UvmGpuCopyEngineCaps *cap1 = ce_caps + ce_index1;

-    UVM_ASSERT(ce_usable_for_channel_type(type, cap0));
-    UVM_ASSERT(ce_usable_for_channel_type(type, cap1));
     UVM_ASSERT(ce_index0 < UVM_COPY_ENGINE_COUNT_MAX);
     UVM_ASSERT(ce_index1 < UVM_COPY_ENGINE_COUNT_MAX);
     UVM_ASSERT(ce_index0 != ce_index1);
     switch (type) {
-        // For CPU to GPU fast sysmem read is the most important
         case UVM_CHANNEL_TYPE_CPU_TO_GPU:
+            // For CPU to GPU fast sysmem read is the most important
             if (cap0->sysmemRead != cap1->sysmemRead)
                 return cap1->sysmemRead - cap0->sysmemRead;
@@ -2444,8 +2418,8 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
             break;

-        // For GPU to CPU fast sysmem write is the most important
         case UVM_CHANNEL_TYPE_GPU_TO_CPU:
+            // For GPU to CPU fast sysmem write is the most important
             if (cap0->sysmemWrite != cap1->sysmemWrite)
                 return cap1->sysmemWrite - cap0->sysmemWrite;
@@ -2455,8 +2429,8 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
             break;

-        // For GPU to GPU prefer the LCE with the most PCEs
         case UVM_CHANNEL_TYPE_GPU_TO_GPU:
+            // Prefer the LCE with the most PCEs
             {
                 int pce_diff = (int)hweight32(cap1->cePceMask) - (int)hweight32(cap0->cePceMask);
@@ -2466,10 +2440,10 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
             break;

-        // For GPU_INTERNAL we want the max possible bandwidth for CEs. For now
-        // assume that the number of PCEs is a good measure.
-        // TODO: Bug 1735254: Add a direct CE query for local FB bandwidth
         case UVM_CHANNEL_TYPE_GPU_INTERNAL:
+            // We want the max possible bandwidth for CEs used for GPU_INTERNAL,
+            // for now assume that the number of PCEs is a good measure.
+            // TODO: Bug 1735254: Add a direct CE query for local FB bandwidth
             {
                 int pce_diff = (int)hweight32(cap1->cePceMask) - (int)hweight32(cap0->cePceMask);
@@ -2483,11 +2457,15 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
             break;

-        // For MEMOPS we mostly care about latency which should be better with
-        // less used CEs (although we only know about our own usage and not
-        // system-wide) so just break out to get the default ordering which
-        // prioritizes usage count.
         case UVM_CHANNEL_TYPE_MEMOPS:
+            // For MEMOPS we mostly care about latency which should be better
+            // with less used CEs (although we only know about our own usage and
+            // not system-wide) so just break out to get the default ordering
+            // which prioritizes usage count.
+
+        // For WLC we only care about using a dedicated CE, which requires
+        // knowing the global CE mappings. For now just rely on the default
+        // ordering, which results on selecting an unused CE (if available).
+        case UVM_CHANNEL_TYPE_WLC:
             break;

         default:
@@ -2510,54 +2488,104 @@ static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
     return ce_index0 - ce_index1;
 }
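
For reference, compare_ce_for_channel_type follows the qsort comparator convention: a negative return means the first index is the better pick, and index order is the final tie-breaker, which keeps the scan in the function below deterministic. A toy comparator with the same shape (usage[] and compare_ce are illustrative, not driver code):

#include <stdio.h>

static const unsigned usage[4] = {2, 0, 0, 1}; /* hypothetical per-CE usage */

static int compare_ce(unsigned a, unsigned b)
{
    /* Prefer the less-used CE... */
    if (usage[a] != usage[b])
        return (int)usage[a] - (int)usage[b];

    /* ...and fall back to the lower index for a stable, deterministic pick. */
    return (int)a - (int)b;
}

int main(void)
{
    unsigned best = 0, ce;

    for (ce = 1; ce < 4; ++ce) {
        if (compare_ce(ce, best) < 0)
            best = ce;
    }
    printf("picked CE %u\n", best); /* prints "picked CE 1" */
    return 0;
}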
-// Identify usable CEs, and select the preferred CE for a given channel type.
-static NV_STATUS pick_ce_for_channel_type(uvm_channel_manager_t *manager,
-                                          const UvmGpuCopyEngineCaps *ce_caps,
-                                          uvm_channel_type_t type,
-                                          unsigned *preferred_ce)
-{
-    NvU32 i;
-    NvU32 best_ce = UVM_COPY_ENGINE_COUNT_MAX;
-
-    UVM_ASSERT(type < UVM_CHANNEL_TYPE_CE_COUNT);
-
-    for (i = 0; i < UVM_COPY_ENGINE_COUNT_MAX; ++i) {
-        const UvmGpuCopyEngineCaps *cap = ce_caps + i;
-
-        if (!ce_usable_for_channel_type(type, cap))
-            continue;
-
-        __set_bit(i, manager->ce_mask);
-
-        if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
-            best_ce = i;
-            continue;
-        }
-
-        if (compare_ce_for_channel_type(ce_caps, type, i, best_ce, preferred_ce) < 0)
-            best_ce = i;
-    }
-
-    if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
-        UVM_ERR_PRINT("Failed to find a suitable CE for channel type %s\n", uvm_channel_type_to_string(type));
-        return NV_ERR_NOT_SUPPORTED;
-    }
-
-    preferred_ce[type] = best_ce;
-    return NV_OK;
-}
+// Select the preferred CE for the given channel types.
+static void pick_ces_for_channel_types(uvm_channel_manager_t *manager,
+                                       const UvmGpuCopyEngineCaps *ce_caps,
+                                       uvm_channel_type_t *channel_types,
+                                       unsigned num_channel_types,
+                                       unsigned *preferred_ce)
+{
+    unsigned i;
+
+    // In Confidential Computing, do not mark all usable CEs, only the preferred
+    // ones, because non-preferred CE channels are guaranteed to not be used.
+    bool mark_all_usable_ces = !g_uvm_global.conf_computing_enabled;
+
+    for (i = 0; i < num_channel_types; ++i) {
+        unsigned ce;
+        unsigned best_ce = UVM_COPY_ENGINE_COUNT_MAX;
+        uvm_channel_type_t type = channel_types[i];
+
+        for (ce = 0; ce < UVM_COPY_ENGINE_COUNT_MAX; ++ce) {
+            if (!ce_is_usable(ce_caps + ce))
+                continue;
+
+            if (mark_all_usable_ces)
+                __set_bit(ce, manager->ce_mask);
+
+            if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
+                best_ce = ce;
+                continue;
+            }
+
+            if (compare_ce_for_channel_type(ce_caps, type, ce, best_ce, preferred_ce) < 0)
+                best_ce = ce;
+        }
+
+        UVM_ASSERT(best_ce != UVM_COPY_ENGINE_COUNT_MAX);
+
+        preferred_ce[type] = best_ce;
+
+        // Preferred CEs are always marked as usable.
+        if (type < UVM_CHANNEL_TYPE_CE_COUNT)
+            __set_bit(best_ce, manager->ce_mask);
+    }
+}
-static NV_STATUS channel_manager_pick_copy_engines(uvm_channel_manager_t *manager, unsigned *preferred_ce)
+static void pick_ces(uvm_channel_manager_t *manager, const UvmGpuCopyEngineCaps *ce_caps, unsigned *preferred_ce)
 {
-    NV_STATUS status;
-    unsigned i;
-    UvmGpuCopyEnginesCaps *ces_caps;
+    // The order of picking CEs for each type matters as it's affected by
+    // the usage count of each CE and it increases every time a CE
+    // is selected. MEMOPS has the least priority as it only cares about
+    // low usage of the CE to improve latency
     uvm_channel_type_t types[] = {UVM_CHANNEL_TYPE_CPU_TO_GPU,
                                   UVM_CHANNEL_TYPE_GPU_TO_CPU,
                                   UVM_CHANNEL_TYPE_GPU_INTERNAL,
                                   UVM_CHANNEL_TYPE_GPU_TO_GPU,
                                   UVM_CHANNEL_TYPE_MEMOPS};

+    UVM_ASSERT(!g_uvm_global.conf_computing_enabled);
+
+    pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);
+}
+
+static void pick_ces_conf_computing(uvm_channel_manager_t *manager,
+                                    const UvmGpuCopyEngineCaps *ce_caps,
+                                    unsigned *preferred_ce)
+{
+    unsigned best_wlc_ce;
+
+    // The WLC type must go last so an unused CE is chosen, if available
+    uvm_channel_type_t types[] = {UVM_CHANNEL_TYPE_CPU_TO_GPU,
+                                  UVM_CHANNEL_TYPE_GPU_TO_CPU,
+                                  UVM_CHANNEL_TYPE_GPU_INTERNAL,
+                                  UVM_CHANNEL_TYPE_MEMOPS,
+                                  UVM_CHANNEL_TYPE_WLC};
+
+    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
+
+    pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);
+
+    // Direct transfers between GPUs are disallowed in Confidential Computing,
+    // but the preferred CE is still set to an arbitrary value for consistency.
+    preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_GPU] = preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_CPU];
+
+    best_wlc_ce = preferred_ce[UVM_CHANNEL_TYPE_WLC];
+
+    // TODO: Bug 4576908: in HCC, the WLC type should not share a CE with any
+    // channel type other than LCIC. The assertion should be a check instead.
+    UVM_ASSERT(ce_usage_count(best_wlc_ce, preferred_ce) == 0);
+}
+
+static NV_STATUS channel_manager_pick_ces(uvm_channel_manager_t *manager, unsigned *preferred_ce)
+{
+    NV_STATUS status;
+    UvmGpuCopyEnginesCaps *ces_caps;
+    uvm_channel_type_t type;
+
+    for (type = 0; type < UVM_CHANNEL_TYPE_COUNT; type++)
+        preferred_ce[type] = UVM_COPY_ENGINE_COUNT_MAX;
+
     ces_caps = uvm_kvmalloc_zero(sizeof(*ces_caps));
     if (!ces_caps)
         return NV_ERR_NO_MEMORY;
@@ -2566,16 +2594,14 @@ static NV_STATUS channel_manager_pick_copy_engines(uvm_channel_manager_t *manage
     if (status != NV_OK)
         goto out;

-    // The order of picking CEs for each type matters as it's affected by the
-    // usage count of each CE and it increases every time a CE is selected.
-    // MEMOPS has the least priority as it only cares about low usage of the
-    // CE to improve latency
-    for (i = 0; i < ARRAY_SIZE(types); ++i) {
-        status = pick_ce_for_channel_type(manager, ces_caps->copyEngineCaps, types[i], preferred_ce);
-        if (status != NV_OK)
-            goto out;
-    }
+    status = ces_validate(manager, ces_caps->copyEngineCaps);
+    if (status != NV_OK)
+        goto out;
+
+    if (g_uvm_global.conf_computing_enabled)
+        pick_ces_conf_computing(manager, ces_caps->copyEngineCaps, preferred_ce);
+    else
+        pick_ces(manager, ces_caps->copyEngineCaps, preferred_ce);

 out:
     uvm_kvfree(ces_caps);
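
The pick order encoded in the types[] arrays matters because compare_ce_for_channel_type consults ce_usage_count, and usage grows as earlier types claim engines; latency-sensitive types go last so they land on an idle CE when one exists. A toy greedy pass showing that effect (simplified, not driver code):

#include <stdio.h>

#define NUM_CES 3

int main(void)
{
    const char *types[] = {"CPU_TO_GPU", "GPU_TO_CPU", "MEMOPS"};
    unsigned usage[NUM_CES] = {0};
    unsigned t;

    for (t = 0; t < 3; ++t) {
        unsigned best = 0, ce;

        /* Each type takes the least-used CE seen so far. */
        for (ce = 1; ce < NUM_CES; ++ce) {
            if (usage[ce] < usage[best])
                best = ce;
        }
        usage[best]++;
        printf("%s -> CE %u\n", types[t], best);
    }
    /* CPU_TO_GPU -> CE 0, GPU_TO_CPU -> CE 1, MEMOPS -> CE 2 */
    return 0;
}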
@@ -2641,7 +2667,7 @@ static const char *buffer_location_to_string(UVM_BUFFER_LOCATION loc)
     else if (loc == UVM_BUFFER_LOCATION_DEFAULT)
         return "auto";

-    UVM_ASSERT_MSG(false, "Invalid buffer locationvalue %d\n", loc);
+    UVM_ASSERT_MSG(false, "Invalid buffer location value %d\n", loc);

     return NULL;
 }
@@ -2818,7 +2844,9 @@ static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager,
     // A pool is created for each usable CE, even if it has not been selected as
     // the preferred CE for any type, because as more information is discovered
     // (for example, a pair of peer GPUs is added) we may start using the
-    // previously idle pools.
+    // previously idle pools. Configurations where non-preferred CEs are
+    // guaranteed to remain unused are allowed to avoid marking those engines as
+    // usable.
     for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
         NV_STATUS status;
         uvm_channel_pool_t *pool = NULL;
@@ -3005,17 +3033,15 @@ static NV_STATUS setup_lcic_schedule(uvm_channel_t *paired_wlc, uvm_channel_t *l
     // Reuse WLC sysmem allocation
     NvU64 gpu_unprotected = uvm_rm_mem_get_gpu_uvm_va(paired_wlc->conf_computing.static_pb_unprotected_sysmem, gpu);
     char *cpu_unprotected = paired_wlc->conf_computing.static_pb_unprotected_sysmem_cpu;
-    uvm_gpu_semaphore_t *lcic_gpu_semaphore = &lcic->tracking_sem.semaphore;
+    uvm_gpu_semaphore_t *lcic_semaphore = &lcic->tracking_sem.semaphore;
     uvm_gpu_address_t notifier_src_entry_addr = lcic->conf_computing.static_notifier_entry_unprotected_sysmem_gpu_va;
     uvm_gpu_address_t notifier_src_exit_addr = lcic->conf_computing.static_notifier_exit_unprotected_sysmem_gpu_va;
-    uvm_gpu_address_t notifier_dst_addr = uvm_rm_mem_get_gpu_va(lcic_gpu_semaphore->conf_computing.notifier,
-                                                                gpu,
-                                                                false);
-    uvm_gpu_address_t encrypted_payload_gpu_va =
-        uvm_rm_mem_get_gpu_va(lcic_gpu_semaphore->conf_computing.encrypted_payload, gpu, false);
+    uvm_gpu_address_t notifier_dst_addr = uvm_gpu_semaphore_get_notifier_gpu_va(lcic_semaphore);
+    uvm_gpu_address_t encrypted_payload_gpu_va = uvm_gpu_semaphore_get_encrypted_payload_gpu_va(lcic_semaphore);
+    uvm_gpu_address_t auth_tag_gpu_va = uvm_gpu_semaphore_get_auth_tag_gpu_va(lcic_semaphore);
     uvm_gpu_address_t semaphore_gpu_va = uvm_gpu_address_virtual(uvm_channel_tracking_semaphore_get_gpu_va(lcic));
-    uvm_gpu_address_t auth_tag_gpu_va = uvm_rm_mem_get_gpu_va(lcic_gpu_semaphore->conf_computing.auth_tag, gpu, false);
-    NvU32 payload_size = sizeof(*lcic->tracking_sem.semaphore.payload);
+    NvU32 payload_size = sizeof(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(lcic_semaphore));
     NvU32 notifier_size = sizeof(*lcic->conf_computing.static_notifier_entry_unprotected_sysmem_cpu);
     NvU64 *lcic_gpfifo_entries;
@@ -3194,12 +3220,8 @@ static NV_STATUS channel_manager_create_conf_computing_pools(uvm_channel_manager
     manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = sec2_pool;

-    // Use the same CE as CPU TO GPU channels for WLC/LCIC
-    // Both need to use the same engine for the fixed schedule to work.
-    // TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
-    // work launch
-    // Find a metric to select the best CE to use
-    wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];
+    // WLC and LCIC must use the same engine for the fixed schedule to work.
+    wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_WLC];

     // Create WLC/LCIC pools. This should be done early, CE channels use
     // them for secure launch. The WLC pool must be created before the LCIC.
@@ -3228,14 +3250,10 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
 static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
 {
     NV_STATUS status;
-    uvm_channel_type_t type;
     unsigned max_channel_pools;
-    unsigned preferred_ce[UVM_CHANNEL_TYPE_CE_COUNT];
+    unsigned preferred_ce[UVM_CHANNEL_TYPE_COUNT];

-    for (type = 0; type < ARRAY_SIZE(preferred_ce); type++)
-        preferred_ce[type] = UVM_COPY_ENGINE_COUNT_MAX;
-
-    status = channel_manager_pick_copy_engines(manager, preferred_ce);
+    status = channel_manager_pick_ces(manager, preferred_ce);
     if (status != NV_OK)
         return status;
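
preferred_ce is now sized and initialized for every channel type, not just the CE transfer types, because pick_ces_conf_computing also records a preferred engine for WLC (and gives GPU_TO_GPU an arbitrary but consistent value). A sketch of why the smaller array would no longer be safe, using hypothetical enum values (the real enum lives in the UVM headers):

/* Toy enum, illustrative values only: once WLC gets a preferred CE,
 * indexing a CE_COUNT-sized array with a WLC-class type would be out
 * of bounds. */
typedef enum {
    CHANNEL_TYPE_CPU_TO_GPU,
    CHANNEL_TYPE_GPU_TO_CPU,
    CHANNEL_TYPE_GPU_INTERNAL,
    CHANNEL_TYPE_MEMOPS,
    CHANNEL_TYPE_GPU_TO_GPU,
    CHANNEL_TYPE_CE_COUNT,                    /* CE transfer types end here */
    CHANNEL_TYPE_WLC = CHANNEL_TYPE_CE_COUNT, /* conf computing types follow */
    CHANNEL_TYPE_LCIC,
    CHANNEL_TYPE_COUNT,
} channel_type_t;

unsigned preferred_ce[CHANNEL_TYPE_COUNT]; /* must cover the WLC index too */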
@@ -3496,7 +3514,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
     UVM_SEQ_OR_DBG_PRINT(s, "get %u\n", channel->gpu_get);
     UVM_SEQ_OR_DBG_PRINT(s, "put %u\n", channel->cpu_put);
     UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
-    UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
+    UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)uvm_gpu_semaphore_get_cpu_va(&channel->tracking_sem.semaphore));

     channel_pool_unlock(channel->pool);
 }