570.86.15

This commit is contained in:
Bernhard Stoeckner
2025-01-27 19:36:56 +01:00
parent 9d0b0414a5
commit 54d69484da
1166 changed files with 318863 additions and 182687 deletions

View File

@@ -54,6 +54,9 @@ static NV_STATUS uvm_register_callbacks(void)
g_exported_uvm_ops.stopDevice = NULL;
g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;
g_exported_uvm_ops.drainP2P = uvm_suspend_and_drainP2P_entry;
g_exported_uvm_ops.resumeP2P = uvm_resumeP2P_entry;
// Register the UVM callbacks with the main GPU driver:
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
if (status != NV_OK)
@@ -100,12 +103,6 @@ NV_STATUS uvm_global_init(void)
goto error;
}
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
if (status != NV_OK) {
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_procfs_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
@@ -194,15 +191,22 @@ NV_STATUS uvm_global_init(void)
goto error;
}
// This sets up the ISR (interrupt service routine), by hooking into RM's top-half ISR callback. As soon as this
// call completes, GPU interrupts will start arriving, so it's important to be prepared to receive interrupts before
// this point:
// This sets up the ISR (interrupt service routine), by hooking into RM's
// top-half ISR callback. As soon as this call completes, GPU interrupts
// will start arriving, so it's important to be prepared to receive
// interrupts before this point.
status = uvm_register_callbacks();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
goto error;
}
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
if (status != NV_OK) {
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
goto error;
}
return NV_OK;
error:
@@ -214,9 +218,7 @@ void uvm_global_exit(void)
{
uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);
// Guarantee completion of any release callbacks scheduled after the flush
// in uvm_resume().
nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
uvm_unregister_callbacks();
uvm_service_block_context_exit();
@@ -237,7 +239,6 @@ void uvm_global_exit(void)
uvm_procfs_exit();
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
nv_kthread_q_stop(&g_uvm_global.global_q);
uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
@@ -472,3 +473,68 @@ NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)
return NV_OK;
}
static NV_STATUS suspend_and_drainP2P(const NvProcessorUuid *parent_uuid)
{
NV_STATUS status = NV_OK;
uvm_parent_gpu_t *parent_gpu;
uvm_mutex_lock(&g_uvm_global.global_lock);
// NVLINK STO recovery is not supported in combination with MIG
parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
if (!parent_gpu || parent_gpu->smc.enabled) {
status = NV_ERR_INVALID_DEVICE;
goto unlock;
}
status = uvm_channel_manager_suspend_p2p(parent_gpu->gpus[0]->channel_manager);
unlock:
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS resumeP2P(const NvProcessorUuid *parent_uuid)
{
NV_STATUS status = NV_OK;
uvm_parent_gpu_t *parent_gpu;
uvm_mutex_lock(&g_uvm_global.global_lock);
// NVLINK STO recovery is not supported in combination with MIG
parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
if (!parent_gpu || parent_gpu->smc.enabled) {
status = NV_ERR_INVALID_DEVICE;
goto unlock;
}
uvm_channel_manager_resume_p2p(parent_gpu->gpus[0]->channel_manager);
unlock:
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid)
{
UVM_ENTRY_RET(suspend_and_drainP2P(uuid));
}
NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid)
{
UVM_ENTRY_RET(resumeP2P(uuid));
}
NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus)
{
uvm_gpu_t *gpu;
for_each_gpu_in_mask(gpu, gpus) {
NV_STATUS status = uvm_gpu_check_nvlink_error(gpu);
if (status != NV_OK)
return status;
}
return NV_OK;
}