mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-02-23 00:13:59 +00:00
570.86.15
This commit is contained in:
@@ -54,6 +54,9 @@ static NV_STATUS uvm_register_callbacks(void)
|
||||
g_exported_uvm_ops.stopDevice = NULL;
|
||||
g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;
|
||||
|
||||
g_exported_uvm_ops.drainP2P = uvm_suspend_and_drainP2P_entry;
|
||||
g_exported_uvm_ops.resumeP2P = uvm_resumeP2P_entry;
|
||||
|
||||
// Register the UVM callbacks with the main GPU driver:
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
|
||||
if (status != NV_OK)
|
||||
@@ -100,12 +103,6 @@ NV_STATUS uvm_global_init(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = uvm_procfs_init();
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
|
||||
@@ -194,15 +191,22 @@ NV_STATUS uvm_global_init(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
// This sets up the ISR (interrupt service routine), by hooking into RM's top-half ISR callback. As soon as this
|
||||
// call completes, GPU interrupts will start arriving, so it's important to be prepared to receive interrupts before
|
||||
// this point:
|
||||
// This sets up the ISR (interrupt service routine), by hooking into RM's
|
||||
// top-half ISR callback. As soon as this call completes, GPU interrupts
|
||||
// will start arriving, so it's important to be prepared to receive
|
||||
// interrupts before this point.
|
||||
status = uvm_register_callbacks();
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
|
||||
goto error;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
||||
error:
|
||||
@@ -214,9 +218,7 @@ void uvm_global_exit(void)
|
||||
{
|
||||
uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);
|
||||
|
||||
// Guarantee completion of any release callbacks scheduled after the flush
|
||||
// in uvm_resume().
|
||||
nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
|
||||
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
|
||||
|
||||
uvm_unregister_callbacks();
|
||||
uvm_service_block_context_exit();
|
||||
@@ -237,7 +239,6 @@ void uvm_global_exit(void)
|
||||
|
||||
uvm_procfs_exit();
|
||||
|
||||
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
|
||||
nv_kthread_q_stop(&g_uvm_global.global_q);
|
||||
|
||||
uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
|
||||
@@ -472,3 +473,68 @@ NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS suspend_and_drainP2P(const NvProcessorUuid *parent_uuid)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
// NVLINK STO recovery is not supported in combination with MIG
|
||||
parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
|
||||
if (!parent_gpu || parent_gpu->smc.enabled) {
|
||||
status = NV_ERR_INVALID_DEVICE;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
status = uvm_channel_manager_suspend_p2p(parent_gpu->gpus[0]->channel_manager);
|
||||
|
||||
unlock:
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS resumeP2P(const NvProcessorUuid *parent_uuid)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
// NVLINK STO recovery is not supported in combination with MIG
|
||||
parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
|
||||
if (!parent_gpu || parent_gpu->smc.enabled) {
|
||||
status = NV_ERR_INVALID_DEVICE;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
uvm_channel_manager_resume_p2p(parent_gpu->gpus[0]->channel_manager);
|
||||
|
||||
unlock:
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid)
|
||||
{
|
||||
UVM_ENTRY_RET(suspend_and_drainP2P(uuid));
|
||||
}
|
||||
|
||||
NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid)
|
||||
{
|
||||
UVM_ENTRY_RET(resumeP2P(uuid));
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_gpu_in_mask(gpu, gpus) {
|
||||
NV_STATUS status = uvm_gpu_check_nvlink_error(gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user