mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-02-07 00:29:58 +00:00
535.113.01
This commit is contained in:
@@ -1345,7 +1345,6 @@ nvswitch_lib_initialize_device
|
||||
NvU8 link_num;
|
||||
nvlink_link *link = NULL;
|
||||
NvBool is_blacklisted_by_os = NV_FALSE;
|
||||
NvU64 mode;
|
||||
|
||||
if (!NVSWITCH_IS_DEVICE_ACCESSIBLE(device))
|
||||
{
|
||||
@@ -1508,18 +1507,19 @@ nvswitch_lib_initialize_device
|
||||
|
||||
nvswitch_reset_persistent_link_hw_state(device, link_num);
|
||||
|
||||
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
}
|
||||
else if(mode == NVLINK_LINKSTATE_FAULT)
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
nvswitch_reset_and_train_link(device, link);
|
||||
}
|
||||
//
|
||||
// During Nvswitch initialization, the default L1 thresholds are programmed by the
|
||||
// BIOS from the BIOS tables. Save these L1 Threshold Values in scratch registers
|
||||
// for use when resetting the thresholds to default.
|
||||
//
|
||||
nvswitch_program_l1_scratch_reg(device, link_num);
|
||||
|
||||
//
|
||||
// WAR : Initializing the L1 threshold registers at this point as a WAR for
|
||||
// Bug 3963639 where it was discussed that the L1 threshold register should have
|
||||
// the default value for all available links and not just for active links.
|
||||
//
|
||||
nvswitch_init_lpwr_regs(link);
|
||||
}
|
||||
|
||||
retval = nvswitch_set_training_mode(device);
|
||||
@@ -1623,6 +1623,10 @@ nvswitch_lib_post_init_device
|
||||
)
|
||||
{
|
||||
NvlStatus retval;
|
||||
NvlStatus status;
|
||||
NvU32 link_num;
|
||||
NvU64 mode;
|
||||
nvlink_link *link;
|
||||
|
||||
if (!NVSWITCH_IS_DEVICE_INITIALIZED(device))
|
||||
{
|
||||
@@ -1634,7 +1638,7 @@ nvswitch_lib_post_init_device
|
||||
{
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
if (nvswitch_is_bios_supported(device))
|
||||
{
|
||||
retval = nvswitch_bios_get_image(device);
|
||||
@@ -1670,6 +1674,41 @@ nvswitch_lib_post_init_device
|
||||
(void)nvswitch_launch_ALI(device);
|
||||
}
|
||||
|
||||
//
|
||||
// There is an edge case where a hypervisor may not send the same number
|
||||
// of resets to the switch and GPUs, so try to re-train links in fault
|
||||
// if possible
|
||||
//
|
||||
for (link_num=0; link_num < nvswitch_get_num_links(device); link_num++)
|
||||
{
|
||||
// Sanity check
|
||||
if (!nvswitch_is_link_valid(device, link_num))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
status = nvlink_lib_get_link(device->nvlink_device, link_num, &link);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR, "%s: Failed to get link for LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the link is in fault then re-train
|
||||
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
}
|
||||
else if(mode == NVLINK_LINKSTATE_FAULT)
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
nvswitch_reset_and_train_link(device, link);
|
||||
}
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -4617,6 +4656,16 @@ nvswitch_init_lpwr_regs
|
||||
device->hal.nvswitch_init_lpwr_regs(link);
|
||||
}
|
||||
|
||||
//
// Thin dispatch wrapper: forwards the request to the device's HAL entry
// point of the same name (device->hal.nvswitch_program_l1_scratch_reg).
//
// device     - switch device; also supplies the HAL table that is called.
// linkNumber - link number, handed to the HAL implementation unchanged.
//
// Returns nothing; any failure inside the HAL implementation is not
// reported back through this wrapper.
//
// NOTE(review): per the comment at the call site in
// nvswitch_lib_initialize_device, this is used to save the BIOS-programmed
// default L1 threshold values in scratch registers so they can be restored
// later. The actual register programming lives in the HAL implementation,
// which is not visible here - confirm there.
//
void
|
||||
nvswitch_program_l1_scratch_reg
|
||||
(
|
||||
nvswitch_device *device,
|
||||
NvU32 linkNumber
|
||||
)
|
||||
{
|
||||
device->hal.nvswitch_program_l1_scratch_reg(device, linkNumber);
|
||||
}
|
||||
|
||||
NvlStatus
|
||||
nvswitch_launch_ALI
|
||||
(
|
||||
|
||||
Reference in New Issue
Block a user