535.113.01

This commit is contained in:
Maneet Singh
2023-09-21 10:43:43 -07:00
parent a8e01be6b2
commit f59818b751
94 changed files with 2414 additions and 800 deletions

View File

@@ -1345,7 +1345,6 @@ nvswitch_lib_initialize_device
NvU8 link_num;
nvlink_link *link = NULL;
NvBool is_blacklisted_by_os = NV_FALSE;
NvU64 mode;
if (!NVSWITCH_IS_DEVICE_ACCESSIBLE(device))
{
@@ -1508,18 +1507,19 @@ nvswitch_lib_initialize_device
nvswitch_reset_persistent_link_hw_state(device, link_num);
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
__FUNCTION__, link_num);
}
else if(mode == NVLINK_LINKSTATE_FAULT)
{
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
__FUNCTION__, link_num);
nvswitch_reset_and_train_link(device, link);
}
//
// During Nvswitch initialization, the default L1 thresholds are programmed by the
// BIOS from the BIOS tables. Save these L1 Threshold Values in scratch registers
// for use when resetting the thresholds to default.
//
nvswitch_program_l1_scratch_reg(device, link_num);
//
// WAR : Initializing the L1 threshold registers at this point as a WAR for
// Bug 3963639 where it was discussed that the L1 threshold register should have
// the default value for all available links and not just for active links.
//
nvswitch_init_lpwr_regs(link);
}
retval = nvswitch_set_training_mode(device);
@@ -1623,6 +1623,10 @@ nvswitch_lib_post_init_device
)
{
NvlStatus retval;
NvlStatus status;
NvU32 link_num;
NvU64 mode;
nvlink_link *link;
if (!NVSWITCH_IS_DEVICE_INITIALIZED(device))
{
@@ -1634,7 +1638,7 @@ nvswitch_lib_post_init_device
{
return retval;
}
if (nvswitch_is_bios_supported(device))
{
retval = nvswitch_bios_get_image(device);
@@ -1670,6 +1674,41 @@ nvswitch_lib_post_init_device
(void)nvswitch_launch_ALI(device);
}
//
// There is an edge case where a hypervisor may not send the same number
// of resets to the switch and the GPUs, so try to re-train any links
// that are in fault, if possible.
//
for (link_num=0; link_num < nvswitch_get_num_links(device); link_num++)
{
// Sanity check
if (!nvswitch_is_link_valid(device, link_num))
{
continue;
}
status = nvlink_lib_get_link(device->nvlink_device, link_num, &link);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: Failed to get link for LinkId %d\n",
__FUNCTION__, link_num);
continue;
}
// If the link is in fault then re-train
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
__FUNCTION__, link_num);
}
else if(mode == NVLINK_LINKSTATE_FAULT)
{
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
__FUNCTION__, link_num);
nvswitch_reset_and_train_link(device, link);
}
}
return NVL_SUCCESS;
}
@@ -4617,6 +4656,16 @@ nvswitch_init_lpwr_regs
device->hal.nvswitch_init_lpwr_regs(link);
}
//
// Chip-independent entry point for programming the L1 scratch register of a
// single link. All of the work is delegated to the per-device HAL hook of the
// same name; this wrapper adds no validation of its own, so callers must pass
// a device whose HAL table has been populated.
//
// device     - switch device whose HAL implementation is invoked
// linkNumber - link identifier, forwarded unchanged to the HAL hook
//
// Per the call site in nvswitch_lib_initialize_device, this is used to save
// the BIOS-programmed default L1 threshold values in scratch registers.
//
void
nvswitch_program_l1_scratch_reg
(
    nvswitch_device *device,
    NvU32            linkNumber
)
{
    (*device->hal.nvswitch_program_l1_scratch_reg)(device, linkNumber);
}
NvlStatus
nvswitch_launch_ALI
(