550.54.14

This commit is contained in:
Bernhard Stoeckner
2024-02-23 16:37:56 +01:00
parent 91676d6628
commit 476bd34534
186 changed files with 42509 additions and 37629 deletions

View File

@@ -222,8 +222,7 @@ _cci_module_cable_detect
}
default:
{
NVSWITCH_ASSERT(0);
break;
return -NVL_ERR_NOT_SUPPORTED;
}
}
@@ -348,8 +347,9 @@ _cci_module_identify
// Mark as faulty
device->pCci->isFaulty[moduleId] = NV_TRUE;
NVSWITCH_PRINT(device, ERROR,
"%s: Module HW check failed. Module %d\n", __FUNCTION__, moduleId);
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_CCI_MODULE,
"Module %d faulty\n", moduleId);
return -NVL_ERR_GENERIC;
}
@@ -612,6 +612,9 @@ _cci_module_identify_async
NvlStatus retval;
PCCI pCci = device->pCci;
CCI_MODULE_ONBOARD_STATE nextState;
CCI_MODULE_STATE *pOnboardState;
pOnboardState = &device->pCci->moduleState[moduleId];
nvswitch_os_memset(&nextState, 0, sizeof(CCI_MODULE_ONBOARD_STATE));
@@ -637,8 +640,9 @@ _cci_module_identify_async
}
default:
{
// Not expected
NVSWITCH_ASSERT(0);
// Invalid cable type
pOnboardState->onboardError.bOnboardFailure = NV_TRUE;
pOnboardState->onboardError.failedOnboardState = pOnboardState->currOnboardState;
nextState.onboardPhase = CCI_ONBOARD_PHASE_CHECK_CONDITION;
break;
}
@@ -646,6 +650,8 @@ _cci_module_identify_async
}
else
{
pOnboardState->onboardError.bOnboardFailure = NV_TRUE;
pOnboardState->onboardError.failedOnboardState = pOnboardState->currOnboardState;
nextState.onboardPhase = CCI_ONBOARD_PHASE_CHECK_CONDITION;
}

View File

@@ -7727,11 +7727,11 @@ nvswitch_ctrl_get_err_info_lr10
}
// TODO NVidia TL not supported
NVSWITCH_PRINT(device, WARN,
NVSWITCH_PRINT(device, NOISY,
"%s WARNING: Nvidia %s register %s does not exist!\n",
__FUNCTION__, "NVLTL", "NV_NVLTL_TL_ERRLOG_REG");
NVSWITCH_PRINT(device, WARN,
NVSWITCH_PRINT(device, NOISY,
"%s WARNING: Nvidia %s register %s does not exist!\n",
__FUNCTION__, "NVLTL", "NV_NVLTL_TL_INTEN_REG");

View File

@@ -1638,6 +1638,9 @@ nvswitch_cci_module_access_cmd_ls10
// Mark as faulty
device->pCci->isFaulty[osfp] = NV_TRUE;
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_CCI_MODULE,
"Module %d access error\n", osfp);
return -NVL_IO_ERROR;
}

View File

@@ -5549,6 +5549,29 @@ _nvswitch_emit_link_errors_nvldl_fatal_link_ls10
}
}
//
// Dump the MINION ALI debug registers of a link to the error log.
//
// Looks up the nvlink_link for the given link index and, when the link exists
// and nvswitch_minion_get_ali_debug_registers_ls10() reports NVL_SUCCESS,
// prints the three captured values (dlstatMn00, dlstatUc01, dlstatLinkIntr).
// Nothing is printed when the link lookup or the register query fails.
//
// @param device  nvswitch device the link belongs to
// @param link    link index handed to nvswitch_get_link()
//
static void
_nvswitch_dump_minion_ali_debug_registers_ls10
(
    nvswitch_device *device,
    NvU32            link
)
{
    NVSWITCH_MINION_ALI_DEBUG_REGISTERS params;
    nvlink_link *nvlink = nvswitch_get_link(device, link);

    // params is only read after the query succeeded, so it needs no pre-init.
    if ((nvlink != NULL) &&
        (nvswitch_minion_get_ali_debug_registers_ls10(device, nvlink, &params) == NVL_SUCCESS))
    {
        // Each label matches the params field printed on that line; link is
        // an NvU32, hence %u.
        NVSWITCH_PRINT(device, ERROR,
            "%s: Minion error on link #%u!:\n"
            "Minion DLSTAT MN00 = 0x%x\n"
            "Minion DLSTAT UC01 = 0x%x\n"
            "Minion DLSTAT LINK_INTR = 0x%x\n",
            __FUNCTION__, link,
            params.dlstatMn00, params.dlstatUc01, params.dlstatLinkIntr);
    }
}
static void
_nvswitch_emit_link_errors_minion_fatal_ls10
(
@@ -5611,6 +5634,8 @@ _nvswitch_emit_link_errors_minion_fatal_ls10
enabledLinks &= ~bit;
regData = DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, enabledLinks);
NVSWITCH_MINION_LINK_WR32_LS10(device, link, _MINION, _MINION_INTR_STALL_EN, regData);
_nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
}
static void
@@ -5647,8 +5672,8 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, regData))
{
case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
break;
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
break;
case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link PMDISABLED interrupt");
break;
@@ -5660,6 +5685,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
break;
}
_nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
}
static void

View File

@@ -42,6 +42,11 @@
#include "nvswitch/ls10/dev_minion_ip_addendum.h"
#include "ls10/minion_nvlink_defines_public_ls10.h"
//
// Timeouts for synchronous TL link-state requests, one per requested state
// (ACTIVE, SHUTDOWN, RESET, L2). Values are in milliseconds:
// nvswitch_request_tl_link_state_ls10() multiplies the selected value by
// NVSWITCH_INTERVAL_1MSEC_IN_NS when it creates the polling timeout.
//
#define NV_NVLINK_TLREQ_TIMEOUT_ACTIVE 10000
#define NV_NVLINK_TLREQ_TIMEOUT_SHUTDOWN 10
#define NV_NVLINK_TLREQ_TIMEOUT_RESET 4
#define NV_NVLINK_TLREQ_TIMEOUT_L2 5
static void
_nvswitch_configure_reserved_throughput_counters
(
@@ -143,9 +148,9 @@ nvswitch_init_lpwr_regs_ls10
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: Failed to set L1 Threshold\n",
__FUNCTION__);
__FUNCTION__);
}
}
}
void
nvswitch_corelib_training_complete_ls10
@@ -1433,7 +1438,7 @@ nvswitch_load_link_disable_settings_ls10
nvswitch_device *device,
nvlink_link *link
)
{
{
NvU32 regVal;
// Read state from NVLIPT HW
@@ -1443,7 +1448,7 @@ nvswitch_load_link_disable_settings_ls10
if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, _DISABLE, regVal))
{
NVSWITCH_ASSERT(!cciIsLinkManaged(device, link->linkNumber));
// Set link to invalid and unregister from corelib
device->link[link->linkNumber].valid = NV_FALSE;
nvlink_lib_unregister_link(link);
@@ -1589,7 +1594,7 @@ nvswitch_reset_and_train_link_ls10
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
(link_intr_subcode == MINION_ALARM_BUSY))
{
status = nvswitch_request_tl_link_state_ls10(link,
@@ -1683,6 +1688,39 @@ nvswitch_are_link_clocks_on_ls10
return NV_TRUE;
}
//
// Select the polling timeout that goes with a requested TL link state.
//
// @param device       nvswitch device, used only for error logging
// @param tlLinkState  requested state; must be one of the
//                     NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_* values
//                     handled below (ACTIVE, RESET, SHUTDOWN, L2)
// @param timeoutVal   out: the matching NV_NVLINK_TLREQ_TIMEOUT_* value.
//                     Left untouched when tlLinkState is not recognized.
//
// @return NVL_SUCCESS for a recognized state, -NVL_BAD_ARGS otherwise.
//         The error is negated to match the other error returns in this
//         driver (e.g. -NVL_ERR_GENERIC, -NVL_IO_ERROR).
//
static
NvlStatus
_nvswitch_tl_request_get_timeout_value_ls10
(
    nvswitch_device *device,
    NvU32            tlLinkState,
    NvU32           *timeoutVal
)
{
    switch (tlLinkState)
    {
        case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_ACTIVE:
            *timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_ACTIVE;
            break;
        case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET:
            *timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_RESET;
            break;
        case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_SHUTDOWN:
            *timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_SHUTDOWN;
            break;
        case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_L2:
            *timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_L2;
            break;
        default:
            // tlLinkState is an NvU32, so it is printed with %u.
            NVSWITCH_PRINT(device, ERROR,
                "%s: Invalid tlLinkState %u provided!\n",
                __FUNCTION__, tlLinkState);
            return -NVL_BAD_ARGS;
    }

    return NVL_SUCCESS;
}
NvlStatus
nvswitch_request_tl_link_state_ls10
(
@@ -1696,6 +1734,9 @@ nvswitch_request_tl_link_state_ls10
NvU32 linkStatus;
NvU32 lnkErrStatus;
NvU32 bit;
NvU32 timeoutVal;
NVSWITCH_TIMEOUT timeout;
NvBool keepPolling;
if (!NVSWITCH_IS_LINK_ENG_VALID_LS10(device, NVLIPT_LNK, link->linkNumber))
{
@@ -1729,17 +1770,43 @@ nvswitch_request_tl_link_state_ls10
if (bSync)
{
// Wait for the TL link state register to complete
status = nvswitch_wait_for_tl_request_ready_lr10(link);
// setup timeouts for the TL request
status = _nvswitch_tl_request_get_timeout_value_ls10(device, tlLinkState, &timeoutVal);
if (status != NVL_SUCCESS)
{
return status;
return NVL_ERR_INVALID_STATE;
}
nvswitch_timeout_create(NVSWITCH_INTERVAL_1MSEC_IN_NS * timeoutVal, &timeout);
status = NVL_MORE_PROCESSING_REQUIRED;
do
{
keepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE;
// Check for state requested
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) ==
tlLinkState)
{
status = NVL_SUCCESS;
break;
}
nvswitch_os_sleep(1);
}
while(keepPolling);
// Do one final check if the polling loop didn't see the target linkState
if (status == NVL_MORE_PROCESSING_REQUIRED)
{
// Check for state requested
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) !=
tlLinkState)
{
@@ -1750,6 +1817,8 @@ nvswitch_request_tl_link_state_ls10
}
}
}
return status;
}