535.113.01

This commit is contained in:
Maneet Singh
2023-09-21 10:43:43 -07:00
parent a8e01be6b2
commit f59818b751
94 changed files with 2414 additions and 800 deletions

View File

@@ -4740,7 +4740,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
{
LinkTrainingType preferredTrainingType = trainType;
bool result;
bool bEnableFecOnSor;
//
// Validate link config against caps
//
@@ -4832,16 +4832,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
result = postLTAdjustment(activeLinkConfig, force);
}
bEnableFecOnSor = lConfig.bEnableFEC;
if (main->isEDP())
{
DeviceImpl * nativeDev = findDeviceInList(Address());
if (nativeDev && nativeDev->bIsPreviouslyFakedMuxDevice)
bEnableFecOnSor = activeLinkConfig.bEnableFEC;
}
if((lConfig.lanes != 0) && result && bEnableFecOnSor)
if((lConfig.lanes != 0) && result && activeLinkConfig.bEnableFEC)
{
//
// Extended latency from link-train end to FEC enable pattern
@@ -6057,7 +6048,7 @@ void ConnectorImpl::notifyLongPulseInternal(bool statusConnected)
if (this->bReassessMaxLink)
{
//
// If the highest assessed LC is not equal to
// If the highest assessed LC is not equal to
// max possible link config, re-assess link
//
NvU8 retries = 0U;

View File

@@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r537_13
#define NV_BUILD_BRANCH r537_41
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r537_13
#define NV_PUBLIC_BRANCH r537_41
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r537_13-260"
#define NV_BUILD_CHANGELIST_NUM (33206197)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r537_41-286"
#define NV_BUILD_CHANGELIST_NUM (33292694)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r535/r537_13-260"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33206197)
#define NV_BUILD_NAME "rel/gpu_drv/r535/r537_41-286"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33292694)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r537_13-1"
#define NV_BUILD_CHANGELIST_NUM (33194057)
#define NV_BUILD_BRANCH_VERSION "r537_41-1"
#define NV_BUILD_CHANGELIST_NUM (33292694)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "537.17"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33194057)
#define NV_BUILD_NAME "537.42"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33292694)
#define NV_BUILD_BRANCH_BASE_VERSION R535
#endif
// End buildmeister python edited section

View File

@@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "535.104.05"
#define NV_VERSION_STRING "535.113.01"
#else

View File

@@ -20,7 +20,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gh100_dev_fb_h_
#define __gh100_dev_fb_h_
#define NV_PFB_NISO_FLUSH_SYSMEM_ADDR_SHIFT 8 /* */
@@ -29,4 +29,25 @@
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI 0x00100A38 /* RW-4R */
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR 31:0 /* RWIVF */
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR_MASK 0x000FFFFF /* ----V */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
#endif // __gh100_dev_fb_h_

View File

@@ -0,0 +1,29 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
// GH100 FBPA register definitions (include-guarded).
#ifndef __gh100_dev_fbpa_h_
#define __gh100_dev_fbpa_h_
// Indexed ECC DED count register: entry i is located at 0x009025A0 + 4*i.
// NOTE(review): __SIZE_1 is presumably the number of valid indices (4) -
// confirm against the hardware manual.
#define NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1 4 /* */
#define NV_PFB_FBPA_0_ECC_DED_COUNT(i) (0x009025A0+(i)*4) /* RW-4A */
#endif // __gh100_dev_fbpa_h_

View File

@@ -0,0 +1,33 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
// GH100 LTC register definitions (include-guarded).
#ifndef __gh100_dev_ltc_h_
#define __gh100_dev_ltc_h_
// L2 cache ECC uncorrected error count register at 0x001404f8.
// Fields are written as high:low bit ranges: TOTAL occupies bits 15:0 and
// UNIQUE occupies bits 31:16; both *_INIT values are 0.
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT 0x001404f8 /* RW-4R */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIVF */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIVF */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
#endif // __gh100_dev_ltc_h_

View File

@@ -0,0 +1,52 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
// GH100 XPL data-link error counter register definitions (include-guarded).
// Fields are written as high:low bit ranges.
#ifndef __gh100_dev_nv_xpl_h_
#define __gh100_dev_nv_xpl_h_
// RBUF error count register (0x00000a54): CORR_ERR in bits 15:0,
// UNCORR_ERR in bits 31:16, both with an *_INIT value of 0.
// NOTE(review): __PRIV_LEVEL_MASK (0x00000b08) looks like the offset of the
// register guarding access to this one - confirm against the hardware manual.
#define NV_XPL_DL_ERR_COUNT_RBUF 0x00000a54 /* R--4R */
#define NV_XPL_DL_ERR_COUNT_RBUF__PRIV_LEVEL_MASK 0x00000b08 /* */
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR 15:0 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR 31:16 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR_INIT 0x0000 /* R-E-V */
// SEQ_LUT error count register (0x00000a58): same field layout as RBUF.
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT 0x00000a58 /* R--4R */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT__PRIV_LEVEL_MASK 0x00000b08 /* */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR 15:0 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR 31:16 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR_INIT 0x0000 /* R-E-V */
// Error reset register (0x00000a5c): one single-bit field per counter
// (bits 0, 1, 16 and 17), each with values DONE (0x0) and PENDING (0x1).
#define NV_XPL_DL_ERR_RESET 0x00000a5c /* RW-4R */
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT 0:0 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT 1:1 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT 16:16 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT 17:17 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#endif // __gh100_dev_nv_xpl_h_

View File

@@ -24,4 +24,7 @@
#ifndef __gh100_dev_xtl_ep_pri_h__
#define __gh100_dev_xtl_ep_pri_h__
#define NV_EP_PCFGM 0x92FFF:0x92000 /* RW--D */
#define NV_XTL_EP_PRI_DED_ERROR_STATUS 0x0000043C /* RW-4R */
#define NV_XTL_EP_PRI_RAM_ERROR_INTR_STATUS 0x000003C8 /* RW-4R */
#endif // __gh100_dev_xtl_ep_pri_h__

View File

@@ -21,3 +21,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#define NV_CHIP_EXTENDED_SYSTEM_PHYSICAL_ADDRESS_BITS 52
#define NV_LTC_PRI_STRIDE 8192
#define NV_LTS_PRI_STRIDE 512
#define NV_FBPA_PRI_STRIDE 16384
#define NV_SCAL_LITTER_NUM_FBPAS 24
#define NV_XPL_BASE_ADDRESS 540672
#define NV_XTL_BASE_ADDRESS 593920

View File

@@ -47,5 +47,17 @@
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT 3:3
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT_PENDING 0x1
#define NV_XAL_EP_SCPM_PRI_DUMMY_DATA_PATTERN_INIT 0xbadf0200
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT 0x0010f364 /* RW-4R */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT 0x0010f37c /* RW-4R */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
#endif // __gh100_pri_nv_xal_ep_h__

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -635,4 +635,7 @@
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT 28:28 /* RWIVF */
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_SUPPORTED 0x00000001 /* RWI-V */
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_NOT_SUPPORTED 0x00000000 /* RW--V */
#define NV_NVLIPT_LNK_SCRATCH_WARM 0x000007c0 /* RW-4R */
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA 31:0 /* RWEVF */
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA_INIT 0xdeadbaad /* RWE-V */
#endif // __ls10_dev_nvlipt_lnk_ip_h__

View File

@@ -439,6 +439,11 @@ NvlStatus nvlink_lib_register_link(nvlink_device *dev, nvlink_link *link);
*/
NvlStatus nvlink_lib_unregister_link(nvlink_link *link);
/*
 * Counts the registered devices whose type equals deviceType and stores the
 * result in *numDevices.
 *
 * Returns NVL_SUCCESS once the count has been stored. If the top-level lock
 * cannot be acquired, that lock status is returned and *numDevices is not
 * written. If the library is not initialized, the count stored is 0.
 */
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
/************************************************************************************************/
/******************************* NVLink link management functions *******************************/

View File

@@ -46,6 +46,11 @@ NvlStatus nvlink_lib_unload(void);
*/
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
/*
 * Counts the registered devices whose type equals deviceType and stores the
 * result in *numDevices.
 *
 * Returns NVL_SUCCESS once the count has been stored. If the top-level lock
 * cannot be acquired, that lock status is returned and *numDevices is not
 * written. If the library is not initialized, the count stored is 0.
 */
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
#ifdef __cplusplus
}
#endif

View File

@@ -198,3 +198,48 @@ nvlink_lib_is_registerd_device_with_reduced_config(void)
return bIsReducedConfg;
}
/*
 * Report how many registered devices are of the requested type.
 *
 * deviceType - value compared against each registered device's 'type' field
 * numDevices - out parameter; receives the number of matching devices
 *
 * Returns NVL_SUCCESS once the count has been stored. If the top-level lock
 * cannot be acquired, the lock status is returned and *numDevices is left
 * untouched. When the library is not initialized the device list is not
 * walked and the count stored is 0.
 */
NvlStatus
nvlink_lib_return_device_count_by_type
(
    NvU32  deviceType,
    NvU32 *numDevices
)
{
    nvlink_device *pDevice = NULL;
    NvU32          matches = 0;
    NvlStatus      status  = NVL_SUCCESS;

    // Library not initialized: skip the list walk and report zero
    if (!nvlink_lib_is_initialized())
    {
        *numDevices = matches;
        return NVL_SUCCESS;
    }

    // The device list is walked only while the top-level lock is held
    status = nvlink_lib_top_lock_acquire();
    if (NVL_SUCCESS != status)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));
        return status;
    }

    // Tally every registered device whose type matches the request
    FOR_EACH_DEVICE_REGISTERED(pDevice, nvlinkLibCtx.nv_devicelist_head, node)
    {
        if (pDevice->type == deviceType)
        {
            ++matches;
        }
    }

    // Done with the device list
    nvlink_lib_top_lock_release();

    *numDevices = matches;
    return NVL_SUCCESS;
}

View File

@@ -213,6 +213,7 @@
_op(void, nvswitch_reset_persistent_link_hw_state, (nvswitch_device *device, NvU32 linkNumber), _arch)\
_op(void, nvswitch_store_topology_information, (nvswitch_device *device, nvlink_link *link), _arch) \
_op(void, nvswitch_init_lpwr_regs, (nvlink_link *link), _arch) \
_op(void, nvswitch_program_l1_scratch_reg, (nvswitch_device *device, NvU32 linkNumber), _arch) \
_op(NvlStatus, nvswitch_set_training_mode, (nvswitch_device *device), _arch) \
_op(NvU32, nvswitch_get_sublink_width, (nvswitch_device *device, NvU32 linkNumber), _arch) \
_op(NvBool, nvswitch_i2c_is_device_access_allowed, (nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead), _arch) \

View File

@@ -583,9 +583,12 @@ typedef struct
NvBool bDisabledRemoteEndLinkMaskCached;
} lr10_device;
#define NVSWITCH_NUM_DEVICES_PER_DELTA_LR10 6
typedef struct {
NvU32 switchPhysicalId;
NvU64 linkMask;
NvU64 accessLinkMask;
NvU64 trunkLinkMask;
} lr10_links_connected_to_disabled_remote_end;
#define NVSWITCH_GET_CHIP_DEVICE_LR10(_device) \
@@ -649,6 +652,7 @@ void nvswitch_setup_link_loopback_mode_lr10(nvswitch_device *device, NvU32
void nvswitch_reset_persistent_link_hw_state_lr10(nvswitch_device *device, NvU32 linkNumber);
void nvswitch_store_topology_information_lr10(nvswitch_device *device, nvlink_link *link);
void nvswitch_init_lpwr_regs_lr10(nvlink_link *link);
void nvswitch_program_l1_scratch_reg_lr10(nvswitch_device *device, NvU32 linkNumber);
NvlStatus nvswitch_set_training_mode_lr10(nvswitch_device *device);
NvBool nvswitch_i2c_is_device_access_allowed_lr10(nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead);
NvU32 nvswitch_get_sublink_width_lr10(nvswitch_device *device,NvU32 linkNumber);

View File

@@ -529,10 +529,20 @@ typedef struct
{
NvBool bLinkErrorsCallBackEnabled;
NvBool bLinkStateCallBackEnabled;
NvBool bResetAndDrainRetry;
NvU64 lastRetrainTime;
NvU64 lastLinkUpTime;
} NVLINK_LINK_ERROR_REPORTING_STATE;
//
// Cached interrupt masks for a link's deferred errors: one set for fatal
// and one for non-fatal interrupts. This is the part of the per-link record
// that _nvswitch_clear_deferred_link_errors_ls10 zeroes.
//
typedef struct
{
NVLINK_LINK_ERROR_INFO_ERR_MASKS fatalIntrMask;
NVLINK_LINK_ERROR_INFO_ERR_MASKS nonFatalIntrMask;
} NVLINK_LINK_ERROR_REPORTING_DATA;
//
// Per-link deferred error record (the element type of
// chip_device->deferredLinkErrors[]): callback/timing state plus the cached
// interrupt masks.
// NOTE(review): the split appears intended to let 'data' be cleared without
// resetting 'state' - confirm with the callers.
//
typedef struct
{
NVLINK_LINK_ERROR_REPORTING_STATE state;
NVLINK_LINK_ERROR_REPORTING_DATA data;
} NVLINK_LINK_ERROR_REPORTING;
typedef struct
@@ -834,7 +844,6 @@ typedef const struct
#define nvswitch_setup_link_loopback_mode_ls10 nvswitch_setup_link_loopback_mode_lr10
#define nvswitch_link_lane_reversed_ls10 nvswitch_link_lane_reversed_lr10
#define nvswitch_request_tl_link_state_ls10 nvswitch_request_tl_link_state_lr10
#define nvswitch_i2c_get_port_info_ls10 nvswitch_i2c_get_port_info_lr10
#define nvswitch_i2c_set_hw_speed_mode_ls10 nvswitch_i2c_set_hw_speed_mode_lr10
@@ -929,6 +938,7 @@ void nvswitch_corelib_clear_link_state_lr10(nvlink_link *link);
NvlStatus nvswitch_corelib_set_dl_link_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
NvlStatus nvswitch_corelib_set_tx_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
void nvswitch_init_lpwr_regs_ls10(nvlink_link *link);
void nvswitch_program_l1_scratch_reg_ls10(nvswitch_device *device, NvU32 linkNumber);
NvlStatus nvswitch_minion_service_falcon_interrupts_ls10(nvswitch_device *device, NvU32 instance);
@@ -986,6 +996,7 @@ NvlStatus nvswitch_reset_and_drain_links_ls10(nvswitch_device *device, NvU64 lin
void nvswitch_service_minion_all_links_ls10(nvswitch_device *device);
NvlStatus nvswitch_ctrl_get_board_part_number_ls10(nvswitch_device *device, NVSWITCH_GET_BOARD_PART_NUMBER_VECTOR *p);
void nvswitch_create_deferred_link_state_check_task_ls10(nvswitch_device *device, NvU32 nvlipt_instance, NvU32 link);
NvlStatus nvswitch_request_tl_link_state_ls10(nvlink_link *link, NvU32 tlLinkState, NvBool bSync);
//
// SU generated functions

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -46,6 +46,9 @@ typedef enum _MINION_STATUS
MINION_ALARM_BUSY = 80,
} MINION_STATUS;
#define LINKSTATUS_RESET 0x0
#define LINKSTATUS_UNINIT 0x1
#define LINKSTATUS_LANESHUTDOWN 0x13
#define LINKSTATUS_EMERGENCY_SHUTDOWN 0x29
#define LINKSTATUS_INITPHASE1 0x24
#define LINKSTATUS_ACTIVE_PENDING 0x25
#endif // _MINION_NVLINK_DEFINES_PUBLIC_H_

View File

@@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
@@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x176bd707, 0x7693db62, 0xcee1dbf7, 0x0ec5a1fa,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x930f31b1, 0x6ce8df20, 0xa1e5e4d9, 0xc55f48a9,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

View File

@@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
@@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x176bd707, 0x7693db62, 0xcee1dbf7, 0x0ec5a1fa,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x930f31b1, 0x6ce8df20, 0xa1e5e4d9, 0xc55f48a9,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

View File

@@ -43,40 +43,68 @@
#include "nvswitch/lr10/dev_nvlipt_ip.h"
#include "nvswitch/lr10/dev_nport_ip.h"
#define NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK 8 // This must be incremented if any entries are added to the array below
#define NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK 12 // This must be incremented if any entries are added to the array below
lr10_links_connected_to_disabled_remote_end nvswitchDisconnetedRemoteLinkMasks[] =
{
{
0x8, // switchPhysicalId
0x56A000500 //linkMask
0x8, // switchPhysicalId
0x56A000500, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x9, // switchPhysicalId
0x509009900 //linkMask
0x9, // switchPhysicalId
0x509009900, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0xb, // switchPhysicalId
0x56A000600 //linkMask
0xa, // switchPhysicalId
0x0, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0xc, // switchPhysicalId
0x4A9009400 //linkMask
0xb, // switchPhysicalId
0x56A000600, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x18, // switchPhysicalId
0x56A000500 //linkMask
0xc, // switchPhysicalId
0x4A9009400, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x19, // switchPhysicalId
0x509009900 //linkMask
0xd, // switchPhysicalId
0x0, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1b, // switchPhysicalId
0x56A000600 //linkMask
0x18, // switchPhysicalId
0x56A000500, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1c, // switchPhysicalId
0x4A9009400 //linkMask
0x19, // switchPhysicalId
0x509009900, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1a, // switchPhysicalId
0x0, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1b, // switchPhysicalId
0x56A000600, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1c, // switchPhysicalId
0x4A9009400, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1d, // switchPhysicalId
0x0, // accessLinkMask
0xFF00FF // trunkLinkMask
},
};
ct_assert(sizeof(nvswitchDisconnetedRemoteLinkMasks)/sizeof(lr10_links_connected_to_disabled_remote_end) == NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK);
@@ -653,6 +681,15 @@ nvswitch_init_lpwr_regs_lr10
tempRegVal);
}
//
// LR10 implementation of the nvswitch_program_l1_scratch_reg HAL entry.
// Intentionally empty: neither 'device' nor 'linkNumber' is used.
//
void
nvswitch_program_l1_scratch_reg_lr10
(
nvswitch_device *device,
NvU32 linkNumber
)
{
// Not Implemented for LR10
}
void
nvswitch_init_buffer_ready_lr10
@@ -841,7 +878,6 @@ nvswitch_corelib_set_dl_link_mode_lr10
if (nvswitch_does_link_need_termination_enabled(device, link))
{
if (mode == NVLINK_LINKSTATE_INITPHASE1)
{
status = nvswitch_link_termination_setup(device, link);
@@ -2372,6 +2408,8 @@ nvswitch_load_link_disable_settings_lr10
NvU32 val;
NVLINK_CONFIG_DATA_LINKENTRY *vbios_link_entry = NULL;
NVSWITCH_BIOS_NVLINK_CONFIG *bios_config;
NvlStatus status;
lr10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LR10(device);
bios_config = nvswitch_get_bios_nvlink_config(device);
if ((bios_config == NULL) || (bios_config->bit_address == 0))
@@ -2412,15 +2450,16 @@ nvswitch_load_link_disable_settings_lr10
__FUNCTION__, link->linkNumber);
return;
}
val = FLD_SET_DRF(_NVLIPT_LNK, _CTRL_SYSTEM_LINK_MODE_CTRL, _LINK_DISABLE,
_DISABLED, val);
NVSWITCH_LINK_WR32_LR10(device, link->linkNumber,
NVLIPT_LNK, _NVLIPT_LNK, _CTRL_SYSTEM_LINK_MODE_CTRL, val);
// Set link to invalid and unregister from corelib
device->link[link->linkNumber].valid = NV_FALSE;
nvlink_lib_unregister_link(link);
nvswitch_destroy_link(link);
status = nvswitch_link_termination_setup(device, link);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR,
"%s: Failed to enable termination on link #%d\n", __FUNCTION__, link->linkNumber);
return;
}
// add link to disabledRemoteEndLinkMask
chip_device->disabledRemoteEndLinkMask |= NVBIT64(link->linkNumber);
return;
}
@@ -2488,6 +2527,8 @@ nvswitch_does_link_need_termination_enabled_lr10
NvU32 i;
NvU32 physicalId;
lr10_device *chip_device;
NvU32 numNvswitches;
NvlStatus status;
physicalId = nvswitch_read_physical_id(device);
chip_device = NVSWITCH_GET_CHIP_DEVICE_LR10(device);
@@ -2510,16 +2551,30 @@ nvswitch_does_link_need_termination_enabled_lr10
chip_device->disabledRemoteEndLinkMask = 0;
if (nvlink_lib_is_registerd_device_with_reduced_config())
{
for (i = 0; i < NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK; ++i)
{
if (nvswitchDisconnetedRemoteLinkMasks[i].switchPhysicalId == physicalId)
for (i = 0; i < NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK; ++i)
{
chip_device->disabledRemoteEndLinkMask =
nvswitchDisconnetedRemoteLinkMasks[i].linkMask;
break;
if (nvswitchDisconnetedRemoteLinkMasks[i].switchPhysicalId == physicalId)
{
chip_device->disabledRemoteEndLinkMask |=
nvswitchDisconnetedRemoteLinkMasks[i].accessLinkMask;
status = nvlink_lib_return_device_count_by_type(NVLINK_DEVICE_TYPE_NVSWITCH, &numNvswitches);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR,
"%s: Failed to get nvswitch device count!\n", __FUNCTION__);
break;
}
if (numNvswitches <= NVSWITCH_NUM_DEVICES_PER_DELTA_LR10)
{
chip_device->disabledRemoteEndLinkMask |=
nvswitchDisconnetedRemoteLinkMasks[i].trunkLinkMask;
}
break;
}
}
}
}
chip_device->bDisabledRemoteEndLinkMaskCached = NV_TRUE;
}

View File

@@ -5525,7 +5525,7 @@ _nvswitch_emit_link_errors_nvldl_fatal_link_ls10
INFOROM_NVLINK_ERROR_EVENT error_event;
// Only enabled link errors are deferred
pending = chip_device->deferredLinkErrors[link].fatalIntrMask.dl;
pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;
report.raw_pending = pending;
report.raw_enable = pending;
report.mask = report.raw_enable;
@@ -5565,13 +5565,13 @@ _nvswitch_emit_link_errors_minion_fatal_ls10
NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
NvU32 bit = BIT(localLinkIdx);
if (!chip_device->deferredLinkErrors[link].fatalIntrMask.minionLinkIntr.bPending)
if (!chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.bPending)
{
return;
}
// Grab the cached interrupt data
regData = chip_device->deferredLinkErrors[link].fatalIntrMask.minionLinkIntr.regData;
regData = chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.regData;
// get all possible interrupting links associated with this minion
report.raw_enable = link;
@@ -5628,7 +5628,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
NvU32 bit = BIT(localLinkIdx);
if (!chip_device->deferredLinkErrors[link].nonFatalIntrMask.minionLinkIntr.bPending)
if (!chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.bPending)
{
return;
}
@@ -5637,7 +5637,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
regData = NVSWITCH_MINION_RD32_LS10(device, nvlipt_instance, _MINION, _MINION_INTR_STALL_EN);
// Grab the cached interrupt data
regData = chip_device->deferredLinkErrors[link].nonFatalIntrMask.minionLinkIntr.regData;
regData = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.regData;
// get all possible interrupting links associated with this minion
report.raw_enable = link;
@@ -5675,7 +5675,7 @@ _nvswitch_emit_link_errors_nvldl_nonfatal_link_ls10
NvU32 pending, bit, reg;
// Only enabled link errors are deferred
pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl;
pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl;
report.raw_pending = pending;
report.raw_enable = pending;
report.mask = report.raw_enable;
@@ -5723,8 +5723,8 @@ _nvswitch_emit_link_errors_nvltlc_rx_lnk_nonfatal_1_ls10
NvU32 injected;
// Only enabled link errors are deferred
pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1;
injected = chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1Injected;
pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1;
injected = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected;
report.raw_pending = pending;
report.raw_enable = pending;
report.mask = report.raw_enable;
@@ -5760,7 +5760,7 @@ _nvswitch_emit_link_errors_nvlipt_lnk_nonfatal_ls10
INFOROM_NVLINK_ERROR_EVENT error_event;
// Only enabled link errors are deferred
pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.liptLnk;
pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk;
report.raw_pending = pending;
report.raw_enable = pending;
report.mask = report.raw_enable;
@@ -5805,11 +5805,11 @@ _nvswitch_clear_deferred_link_errors_ls10
)
{
ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
NVLINK_LINK_ERROR_REPORTING *pLinkErrors;
NVLINK_LINK_ERROR_REPORTING_DATA *pLinkErrorsData;
pLinkErrors = &chip_device->deferredLinkErrors[link];
pLinkErrorsData = &chip_device->deferredLinkErrors[link].data;
nvswitch_os_memset(pLinkErrors, 0, sizeof(NVLINK_LINK_ERROR_REPORTING));
nvswitch_os_memset(pLinkErrorsData, 0, sizeof(NVLINK_LINK_ERROR_REPORTING_DATA));
}
static void
@@ -5824,36 +5824,47 @@ _nvswitch_deferred_link_state_check_ls10
NvU32 nvlipt_instance = pErrorReportParams->nvlipt_instance;
NvU32 link = pErrorReportParams->link;
ls10_device *chip_device;
nvlink_link *pLink;
NvU64 linkState;
NvU64 lastLinkUpTime;
NvU64 lastRetrainTime;
NvU64 current_time = nvswitch_os_get_platform_time();
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
pLink = nvswitch_get_link(device, pErrorReportParams->link);
lastLinkUpTime = chip_device->deferredLinkErrors[link].state.lastLinkUpTime;
lastRetrainTime = chip_device->deferredLinkErrors[link].state.lastRetrainTime;
// If is there a retry for reset_and_drain then re-create the state check for the current link
if (chip_device->deferredLinkErrors[link].bResetAndDrainRetry == NV_TRUE)
// Sanity Check
NVSWITCH_ASSERT(nvswitch_is_link_valid(device, link));
nvswitch_os_free(pErrorReportParams);
pErrorReportParams = NULL;
chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_FALSE;
// Link came up after last retrain
if (lastLinkUpTime >= lastRetrainTime)
{
if (pErrorReportParams)
{
nvswitch_os_free(pErrorReportParams);
}
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
return;
}
if ((pLink == NULL) ||
(device->hal.nvswitch_corelib_get_dl_link_mode(pLink, &linkState) != NVL_SUCCESS) ||
((linkState != NVLINK_LINKSTATE_HS) && (linkState != NVLINK_LINKSTATE_SLEEP)))
//
// If the last time this link was up was before the last
// reset_and_drain execution and not enough time has passed since the last
// retrain then schedule another callback.
//
if (lastLinkUpTime < lastRetrainTime)
{
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
if ((current_time - lastRetrainTime) < NVSWITCH_DEFERRED_LINK_STATE_CHECK_INTERVAL_NS)
{
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
return;
}
}
//
// Otherwise, the link hasn't retrained within the timeout so emit the
// deferred errors.
//
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
_nvswitch_clear_deferred_link_errors_ls10(device, link);
nvswitch_os_free(pErrorReportParams);
chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled = NV_FALSE;
}
void
@@ -5868,7 +5879,7 @@ nvswitch_create_deferred_link_state_check_task_ls10
NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
NvlStatus status;
if (chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled)
if (chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled)
{
return;
}
@@ -5889,7 +5900,7 @@ nvswitch_create_deferred_link_state_check_task_ls10
if (status == NVL_SUCCESS)
{
chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled = NV_TRUE;
chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_TRUE;
}
else
{
@@ -5916,25 +5927,29 @@ _nvswitch_deferred_link_errors_check_ls10
ls10_device *chip_device;
NvU32 pending;
nvswitch_os_free(pErrorReportParams);
pErrorReportParams = NULL;
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_FALSE;
pending = chip_device->deferredLinkErrors[link].fatalIntrMask.dl;
if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1U, pending) ||
FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1U, pending) )
{
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
}
else
{
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
_nvswitch_clear_deferred_link_errors_ls10(device, link);
}
pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;
if (pErrorReportParams)
{
nvswitch_os_free(pErrorReportParams);
}
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
// A link fault was observed which means we also did the retrain and
// scheduled a state check task. We can exit.
if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1U, pending))
return;
if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1U, pending))
return;
//
// No link fault, emit the deferred errors.
// It is assumed that this callback runs long before a link could have been
// retrained and hit errors again.
//
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
_nvswitch_clear_deferred_link_errors_ls10(device, link);
}
static void
@@ -5949,13 +5964,11 @@ _nvswitch_create_deferred_link_errors_task_ls10
NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
NvlStatus status;
if (chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled)
if (chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled)
{
return;
}
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;
status = NVL_ERR_GENERIC;
pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS));
if(pErrorReportParams != NULL)
@@ -5972,7 +5985,7 @@ _nvswitch_create_deferred_link_errors_task_ls10
if (status == NVL_SUCCESS)
{
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_TRUE;
chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_TRUE;
}
else
{
@@ -6026,7 +6039,7 @@ _nvswitch_service_nvldl_nonfatal_link_ls10
bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_SHORT_ERROR_RATE, 1);
if (nvswitch_test_flags(pending, bit))
{
chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl |= bit;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
nvswitch_clear_flags(&unhandled, bit);
}
@@ -6049,7 +6062,7 @@ _nvswitch_service_nvldl_nonfatal_link_ls10
if (nvswitch_test_flags(pending, bit))
{
chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl |= bit;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
nvswitch_clear_flags(&unhandled, bit);
@@ -6344,8 +6357,8 @@ _nvswitch_service_nvltlc_rx_lnk_nonfatal_1_ls10
bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _HEARTBEAT_TIMEOUT_ERR, 1);
if (nvswitch_test_flags(pending, bit))
{
chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1 |= bit;
chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1Injected |= injected;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1 |= bit;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected |= injected;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
if (FLD_TEST_DRF_NUM(_NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1, _HEARTBEAT_TIMEOUT_ERR, 0x0, injected))
@@ -6628,8 +6641,10 @@ _nvswitch_service_nvlipt_lnk_status_ls10
NvU32 pending, enabled, unhandled, bit;
NvU64 mode;
nvlink_link *link;
link = nvswitch_get_link(device, link_id);
ls10_device *chip_device;
link = nvswitch_get_link(device, link_id);
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
pending = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS);
enabled = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_INT1_EN);
pending &= enabled;
@@ -6669,7 +6684,13 @@ _nvswitch_service_nvlipt_lnk_status_ls10
//
nvswitch_corelib_training_complete_ls10(link);
nvswitch_init_buffer_ready(device, link, NV_TRUE);
link->bRxDetected = NV_TRUE;
link->bRxDetected = NV_TRUE;
//
// Clear out any cached interrupts for the link and update the last link up timestamp
//
_nvswitch_clear_deferred_link_errors_ls10(device, link_id);
chip_device->deferredLinkErrors[link_id].state.lastLinkUpTime = nvswitch_os_get_platform_time();
}
else if (mode == NVLINK_LINKSTATE_FAULT)
{
@@ -6706,8 +6727,6 @@ _nvswitch_service_nvlipt_lnk_nonfatal_ls10
)
{
ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
nvlink_link *link_info = nvswitch_get_link(device, link);
NvU32 lnkStateRequest, linkState;
NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
NvU32 pending, bit, unhandled;
INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };
@@ -6743,27 +6762,10 @@ _nvswitch_service_nvlipt_lnk_nonfatal_ls10
if (nvswitch_test_flags(pending, bit))
{
//
// Read back LINK_STATE_REQUESTS and TOP_LINK_STATE registers
// If request == ACTIVE and TOP_LINK_STATE == FAULT there is a pending
// fault on training so re-run reset_and_drain
// Mark that the defered link error mechanism as seeing a reset_and_train re-try so
// the deferred task needs to re-create itself instead of continuing with the linkstate
// checks
// based off of HW's assertion. FAILEDMINIONREQUEST always trails a DL fault. So no need to
// do reset_and_drain here
//
linkState = NVSWITCH_LINK_RD32_LS10(device, link_info->linkNumber, NVLDL,
_NVLDL, _TOP_LINK_STATE);
lnkStateRequest = NVSWITCH_LINK_RD32_LS10(device, link,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_REQUEST);
if(FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _REQUEST, _ACTIVE, lnkStateRequest) &&
linkState == NV_NVLDL_TOP_LINK_STATE_STATE_FAULT)
{
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_TRUE;
device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link));
}
chip_device->deferredLinkErrors[link].nonFatalIntrMask.liptLnk |= bit;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk |= bit;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
nvswitch_clear_flags(&unhandled, bit);
}
@@ -7001,9 +7003,9 @@ _nvswitch_service_nvlw_nonfatal_ls10
return NVL_SUCCESS;
}
status[0] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance, intrLinkMask);
status[1] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance, intrLinkMask);
status[2] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance, intrLinkMask);
status[0] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance, intrLinkMask);
status[1] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance, intrLinkMask);
status[2] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance, intrLinkMask);
if ((status[0] != NVL_SUCCESS) && (status[0] != -NVL_NOT_FOUND) &&
(status[1] != NVL_SUCCESS) && (status[1] != -NVL_NOT_FOUND) &&
@@ -7373,6 +7375,28 @@ nvswitch_lib_service_interrupts_ls10
// 2. Clear leaf interrupt
// 3. Run leaf specific interrupt handler
//
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_NON_FATAL);
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_NON_FATAL, _MASK, val);
if (val != 0)
{
NVSWITCH_PRINT(device, INFO, "%s: NVLW NON_FATAL interrupts pending = 0x%x\n",
__FUNCTION__, val);
NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), val);
for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_MASK); i++)
{
if (val & NVBIT(i))
{
status = _nvswitch_service_nvlw_nonfatal_ls10(device, i);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] NON_FATAL interrupt handling status = %d\n",
__FUNCTION__, i, status);
return_status = status;
}
}
}
}
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_FATAL);
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_FATAL, _MASK, val);
if (val != 0)
@@ -7397,28 +7421,6 @@ nvswitch_lib_service_interrupts_ls10
}
}
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_NON_FATAL);
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_NON_FATAL, _MASK, val);
if (val != 0)
{
NVSWITCH_PRINT(device, INFO, "%s: NVLW NON_FATAL interrupts pending = 0x%x\n",
__FUNCTION__, val);
NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), val);
for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_MASK); i++)
{
if (val & NVBIT(i))
{
status = _nvswitch_service_nvlw_nonfatal_ls10(device, i);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] NON_FATAL interrupt handling status = %d\n",
__FUNCTION__, i, status);
return_status = status;
}
}
}
}
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_CORRECTABLE);
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_CORRECTABLE, _MASK, val);
if (val != 0)
@@ -7757,16 +7759,16 @@ nvswitch_service_nvldl_fatal_link_ls10
if (nvswitch_test_flags(pending, bit))
{
{
dlDeferredIntrLinkMask |= bit;
dlDeferredIntrLinkMask |= bit;
//
// Since reset and drain will reset the link, including clearing
// pending interrupts, skip the clear write below. There are cases
// where link clocks will not be on after reset and drain so there
// maybe PRI errors on writing to the register
//
bRequireResetAndDrain = NV_TRUE;
}
//
// Since reset and drain will reset the link, including clearing
// pending interrupts, skip the clear write below. There are cases
// where link clocks will not be on after reset and drain so there
// maybe PRI errors on writing to the register
//
bRequireResetAndDrain = NV_TRUE;
}
nvswitch_clear_flags(&unhandled, bit);
}
@@ -7774,41 +7776,25 @@ nvswitch_service_nvldl_fatal_link_ls10
if (nvswitch_test_flags(pending, bit))
{
{
dlDeferredIntrLinkMask |= bit;
dlDeferredIntrLinkMask |= bit;
//
// Since reset and drain will reset the link, including clearing
// pending interrupts, skip the clear write below. There are cases
// where link clocks will not be on after reset and drain so there
// maybe PRI errors on writing to the register
//
bRequireResetAndDrain = NV_TRUE;
}
//
// Since reset and drain will reset the link, including clearing
// pending interrupts, skip the clear write below. There are cases
// where link clocks will not be on after reset and drain so there
// maybe PRI errors on writing to the register
//
bRequireResetAndDrain = NV_TRUE;
}
nvswitch_clear_flags(&unhandled, bit);
}
if (bRequireResetAndDrain)
{
//
// If there is a link state callback enabled for this link then
// we hit a consecutive FAULT_UP error. set bResetAndDrainRetry
// so the current callback on completion can create a new
// callback to retry the link state check to account for the added
// delay caused by taking a 2nd fault and having to re-train
//
// If there is no callback enabled then set the error mask
// and create the link errors deferred task.
//
if (chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled)
{
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_TRUE;
}
else
{
chip_device->deferredLinkErrors[link].fatalIntrMask.dl = dlDeferredIntrLinkMask;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
}
chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl |= dlDeferredIntrLinkMask;
device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link));
chip_device->deferredLinkErrors[link].state.lastRetrainTime = nvswitch_os_get_platform_time();
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
}
NVSWITCH_UNHANDLED_CHECK(device, unhandled);
@@ -7916,7 +7902,7 @@ nvswitch_service_minion_link_ls10
case NV_MINION_NVLINK_LINK_INTR_CODE_BADINIT:
case NV_MINION_NVLINK_LINK_INTR_CODE_PMFAIL:
case NV_MINION_NVLINK_LINK_INTR_CODE_NOINIT:
chip_device->deferredLinkErrors[link].fatalIntrMask.minionLinkIntr =
chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr =
minionLinkIntr;
_nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
break;
@@ -7928,7 +7914,7 @@ nvswitch_service_minion_link_ls10
case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
case NV_MINION_NVLINK_LINK_INTR_CODE_TLREQ:
chip_device->deferredLinkErrors[link].nonFatalIntrMask.minionLinkIntr =
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr =
minionLinkIntr;
_nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
case NV_MINION_NVLINK_LINK_INTR_CODE_NOTIFY:

View File

@@ -98,6 +98,30 @@ _nvswitch_configure_reserved_throughput_counters
DRF_DEF(_NVLTLC_TX_LNK, _DEBUG_TP_CNTR_CTRL_0, _ENABLE, _ENABLE));
}
//
// Preserve the initial (VBIOS-programmed) L1 enter threshold of a link by
// copying it into the link's warm scratch register. The copy is made only
// while the scratch register still holds its INIT value, so an earlier
// saved value is never overwritten.
//
void
nvswitch_program_l1_scratch_reg_ls10
(
    nvswitch_device *device,
    NvU32 linkNumber
)
{
    NvU32 l1EnterThreshold;
    NvU32 scratchWarm;

    // Sample the current L1 enter threshold, then the warm scratch register
    l1EnterThreshold = NVSWITCH_LINK_RD32_LS10(device, linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD);
    scratchWarm      = NVSWITCH_LINK_RD32_LS10(device, linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _SCRATCH_WARM);

    // Scratch was already written at some earlier point: keep that value
    if (scratchWarm != NV_NVLIPT_LNK_SCRATCH_WARM_DATA_INIT)
    {
        return;
    }

    NVSWITCH_LINK_WR32_LS10(device, linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _SCRATCH_WARM, l1EnterThreshold);
}
#define BUG_3797211_LS10_VBIOS_VERSION 0x9610410000
void
nvswitch_init_lpwr_regs_ls10
(
@@ -110,33 +134,56 @@ nvswitch_init_lpwr_regs_ls10
NvU32 tempRegVal, lpEntryThreshold;
NvU8 softwareDesired;
NvBool bLpEnable;
NvU64 biosVersion;
if (device->regkeys.enable_pm == NV_SWITCH_REGKEY_ENABLE_PM_NO)
{
return;
}
// bios_config = nvswitch_get_bios_nvlink_config(device);
// IC Enter Threshold
if (device->regkeys.lp_threshold == NV_SWITCH_REGKEY_SET_LP_THRESHOLD_DEFAULT)
if (nvswitch_lib_get_bios_version(device, &biosVersion) != NVL_SUCCESS)
{
//
// TODO: get from bios. Refer Bug 3626523 for more info.
//
// The threshold is measured in 100us unit. So lpEntryThreshold = 1
// means the threshold is set to 100us in the register.
//
lpEntryThreshold = 1;
NVSWITCH_PRINT(device, WARN, "%s Get VBIOS version failed.\n",
__FUNCTION__);
biosVersion = 0;
}
// bios_config = nvswitch_get_bios_nvlink_config(device);
if (biosVersion >= BUG_3797211_LS10_VBIOS_VERSION)
{
// IC Enter Threshold
if (device->regkeys.lp_threshold == NV_SWITCH_REGKEY_SET_LP_THRESHOLD_DEFAULT)
{
//
// Do nothing since VBIOS (version 96.10.41.00.00 and above)
// sets the default L1 threshold.
// Refer Bug 3797211 for more info.
//
}
else
{
lpEntryThreshold = device->regkeys.lp_threshold;
tempRegVal = 0;
tempRegVal = FLD_SET_DRF_NUM(_NVLIPT, _LNK_PWRM_L1_ENTER_THRESHOLD, _THRESHOLD, lpEntryThreshold, tempRegVal);
NVSWITCH_LINK_WR32_LS10(device, linkNum, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD, tempRegVal);
}
}
else
{
lpEntryThreshold = device->regkeys.lp_threshold;
}
// IC Enter Threshold
if (device->regkeys.lp_threshold == NV_SWITCH_REGKEY_SET_LP_THRESHOLD_DEFAULT)
{
lpEntryThreshold = 1;
}
else
{
lpEntryThreshold = device->regkeys.lp_threshold;
}
tempRegVal = 0;
tempRegVal = FLD_SET_DRF_NUM(_NVLIPT, _LNK_PWRM_L1_ENTER_THRESHOLD, _THRESHOLD, lpEntryThreshold, tempRegVal);
NVSWITCH_LINK_WR32_LS10(device, linkNum, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD, tempRegVal);
tempRegVal = 0;
tempRegVal = FLD_SET_DRF_NUM(_NVLIPT, _LNK_PWRM_L1_ENTER_THRESHOLD, _THRESHOLD, lpEntryThreshold, tempRegVal);
NVSWITCH_LINK_WR32_LS10(device, linkNum, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD, tempRegVal);
}
//LP Entry Enable
bLpEnable = NV_TRUE;
@@ -1423,7 +1470,7 @@ nvswitch_load_link_disable_settings_ls10
nvswitch_device *device,
nvlink_link *link
)
{
{
NvU32 regVal;
// Read state from NVLIPT HW
@@ -1432,7 +1479,7 @@ nvswitch_load_link_disable_settings_ls10
if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, _DISABLE, regVal))
{
// Set link to invalid and unregister from corelib
device->link[link->linkNumber].valid = NV_FALSE;
nvlink_lib_unregister_link(link);
@@ -1473,7 +1520,7 @@ nvswitch_execute_unilateral_link_shutdown_ls10
// Status is explicitly ignored here since we are required to soldier-on
// in this scenario
//
status = nvswitch_request_tl_link_state_lr10(link,
status = nvswitch_request_tl_link_state_ls10(link,
NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_SHUTDOWN, NV_TRUE);
if (status == NVL_SUCCESS)
@@ -1492,22 +1539,22 @@ nvswitch_execute_unilateral_link_shutdown_ls10
{
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
{
NVSWITCH_PRINT(device, INFO,
"%s: Retrying shutdown due to Minion DLCMD Fault subcode = 0x%x\n",
__FUNCTION__, link_intr_subcode);
//
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
// or MINION_ALARM_BUSY
//
retry_count--;
}
else
{
break;
}
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
{
NVSWITCH_PRINT(device, INFO,
"%s: Retrying shutdown due to Minion DLCMD Fault subcode = 0x%x\n",
__FUNCTION__, link_intr_subcode);
//
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
// or MINION_ALARM_BUSY
//
retry_count--;
}
else
{
break;
}
}
else
{
@@ -1542,6 +1589,12 @@ nvswitch_reset_and_train_link_ls10
nvswitch_execute_unilateral_link_shutdown_ls10(link);
nvswitch_corelib_clear_link_state_ls10(link);
//
// When a link faults there could be a race between the driver requesting
// reset and MINION processing Emergency Shutdown. Minion will notify if
// such a collision happens and will deny the reset request, so try the
// request up to 3 times
//
do
{
status = nvswitch_request_tl_link_state_ls10(link,
@@ -1565,24 +1618,24 @@ nvswitch_reset_and_train_link_ls10
{
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
{
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
{
status = nvswitch_request_tl_link_state_ls10(link,
NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET, NV_TRUE);
status = nvswitch_request_tl_link_state_ls10(link,
NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET, NV_TRUE);
//
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
// or MINION_ALARM_BUSY
//
retry_count--;
//
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
// or MINION_ALARM_BUSY
//
retry_count--;
}
else
{
break;
}
}
else
{
break;
}
}
else
{
// failed to query minion for the link_intr_subcode so retry
@@ -1597,15 +1650,18 @@ nvswitch_reset_and_train_link_ls10
"%s: NvLink Reset has failed for link %d\n",
__FUNCTION__, link->linkNumber);
// Re-register links.
status = nvlink_lib_register_link(device->nvlink_device, link);
if (status != NVL_SUCCESS)
{
nvswitch_destroy_link(link);
return status;
}
return status;
}
status = nvswitch_launch_ALI_link_training(device, link, NV_FALSE);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR,
"%s: NvLink failed to request ACTIVE for link %d\n",
__FUNCTION__, link->linkNumber);
return status;
}
return NVL_SUCCESS;
}
@@ -1657,6 +1713,76 @@ nvswitch_are_link_clocks_on_ls10
return NV_TRUE;
}
//
// Request a TL link state through NVLIPT_LNK CTRL_LINK_STATE_REQUEST.
//
// link        - link to operate on
// tlLinkState - value written to the _REQUEST field
// bSync       - when set, wait for the request to complete and verify that
//               CTRL_LINK_STATE_STATUS reports the requested state
//
// Returns -NVL_UNBOUND_DEVICE if the link's NVLIPT_LNK engine is not valid,
// the wait helper's error if a ready-wait fails, -NVL_ERR_GENERIC if a
// synchronous request did not reach the requested state, and NVL_SUCCESS
// otherwise.
//
NvlStatus
nvswitch_request_tl_link_state_ls10
(
    nvlink_link *link,
    NvU32 tlLinkState,
    NvBool bSync
)
{
    nvswitch_device *device = link->dev->pDevInfo;
    const NvU32 failedMinionReqBit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1);
    NvlStatus status;
    NvU32 errStatus0;
    NvU32 stateStatus;

    if (!NVSWITCH_IS_LINK_ENG_VALID_LS10(device, NVLIPT_LNK, link->linkNumber))
    {
        NVSWITCH_PRINT(device, ERROR,
            "%s: link #%d invalid\n",
            __FUNCTION__, link->linkNumber);
        return -NVL_UNBOUND_DEVICE;
    }

    // The TL request interface must be idle before a new request is posted
    status = nvswitch_wait_for_tl_request_ready_lr10(link);
    if (status != NVL_SUCCESS)
    {
        return status;
    }

    // A FAILEDMINIONREQUEST left over from an earlier request is stale now; clear it
    errStatus0 = NVSWITCH_LINK_RD32(device, link->linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);
    if (nvswitch_test_flags(errStatus0, failedMinionReqBit))
    {
        NVSWITCH_LINK_WR32(device, link->linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0,
            failedMinionReqBit);
    }

    // Post the requested state
    NVSWITCH_LINK_WR32_LS10(device, link->linkNumber,
        NVLIPT_LNK, _NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST,
        DRF_NUM(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _REQUEST, tlLinkState));

    // Fire-and-forget callers are done here
    if (!bSync)
    {
        return status;
    }

    // Synchronous path: wait for the request to finish ...
    status = nvswitch_wait_for_tl_request_ready_lr10(link);
    if (status != NVL_SUCCESS)
    {
        return status;
    }

    // ... and confirm the link actually landed in the requested state
    stateStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
        NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);

    if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, stateStatus) ==
            tlLinkState)
    {
        return status;
    }

    NVSWITCH_PRINT(device, ERROR,
        "%s: TL link state request to state 0x%x for link #%d did not complete!\n",
        __FUNCTION__, tlLinkState, link->linkNumber);
    return -NVL_ERR_GENERIC;
}
NvBool
nvswitch_does_link_need_termination_enabled_ls10
(

View File

@@ -1353,7 +1353,53 @@ nvswitch_init_warm_reset_ls10
)
{
NVSWITCH_PRINT(device, WARN, "%s: Function not implemented\n", __FUNCTION__);
}
}
//
// Helper function that queries MINION for a link's DL state to decide
// whether the DL clocks are on.
// Returns NV_TRUE if the clocks are on,
//         NV_FALSE if the clocks are off, the link is invalid, or the
//         MINION DL status query fails.
//
static
NvBool
_nvswitch_are_dl_clocks_on
(
    nvswitch_device *device,
    NvU32 linkNumber
)
{
    nvlink_link *pLink = nvswitch_get_link(device, linkNumber);
    NvU32 dlStatData;
    NvU32 dlLinkState;

    if (pLink == NULL)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: invalid link %d\n",
            __FUNCTION__, linkNumber);
        return NV_FALSE;
    }

    if (nvswitch_minion_get_dl_status(device, linkNumber,
            NV_NVLSTAT_UC01, 0, &dlStatData) != NVL_SUCCESS)
    {
        return NV_FALSE;
    }

    dlLinkState = DRF_VAL(_NVLSTAT, _UC01, _LINK_STATE, dlStatData);

    // RESET and UNINIT are reported as clocks off
    if ((dlLinkState == LINKSTATUS_RESET) || (dlLinkState == LINKSTATUS_UNINIT))
    {
        return NV_FALSE;
    }

    // In these states the answer comes from the per-link RX/TX clock status
    if ((dlLinkState == LINKSTATUS_LANESHUTDOWN) || (dlLinkState == LINKSTATUS_ACTIVE_PENDING))
    {
        return nvswitch_are_link_clocks_on_ls10(device, pLink,
            NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK));
    }

    // Every other DL state is treated as clocks on
    return NV_TRUE;
}
//
// Implement reset and drain sequence for ls10
@@ -1586,10 +1632,10 @@ nvswitch_reset_and_drain_links_ls10
nvswitch_soe_restore_nport_state_ls10(device, link);
// Step 7.0 : Re-program the routing table for DBEs
// Step 8.0 : Reset NVLW and NPORT interrupt state
_nvswitch_link_reset_interrupts_ls10(device, link);
// Re-register links.
status = nvlink_lib_register_link(device->nvlink_device, link_info);
if (status != NVL_SUCCESS)
@@ -1625,21 +1671,9 @@ nvswitch_reset_and_drain_links_ls10
do
{
bKeepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE;
bAreDlClocksOn = _nvswitch_are_dl_clocks_on(device, link);
status = nvswitch_minion_get_dl_status(device, link_info->linkNumber,
NV_NVLSTAT_UC01, 0, &stat_data);
if (status != NVL_SUCCESS)
{
continue;
}
link_state = DRF_VAL(_NVLSTAT, _UC01, _LINK_STATE, stat_data);
bAreDlClocksOn = (link_state != LINKSTATUS_INITPHASE1) ?
NV_TRUE:NV_FALSE;
if (bAreDlClocksOn == NV_TRUE)
if (bAreDlClocksOn)
{
break;
}

View File

@@ -1345,7 +1345,6 @@ nvswitch_lib_initialize_device
NvU8 link_num;
nvlink_link *link = NULL;
NvBool is_blacklisted_by_os = NV_FALSE;
NvU64 mode;
if (!NVSWITCH_IS_DEVICE_ACCESSIBLE(device))
{
@@ -1508,18 +1507,19 @@ nvswitch_lib_initialize_device
nvswitch_reset_persistent_link_hw_state(device, link_num);
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
__FUNCTION__, link_num);
}
else if(mode == NVLINK_LINKSTATE_FAULT)
{
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
__FUNCTION__, link_num);
nvswitch_reset_and_train_link(device, link);
}
//
// During Nvswitch initialization, the default L1 thresholds are programmed by the
// BIOS from the BIOS tables. Save these L1 Threshold Values in scratch registers
// for use when resetting the thresholds to default.
//
nvswitch_program_l1_scratch_reg(device, link_num);
//
// WAR : Initializing the L1 threshold registers at this point as a WAR for
// Bug 3963639 where it was discussed that the L1 threshold register should have
// the default value for all available links and not just for active links.
//
nvswitch_init_lpwr_regs(link);
}
retval = nvswitch_set_training_mode(device);
@@ -1623,6 +1623,10 @@ nvswitch_lib_post_init_device
)
{
NvlStatus retval;
NvlStatus status;
NvU32 link_num;
NvU64 mode;
nvlink_link *link;
if (!NVSWITCH_IS_DEVICE_INITIALIZED(device))
{
@@ -1634,7 +1638,7 @@ nvswitch_lib_post_init_device
{
return retval;
}
if (nvswitch_is_bios_supported(device))
{
retval = nvswitch_bios_get_image(device);
@@ -1670,6 +1674,41 @@ nvswitch_lib_post_init_device
(void)nvswitch_launch_ALI(device);
}
//
// There is an edge case where a hypervisor may not send same number
// of reset to switch and GPUs, so try to re-train links in fault
// if possible
//
for (link_num=0; link_num < nvswitch_get_num_links(device); link_num++)
{
// Sanity check
if (!nvswitch_is_link_valid(device, link_num))
{
continue;
}
status = nvlink_lib_get_link(device->nvlink_device, link_num, &link);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: Failed to get link for LinkId %d\n",
__FUNCTION__, link_num);
continue;
}
// If the link is in fault then re-train
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
__FUNCTION__, link_num);
}
else if(mode == NVLINK_LINKSTATE_FAULT)
{
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
__FUNCTION__, link_num);
nvswitch_reset_and_train_link(device, link);
}
}
return NVL_SUCCESS;
}
@@ -4617,6 +4656,16 @@ nvswitch_init_lpwr_regs
device->hal.nvswitch_init_lpwr_regs(link);
}
//
// HAL dispatch wrapper: forwards to the nvswitch_program_l1_scratch_reg
// implementation installed in device->hal, passing the device and the
// link number through unchanged. No return value.
//
void
nvswitch_program_l1_scratch_reg
(
nvswitch_device *device,
NvU32 linkNumber
)
{
device->hal.nvswitch_program_l1_scratch_reg(device, linkNumber);
}
NvlStatus
nvswitch_launch_ALI
(

View File

@@ -121,7 +121,8 @@
#define NVLINK_FLA_PRIV_ERR (137)
#define ROBUST_CHANNEL_DLA_ERROR (138)
#define ROBUST_CHANNEL_FAST_PATH_ERROR (139)
#define ROBUST_CHANNEL_LAST_ERROR (ROBUST_CHANNEL_FAST_PATH_ERROR)
#define UNRECOVERABLE_ECC_ERROR_ESCAPE (140)
#define ROBUST_CHANNEL_LAST_ERROR (UNRECOVERABLE_ECC_ERROR_ESCAPE)
// Indexed CE reference

View File

@@ -95,7 +95,7 @@ NV_CRASHCAT_PACKET_FORMAT_VERSION crashcatPacketHeaderFormatVersion(NvCrashCatPa
static NV_INLINE
NvLength crashcatPacketHeaderPayloadSize(NvCrashCatPacketHeader hdr)
{
NvU8 unitShift;
NvU8 unitShift = 0;
NV_CRASHCAT_MEM_UNIT_SIZE unitSize =
(NV_CRASHCAT_MEM_UNIT_SIZE)DRF_VAL64(_CRASHCAT, _PACKET_HEADER, _PAYLOAD_UNIT_SIZE, hdr);
switch (unitSize)
@@ -104,7 +104,6 @@ NvLength crashcatPacketHeaderPayloadSize(NvCrashCatPacketHeader hdr)
case NV_CRASHCAT_MEM_UNIT_SIZE_1KB: unitShift = 10; break;
case NV_CRASHCAT_MEM_UNIT_SIZE_4KB: unitShift = 12; break;
case NV_CRASHCAT_MEM_UNIT_SIZE_64KB: unitShift = 16; break;
default: return 0;
}
// Increment size, since the size in the header is size - 1 (payload of 0 size is not encodable)