525.85.12

This commit is contained in:
Maneet Singh
2023-01-30 16:30:12 -08:00
parent 811073c51e
commit 1dc88ff75e
11 changed files with 154 additions and 73 deletions

View File

@@ -43,18 +43,18 @@
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r525/r528_10-249"
#define NV_BUILD_CHANGELIST_NUM (32293795)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r525/r528_10-257"
#define NV_BUILD_CHANGELIST_NUM (32360976)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r525/r528_10-249"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32293795)
#define NV_BUILD_NAME "rel/gpu_drv/r525/r528_10-257"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32360976)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r528_10-6"
#define NV_BUILD_CHANGELIST_NUM (32293795)
#define NV_BUILD_BRANCH_VERSION "r528_10-11"
#define NV_BUILD_CHANGELIST_NUM (32327477)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "528.22"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32293795)
#define NV_BUILD_NAME "528.38"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32327477)
#define NV_BUILD_BRANCH_BASE_VERSION R525
#endif
// End buildmeister python edited section

View File

@@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "525.85.05"
#define NV_VERSION_STRING "525.85.12"
#else

View File

@@ -519,6 +519,8 @@ typedef struct
NVLINK_LINK_ERROR_INFO_ERR_MASKS fatalIntrMask;
NVLINK_LINK_ERROR_INFO_ERR_MASKS nonFatalIntrMask;
NvBool bResetAndDrainRetry;
} NVLINK_LINK_ERROR_REPORTING;
typedef struct
@@ -748,6 +750,15 @@ typedef const struct
NvU32 swbromDataSize;
} RISCV_UCODE_HDR_INFO_LS10, *PRISCV_UCODE_HDR_INFO_LS10;
//
// defines used by internal ls10 functions to get
// specific clock status
//
// Each NVSWITCH_PER_LINK_CLOCK_<name> value is a bit index identifying one
// per-link clock (RXCLK, TXCLK, NCISOCCLK). NVSWITCH_PER_LINK_CLOCK_NUM is
// the number of clocks defined here.
//
// NVSWITCH_PER_LINK_CLOCK_SET(_name) pastes _name onto the
// NVSWITCH_PER_LINK_CLOCK_ prefix and passes the resulting index to BIT(),
// so callers can OR several clocks together into a single clocks mask,
// e.g. NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK).
//
#define NVSWITCH_PER_LINK_CLOCK_RXCLK 0
#define NVSWITCH_PER_LINK_CLOCK_TXCLK 1
#define NVSWITCH_PER_LINK_CLOCK_NCISOCCLK 2
#define NVSWITCH_PER_LINK_CLOCK_NUM 3
#define NVSWITCH_PER_LINK_CLOCK_SET(_name) BIT(NVSWITCH_PER_LINK_CLOCK_##_name)
//
// HAL functions shared by LR10 and used by LS10
//
@@ -971,6 +982,7 @@ NvlStatus nvswitch_reset_and_drain_links_ls10(nvswitch_device *device, NvU64 lin
void nvswitch_service_minion_all_links_ls10(nvswitch_device *device);
NvlStatus nvswitch_ctrl_get_board_part_number_ls10(nvswitch_device *device, NVSWITCH_GET_BOARD_PART_NUMBER_VECTOR *p);
void nvswitch_create_deferred_link_state_check_task_ls10(nvswitch_device *device, NvU32 nvlipt_instance, NvU32 link);
//
// SU generated functions
@@ -994,6 +1006,7 @@ NvBool nvswitch_is_inforom_supported_ls10(nvswitch_device *device);
void nvswitch_set_error_rate_threshold_ls10(nvlink_link *link, NvBool bIsDefault);
void nvswitch_configure_error_rate_threshold_interrupt_ls10(nvlink_link *link, NvBool bEnable);
NvlStatus nvswitch_reset_and_train_link_ls10(nvswitch_device *device, nvlink_link *link);
NvBool nvswitch_are_link_clocks_on_ls10(nvswitch_device *device, nvlink_link *link, NvU32 clocksMask);
#endif //_LS10_H_

View File

@@ -46,6 +46,7 @@
#include "nvswitch/ls10/dev_nvlw_ip.h"
#include "nvswitch/ls10/dev_minion_ip.h"
#include "nvswitch/ls10/dev_minion_ip_addendum.h"
#include "nvswitch/ls10/dev_cpr_ip.h"
#include "nvswitch/ls10/dev_nvlipt_ip.h"
#include "nvswitch/ls10/dev_nvlipt_lnk_ip.h"
@@ -4291,6 +4292,7 @@ _nvswitch_service_nvldl_fatal_ls10
NvU64 enabledLinkMask, localLinkMask, localEnabledLinkMask, runtimeErrorMask = 0;
NvU32 i;
nvlink_link *link;
NvU32 clocksMask = NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK);
NvlStatus status = -NVL_MORE_PROCESSING_REQUIRED;
NVSWITCH_LINK_TRAINING_ERROR_INFO linkTrainingErrorInfo = { 0 };
NVSWITCH_LINK_RUNTIME_ERROR_INFO linkRuntimeErrorInfo = { 0 };
@@ -4315,7 +4317,8 @@ _nvswitch_service_nvldl_fatal_ls10
break;
}
if (nvswitch_is_link_in_reset(device, link))
if (nvswitch_is_link_in_reset(device, link) ||
!nvswitch_are_link_clocks_on_ls10(device, link, clocksMask))
{
continue;
}
@@ -4862,23 +4865,6 @@ _nvswitch_service_nvltlc_rx_lnk_fatal_1_ls10
return NVL_SUCCESS;
}
//
// Returns NV_TRUE when the NCISOCCLK status field of the link's
// NVLIPT_LNK CTRL_CLK_CTRL register reads OFF, NV_FALSE otherwise.
//
static NvBool
_nvswitch_is_ncisoc_clock_off_ls10
(
    nvswitch_device *device,
    nvlink_link     *link
)
{
    NvU32 clkCtrlReg;

    // Snapshot the per-link clock control/status register
    clkCtrlReg = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
                     NVLIPT_LNK, _NVLIPT_LNK, _CTRL_CLK_CTRL);

    return FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_CLK_CTRL, _NCISOCCLK_STS, _OFF, clkCtrlReg) ?
               NV_TRUE : NV_FALSE;
}
NvlStatus
_nvswitch_service_nvltlc_fatal_ls10
(
@@ -4917,7 +4903,7 @@ _nvswitch_service_nvltlc_fatal_ls10
// as the IP's registers are off
//
if (nvswitch_is_link_in_reset(device, link) ||
_nvswitch_is_ncisoc_clock_off_ls10(device, link))
!nvswitch_are_link_clocks_on_ls10(device, link, NVSWITCH_PER_LINK_CLOCK_SET(NCISOCCLK)))
{
continue;
}
@@ -5462,6 +5448,20 @@ _nvswitch_deferred_link_state_check_ls10
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
pLink = nvswitch_get_link(device, pErrorReportParams->link);
// If there is a retry for reset_and_drain then re-create the state check for the current link
if (chip_device->deferredLinkErrors[link].bResetAndDrainRetry == NV_TRUE)
{
if (pErrorReportParams)
{
nvswitch_os_free(pErrorReportParams);
}
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
return;
}
if ((pLink == NULL) ||
(device->hal.nvswitch_corelib_get_dl_link_mode(pLink, &linkState) != NVL_SUCCESS) ||
((linkState != NVLINK_LINKSTATE_HS) && (linkState != NVLINK_LINKSTATE_SLEEP)))
@@ -5474,8 +5474,8 @@ _nvswitch_deferred_link_state_check_ls10
chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled = NV_FALSE;
}
static void
_nvswitch_create_deferred_link_state_check_task_ls10
void
nvswitch_create_deferred_link_state_check_task_ls10
(
nvswitch_device *device,
NvU32 nvlipt_instance,
@@ -5540,7 +5540,7 @@ _nvswitch_deferred_link_errors_check_ls10
bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1);
if (nvswitch_test_flags(pending, bit))
{
_nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
}
else
{
@@ -5548,7 +5548,10 @@ _nvswitch_deferred_link_errors_check_ls10
_nvswitch_clear_deferred_link_errors_ls10(device, link);
}
if (pErrorReportParams)
{
nvswitch_os_free(pErrorReportParams);
}
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
}
@@ -5569,6 +5572,8 @@ _nvswitch_create_deferred_link_errors_task_ls10
return;
}
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;
status = NVL_ERR_GENERIC;
pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS));
if(pErrorReportParams != NULL)
@@ -5701,7 +5706,7 @@ _nvswitch_service_nvldl_nonfatal_ls10
nvlink_link *link;
NvlStatus status;
NvlStatus return_status = -NVL_NOT_FOUND;
NvU32 clocksMask = NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK);
enabledLinkMask = nvswitch_get_enabled_link_mask(device);
localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
localEnabledLinkMask = enabledLinkMask & localLinkMask;
@@ -5722,7 +5727,8 @@ _nvswitch_service_nvldl_nonfatal_ls10
break;
}
if (nvswitch_is_link_in_reset(device, link))
if (nvswitch_is_link_in_reset(device, link) ||
!nvswitch_are_link_clocks_on_ls10(device, link, clocksMask))
{
continue;
}
@@ -6113,7 +6119,7 @@ _nvswitch_service_nvltlc_nonfatal_ls10
// as the IP's registers are off
//
if (nvswitch_is_link_in_reset(device, link) ||
_nvswitch_is_ncisoc_clock_off_ls10(device, link))
!nvswitch_are_link_clocks_on_ls10(device, link, NVSWITCH_PER_LINK_CLOCK_SET(NCISOCCLK)))
{
continue;
}
@@ -6218,6 +6224,8 @@ _nvswitch_service_nvlipt_lnk_nonfatal_ls10
)
{
ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
nvlink_link *link_info = nvswitch_get_link(device, link);
NvU32 lnkStateRequest, lnkStateStatus;
NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
NvU32 pending, bit, unhandled;
@@ -6244,6 +6252,32 @@ _nvswitch_service_nvlipt_lnk_nonfatal_ls10
bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1);
if (nvswitch_test_flags(pending, bit))
{
//
// Read back LINK_STATE_REQUESTS and LINK_STATE_STATUS registers
// If request == ACTIVE, LINK_STATE_STATUS == ACTIVE_PENDING, request == ERROR
// and there is a pending FAULT_UP interrupt then redo reset_and_drain since the
// last try failed
//
// Mark the deferred link error mechanism as having seen a reset_and_drain retry so
// the deferred task re-creates itself instead of continuing with the linkstate
// checks
//
lnkStateStatus = NVSWITCH_LINK_RD32_LS10(device, link_info->linkNumber, NVLIPT_LNK,
_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS);
lnkStateRequest = NVSWITCH_LINK_RD32_LS10(device, link_info->linkNumber,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_REQUEST);
if(FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _REQUEST, _ACTIVE, lnkStateRequest) &&
!(FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _STATUS, _REQUEST_SUCCESSFUL, lnkStateRequest) ||
FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _STATUS, _INIT, lnkStateRequest))&&
FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, _ACTIVE_PENDING, lnkStateStatus) &&
DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1) & chip_device->deferredLinkErrors[link].fatalIntrMask.dl)
{
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_TRUE;
device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link));
}
chip_device->deferredLinkErrors[link].nonFatalIntrMask.liptLnk |= bit;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
nvswitch_clear_flags(&unhandled, bit);
@@ -6445,6 +6479,7 @@ _nvswitch_service_nvlw_nonfatal_ls10
{
NvlStatus status[3];
// TODO: @achaudhry invert handling so nvlipt_lnk is first
status[0] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance);
status[1] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance);
status[2] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance);
@@ -7130,6 +7165,7 @@ nvswitch_service_nvldl_fatal_link_ls10
bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1);
if (nvswitch_test_flags(pending, bit))
{
chip_device->deferredLinkErrors[link].fatalIntrMask.dl |= bit;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);

View File

@@ -496,34 +496,6 @@ nvswitch_corelib_get_rx_detect_ls10
return NVL_SUCCESS;
}
//
// Returns NV_TRUE if TLC should be treated as being in reset for this link,
// i.e. if any of the RX, TX or NCISOC per-link clock status fields in
// NVLIPT_LNK CTRL_CLK_CTRL reads OFF. Returns NV_FALSE when all three are on.
//
static NvBool
_nvswitch_is_tlc_in_reset
(
    nvswitch_device *device,
    nvlink_link     *link
)
{
    NvU32 clkCtrlReg;

    clkCtrlReg = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
                     NVLIPT_LNK, _NVLIPT_LNK, _CTRL_CLK_CTRL);

    // RX clock off: link is not powered on
    if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_CLK_CTRL, _RXCLK_STS, _OFF, clkCtrlReg))
    {
        return NV_TRUE;
    }

    // TX clock off: link is not powered on
    if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_CLK_CTRL, _TXCLK_STS, _OFF, clkCtrlReg))
    {
        return NV_TRUE;
    }

    // TX/RX on but NCISOC off: DL layer is up, TLC is still off
    if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_CLK_CTRL, _NCISOCCLK_STS, _OFF, clkCtrlReg))
    {
        return NV_TRUE;
    }

    return NV_FALSE;
}
void
nvswitch_reset_persistent_link_hw_state_ls10
(
@@ -531,6 +503,8 @@ nvswitch_reset_persistent_link_hw_state_ls10
NvU32 linkNumber
)
{
NvU32 clocksMask = NVSWITCH_PER_LINK_CLOCK_SET(RXCLK)|NVSWITCH_PER_LINK_CLOCK_SET(TXCLK)|
NVSWITCH_PER_LINK_CLOCK_SET(NCISOCCLK);
nvlink_link *link = nvswitch_get_link(device, linkNumber);
if (nvswitch_is_link_in_reset(device, link))
{
@@ -541,7 +515,8 @@ nvswitch_reset_persistent_link_hw_state_ls10
(void)nvswitch_minion_send_command(device, linkNumber, NV_MINION_NVLINK_DL_CMD_COMMAND_DLSTAT_CLR_DLERRCNT, 0);
// If TLC is not up then return
if (_nvswitch_is_tlc_in_reset(device, link))
if (!nvswitch_are_link_clocks_on_ls10(device, link, clocksMask))
{
return;
}
@@ -1583,3 +1558,51 @@ nvswitch_reset_and_train_link_ls10
return NVL_SUCCESS;
}
//
// Check whether every per-link clock selected in clocksMask is on.
//
// clocksMask is a bitmask whose bit indices are the
// NVSWITCH_PER_LINK_CLOCK_* values. The link's NVLIPT_LNK CTRL_CLK_CTRL
// register is read once and the status field of each selected clock is
// tested against OFF.
//
// Returns NV_TRUE if no selected clock reads OFF (including an empty mask).
// Returns NV_FALSE if any selected clock reads OFF, or if clocksMask has a
// bit set that does not correspond to a known clock.
//
NvBool
nvswitch_are_link_clocks_on_ls10
(
    nvswitch_device *device,
    nvlink_link     *link,
    NvU32            clocksMask
)
{
    NvU32 clkCtrlReg;
    NvU32 clkIdx;

    clkCtrlReg = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
                     NVLIPT_LNK, _NVLIPT_LNK, _CTRL_CLK_CTRL);

    FOR_EACH_INDEX_IN_MASK(32, clkIdx, clocksMask)
    {
        NvBool bClockOff;

        switch (clkIdx)
        {
            case NVSWITCH_PER_LINK_CLOCK_RXCLK:
                bClockOff = FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_CLK_CTRL, _RXCLK_STS, _OFF, clkCtrlReg);
                break;

            case NVSWITCH_PER_LINK_CLOCK_TXCLK:
                bClockOff = FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_CLK_CTRL, _TXCLK_STS, _OFF, clkCtrlReg);
                break;

            case NVSWITCH_PER_LINK_CLOCK_NCISOCCLK:
                bClockOff = FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_CLK_CTRL, _NCISOCCLK_STS, _OFF, clkCtrlReg);
                break;

            default:
                // Unknown clock requested: report "not on"
                return NV_FALSE;
        }

        // One clock being off is enough to fail the whole query
        if (bClockOff)
        {
            return NV_FALSE;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return NV_TRUE;
}

View File

@@ -544,6 +544,8 @@ typedef struct NVA080_CTRL_UPDATE_SYSMEM_BITMAP_PARAMS {
* Cap bit to indicate whether TSG timeslice override is enabled or not.
* When set true, TSG timeslice override is enabled.
* When false, TSG timeslice override is disabled.
* CAPS_GUEST_HIBERNATION_ENABLED
* Cap bit to indicate whether Guest OS Hibernation is supported or not.
* uvmEnabledFeatures
* This parameter returns mask of UVM enabled features on vGPU. It comprises of
* UVM managed APIs and replayable faults that are enabled or disabled based on
@@ -628,6 +630,9 @@ typedef struct NVA080_CTRL_UPDATE_SYSMEM_BITMAP_PARAMS {
#define NVA080_CTRL_CMD_VGPU_GET_CONFIG_PARAMS_VGPU_DEV_CAPS_USE_NON_STALL_LINUX_EVENTS 21:21
#define NVA080_CTRL_CMD_VGPU_GET_CONFIG_PARAMS_VGPU_DEV_CAPS_USE_NON_STALL_LINUX_EVENTS_FALSE (0x00000000)
#define NVA080_CTRL_CMD_VGPU_GET_CONFIG_PARAMS_VGPU_DEV_CAPS_USE_NON_STALL_LINUX_EVENTS_TRUE (0x00000001)
#define NVA080_CTRL_CMD_VGPU_GET_CONFIG_PARAMS_VGPU_DEV_CAPS_GUEST_HIBERNATION_ENABLED 22:22
#define NVA080_CTRL_CMD_VGPU_GET_CONFIG_PARAMS_VGPU_DEV_CAPS_GUEST_HIBERNATION_ENABLED_FALSE (0x00000000)
#define NVA080_CTRL_CMD_VGPU_GET_CONFIG_PARAMS_VGPU_DEV_CAPS_GUEST_HIBERNATION_ENABLED_TRUE (0x00000001)
/* UVM supported features */
#define NVA080_CTRL_CMD_VGPU_GET_CONFIG_PARAMS_UVM_FEATURES_REPLAYABLE_FAULTS_ENABLED 0:0

View File

@@ -887,8 +887,6 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2330, 0x16c0, 0x10de, "NVIDIA H100 80GB HBM3" },
{ 0x2330, 0x16c1, 0x10de, "NVIDIA H100 80GB HBM3" },
{ 0x2331, 0x1626, 0x10de, "NVIDIA H100 PCIe" },
{ 0x2336, 0x16c2, 0x10de, "NVIDIA H100 80GB HBM2e" },
{ 0x2336, 0x16c7, 0x10de, "NVIDIA H100 80GB HBM2e" },
{ 0x2414, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" },
{ 0x2420, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti Laptop GPU" },
{ 0x2438, 0x0000, 0x0000, "NVIDIA RTX A5500 Laptop GPU" },
@@ -952,6 +950,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x25A7, 0x0000, 0x0000, "NVIDIA GeForce RTX 2050" },
{ 0x25A9, 0x0000, 0x0000, "NVIDIA GeForce RTX 2050" },
{ 0x25AA, 0x0000, 0x0000, "NVIDIA GeForce MX570 A" },
{ 0x25AB, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 4GB Laptop GPU" },
{ 0x25AC, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 6GB Laptop GPU" },
{ 0x25AD, 0x0000, 0x0000, "NVIDIA GeForce RTX 2050" },
{ 0x25B6, 0x14a9, 0x10de, "NVIDIA A16" },
{ 0x25B6, 0x157e, 0x10de, "NVIDIA A2" },
@@ -962,6 +962,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x25E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Ti Laptop GPU" },
{ 0x25E2, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Laptop GPU" },
{ 0x25E5, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Laptop GPU" },
{ 0x25EC, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 6GB Laptop GPU" },
{ 0x25ED, 0x0000, 0x0000, "NVIDIA GeForce RTX 2050" },
{ 0x25F9, 0x0000, 0x0000, "NVIDIA RTX A1000 Embedded GPU" },
{ 0x25FA, 0x0000, 0x0000, "NVIDIA RTX A2000 Embedded GPU" },