515.43.04

This commit is contained in:
Andy Ritger
2022-05-09 13:18:59 -07:00
commit 1739a20efc
2519 changed files with 1060036 additions and 0 deletions

View File

@@ -0,0 +1,566 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
//
// nvlink.h
//
#ifndef _NVLINK_H_
#define _NVLINK_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <nv-kernel-interface-api.h>
#include "nvlink_common.h"
#include "nvlink_lib_ctrl.h"
#include "nv_list.h"
#include "nvlink_errors.h"
#include "nvCpuUuid.h"
// Debug printing is compiled in only for DEVELOP, DEBUG, or NV_MODS builds.
#if defined(DEVELOP) || defined(DEBUG) || defined(NV_MODS)
    #define NVLINK_PRINT_ENABLED 1

    // The caller supplies one parenthesized argument list, which is pasted
    // directly after nvlink_print.
    #define NVLINK_PRINT(format_and_stuff) nvlink_print format_and_stuff

    // Call-site location (file, line, function); every other module tag is
    // an alias of the core tag.
    #define DBG_MODULE_NVLINK_CORE  __FILE__, __LINE__, __FUNCTION__
    #define DBG_MODULE_IBMNPU       DBG_MODULE_NVLINK_CORE
    #define DBG_MODULE_TEGRASHIM    DBG_MODULE_NVLINK_CORE
    #define DBG_MODULE_EBRIDGE      DBG_MODULE_NVLINK_CORE
    #define DBG_MODULE_NVSWITCH     DBG_MODULE_NVLINK_CORE
#else
    // Printing disabled: the argument list is dropped and the macro becomes
    // a void no-op expression.
    #define NVLINK_PRINT(format_and_stuff) ((void)(0))
#endif
// Device type codes for the kinds of device that support NVLINK
#define NVLINK_DEVICE_TYPE_EBRIDGE     0x0
#define NVLINK_DEVICE_TYPE_IBMNPU      0x1
#define NVLINK_DEVICE_TYPE_GPU         0x2
#define NVLINK_DEVICE_TYPE_NVSWITCH    0x3
#define NVLINK_DEVICE_TYPE_TEGRASHIM   0x4
// NVLink version codes (note: the value 0x00000003 is not assigned)
#define NVLINK_DEVICE_VERSION_10   0x00000001
#define NVLINK_DEVICE_VERSION_20   0x00000002
#define NVLINK_DEVICE_VERSION_22   0x00000004
#define NVLINK_DEVICE_VERSION_30   0x00000005
#define NVLINK_DEVICE_VERSION_31   0x00000006
#define NVLINK_DEVICE_VERSION_40   0x00000007
// Timeouts for link state transitions, in milliseconds
#define NVLINK_TRANSITION_OFF_TIMEOUT       1
#define NVLINK_TRANSITION_SAFE_TIMEOUT      300
#define NVLINK_TRANSITION_HS_TIMEOUT        8000
#define NVLINK_TRANSITION_POST_HS_TIMEOUT   70
// Link training seed values
#define NVLINK_MAX_SEED_NUM           6
//
// One element more than NVLINK_MAX_SEED_NUM. The sum is parenthesized so the
// macro behaves as a single operand inside larger expressions (for example
// 2 * NVLINK_MAX_SEED_BUFFER_SIZE or sizeof(NvU32) * NVLINK_MAX_SEED_BUFFER_SIZE).
//
#define NVLINK_MAX_SEED_BUFFER_SIZE   (NVLINK_MAX_SEED_NUM + 1)
#define NVLINK_MAX_SYSTEM_LINK_NUM    624
// Forward declarations of the struct tags used throughout this header
struct nvlink_device;
struct nvlink_device_handle;

struct nvlink_link;
struct nvlink_link_handlers;
// State kept for one nvlink device
struct nvlink_device
{
    // List node for this device
    NVListRec node;

    // Links that are associated with this device
    NVListRec link_list;

    // Identifier that is unique per device within the core
    NvU64 deviceId;

    // Names and ids supplied by the client
    char *driverName;
    char *deviceName;
    NvU8 *uuid;

    // PCI information
    struct nvlink_pci_info pciInfo;

    // Type of the device and whether it is initialized
    NvU64 type;
    NvBool initialized;

    // Fabric node id
    NvU16 nodeId;

    // Private information owned by the client
    void *pDevInfo;
};
// Kinds of link change that can be requested
enum nvlink_link_change_type
{
    nvlink_retrain_from_off  = 0,
    nvlink_retrain_from_safe = 1
};
// Parameters describing one link change request
struct nvlink_link_change
{
    // The two link endpoints involved in the change
    struct nvlink_link *master;
    struct nvlink_link *slave;

    // Which kind of change is being requested
    enum nvlink_link_change_type change_type;
};
// State kept for one nvlink link
struct nvlink_link
{
    // List node for this link
    NVListRec node;

    // Device this link is associated with
    struct nvlink_device *dev;

    // Lock for this per-link structure
    void *linkLock;

    // Identifier that is unique per link within the core
    NvU64 linkId;

    // Set when this link is the master of its connection
    NvBool master;

    // Link name and number supplied by the client
    char *linkName;
    NvU32 linkNumber;
    NvU64 token;

    // Link state
    NvU32 state;
    NvBool inSWCFG;

    // Sublink states
    NvU32 tx_sublink_state;
    NvU32 rx_sublink_state;

    // Whether receiver detect has passed
    NvBool bRxDetected;

    // The link failed while InitPll was being sent to minion
    NvBool bTxCommonModeFail;

    // The link failed while transitioning to SWCFG
    NvBool bSafeTransitionFail;

    // The link failed while INITPHASE5 was being sent to minion
    NvBool bInitphase5Fails;

    // IP version
    NvU32 version;

    // Whether state has been saved
    NvBool bStateSaved;

    // Retry count for putting the link into safe
    NvU32 safe_retries;

    // Set when the link is AC coupled
    NvBool ac_coupled;

    // Retry count for discovering the other end of the link
    NvU32 packet_injection_retries;

    // Local Sid of the link
    NvU64 localSid;

    // Remote Sid of the link
    NvU64 remoteSid;

    // LinkId of the remote link that this link is connected to
    NvU32 remoteLinkId;
    NvU32 remoteDeviceType;

    // Whether INITNEGOTIATE has received CONFIG_GOOD (NVL3.0+)
    NvBool bInitnegotiateConfigGood;

    // Status of the power state transition
    enum
    {
        nvlink_power_state_in_L0,
        nvlink_power_state_entering_L2,
        nvlink_power_state_in_L2,
        nvlink_power_state_exiting_L2
    } powerStateTransitionStatus;

    // Link handlers
    const struct nvlink_link_handlers *link_handlers;

    // Private information owned by the client
    void *link_info;

    // Information about the outstanding link change request
    struct nvlink_link_change link_change;

    // Seed data for this nvlink
    NvU32 seedData[NVLINK_MAX_SEED_BUFFER_SIZE];
};
//
// Link handler operations: a table of function pointers. Every entry takes
// the link it operates on, except queue_link_change, which takes a link
// change request.
//
struct nvlink_link_handlers
{
    // add / remove
    NV_API_CALL NvlStatus (*add)(struct nvlink_link *link);
    NV_API_CALL NvlStatus (*remove)(struct nvlink_link *link);

    // lock / unlock
    NV_API_CALL NvlStatus (*lock)(struct nvlink_link *link);
    NV_API_CALL void (*unlock)(struct nvlink_link *link);

    // Queue a link change request
    NV_API_CALL NvlStatus (*queue_link_change)(struct nvlink_link_change *link_change);

    // DL and TL link mode setters and getters
    NV_API_CALL NvlStatus (*set_dl_link_mode)(struct nvlink_link *link, NvU64 mode, NvU32 flags);
    NV_API_CALL NvlStatus (*get_dl_link_mode)(struct nvlink_link *link, NvU64 *mode);
    NV_API_CALL NvlStatus (*set_tl_link_mode)(struct nvlink_link *link, NvU64 mode, NvU32 flags);
    NV_API_CALL NvlStatus (*get_tl_link_mode)(struct nvlink_link *link, NvU64 *mode);

    // TX and RX mode setters and getters (the getters also report a sub-mode)
    NV_API_CALL NvlStatus (*set_tx_mode)(struct nvlink_link *link, NvU64 mode, NvU32 flags);
    NV_API_CALL NvlStatus (*get_tx_mode)(struct nvlink_link *link, NvU64 *mode, NvU32 *subMode);
    NV_API_CALL NvlStatus (*set_rx_mode)(struct nvlink_link *link, NvU64 mode, NvU32 flags);
    NV_API_CALL NvlStatus (*get_rx_mode)(struct nvlink_link *link, NvU64 *mode, NvU32 *subMode);

    // RX detect
    NV_API_CALL NvlStatus (*set_rx_detect)(struct nvlink_link *link, NvU32 flags);
    NV_API_CALL NvlStatus (*get_rx_detect)(struct nvlink_link *link);

    // Discovery token write / read
    NV_API_CALL NvlStatus (*write_discovery_token)(struct nvlink_link *link, NvU64 token);
    NV_API_CALL NvlStatus (*read_discovery_token)(struct nvlink_link *link, NvU64 *token);

    // Training completion and uphy load
    NV_API_CALL void (*training_complete)(struct nvlink_link *link);
    NV_API_CALL void (*get_uphy_load)(struct nvlink_link *link, NvBool* bUnlocked);
};
//
// An intranode connection in a single-node or multi-node system.
// Both endpoints of the connection are visible from the same node.
//
struct nvlink_intranode_conn
{
    // List node for this connection
    NVListRec node;

    // The two endpoints of the connection
    struct nvlink_link *end0;
    struct nvlink_link *end1;
};
//
// An internode connection in a multi-node system.
// One of the endpoints of the connection must be a local link.
//
struct nvlink_internode_conn
{
    // List node for this connection
    NVListRec node;

    // Local endpoint
    struct nvlink_link *local_end;

    // Information describing the remote endpoint
    nvlink_remote_endpoint_info remote_end;
};
// Typedefs: each tag is also usable without its struct/enum keyword

// Devices
typedef struct nvlink_device          nvlink_device;
typedef struct nvlink_device_handle   nvlink_device_handle;
typedef struct nvlink_device_handlers nvlink_device_handlers;

// Links
typedef struct nvlink_link            nvlink_link;
typedef struct nvlink_link_change     nvlink_link_change;
typedef struct nvlink_link_handlers   nvlink_link_handlers;
typedef enum nvlink_link_change_type  nvlink_link_change_type;

// Connections
typedef struct nvlink_intranode_conn  nvlink_intranode_conn;
typedef struct nvlink_internode_conn  nvlink_internode_conn;

// Inband data
typedef struct nvlink_inband_data     nvlink_inband_data;
//
// Retry maxima. NOTE(review): the names mirror the safe_retries and
// packet_injection_retries counters in struct nvlink_link; presumably these
// are their upper bounds - confirm against the core library.
//
#define NVLINK_MAX_NUM_SAFE_RETRIES               7
#define NVLINK_MAX_NUM_PACKET_INJECTION_RETRIES   4
// NVLINK link states (note: the value 0x0D is not assigned)
#define NVLINK_LINKSTATE_OFF                   0x00   // OFF
#define NVLINK_LINKSTATE_HS                    0x01   // High Speed
#define NVLINK_LINKSTATE_SAFE                  0x02   // Safe/Discovery State
#define NVLINK_LINKSTATE_FAULT                 0x03   // Faulty
#define NVLINK_LINKSTATE_RECOVERY              0x04   // Recovery
#define NVLINK_LINKSTATE_FAIL                  0x05   // Unconnected/Fail
#define NVLINK_LINKSTATE_DETECT                0x06   // Detect mode
#define NVLINK_LINKSTATE_RESET                 0x07   // Reset
#define NVLINK_LINKSTATE_ENABLE_PM             0x08   // Enable Link Power Management
#define NVLINK_LINKSTATE_DISABLE_PM            0x09   // Disable Link Power Management
#define NVLINK_LINKSTATE_SLEEP                 0x0A   // Sleep (L2)
#define NVLINK_LINKSTATE_SAVE_STATE            0x0B   // Save state while entering L2
#define NVLINK_LINKSTATE_RESTORE_STATE         0x0C   // Restore state while exiting L2
#define NVLINK_LINKSTATE_PRE_HS                0x0E   // Settings before moving to High Speed
#define NVLINK_LINKSTATE_DISABLE_ERR_DETECT    0x0F   // Disable Error detection (interrupt)
#define NVLINK_LINKSTATE_LANE_DISABLE          0x10   // Disable Lanes
#define NVLINK_LINKSTATE_LANE_SHUTDOWN         0x11   // Shutdown Lanes in PHY
#define NVLINK_LINKSTATE_TRAFFIC_SETUP         0x12   // Setup traffic flow after ACTIVE
#define NVLINK_LINKSTATE_INITPHASE1            0x13   // INITPHASE1
#define NVLINK_LINKSTATE_INITNEGOTIATE         0x14   // Initialize the negotiation (Ampere and later)
#define NVLINK_LINKSTATE_POST_INITNEGOTIATE    0x15   // Sends DL stat
#define NVLINK_LINKSTATE_INITOPTIMIZE          0x16   // INITOPTIMIZE
#define NVLINK_LINKSTATE_POST_INITOPTIMIZE     0x17   // POST INITOPTIMIZE DL stat check
#define NVLINK_LINKSTATE_DISABLE_HEARTBEAT     0x18   // Disables the heartbeat errors
#define NVLINK_LINKSTATE_CONTAIN               0x19   // TL is in contain mode
#define NVLINK_LINKSTATE_INITTL                0x1A   // INITTL
#define NVLINK_LINKSTATE_INVALID               0xFF   // Invalid state
// NVLINK TX sublink states (note: values 0x1 through 0x3 are not assigned)
#define NVLINK_SUBLINK_STATE_TX_HS                    0x0   // TX High Speed
#define NVLINK_SUBLINK_STATE_TX_SINGLE_LANE           0x4   // TX Single Lane (1/8th or 1/4th) Mode (Deprecated)
#define NVLINK_SUBLINK_STATE_TX_TRAIN                 0x5   // TX training
#define NVLINK_SUBLINK_STATE_TX_SAFE                  0x6   // TX Safe Mode
#define NVLINK_SUBLINK_STATE_TX_OFF                   0x7   // TX OFF
#define NVLINK_SUBLINK_STATE_TX_COMMON_MODE           0x8   // TX common mode enable
#define NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE   0x9   // TX common mode disable
#define NVLINK_SUBLINK_STATE_TX_DATA_READY            0xA   // Do Data Ready and Data Enable
#define NVLINK_SUBLINK_STATE_TX_EQ                    0xB   // TX equalization
#define NVLINK_SUBLINK_STATE_TX_PRBS_EN               0xC   // TX IOBIST PRBS generator enable
#define NVLINK_SUBLINK_STATE_TX_POST_HS               0xD   // TX Post High Speed settings
// NVLINK RX sublink states (note: values 0x1 through 0x3 are not assigned)
#define NVLINK_SUBLINK_STATE_RX_HS            0x0   // RX High Speed
#define NVLINK_SUBLINK_STATE_RX_SINGLE_LANE   0x4   // RX Single Lane (1/8th or 1/4th) Mode (Deprecated)
#define NVLINK_SUBLINK_STATE_RX_TRAIN         0x5   // RX training
#define NVLINK_SUBLINK_STATE_RX_SAFE          0x6   // RX Safe Mode
#define NVLINK_SUBLINK_STATE_RX_OFF           0x7   // RX OFF
#define NVLINK_SUBLINK_STATE_RX_RXCAL         0x8   // RX in calibration
#define NVLINK_SUBLINK_STATE_RX_INIT_TERM     0x9   // Enable RX termination
// NVLINK sublink sub-states; TX and RX each define a single "stable" value
#define NVLINK_SUBLINK_SUBSTATE_TX_STABLE   0x0   // TX Stable
#define NVLINK_SUBLINK_SUBSTATE_RX_STABLE   0x0   // RX Stable
// Flags for state change requests
#define NVLINK_STATE_CHANGE_ASYNC   0x0   // Do not wait for the state change to complete
#define NVLINK_STATE_CHANGE_SYNC    0x1   // Wait until the state change completes
/************************************************************************************************/
/***************************** NVLink library management functions ******************************/
/************************************************************************************************/

/*
 * Report whether the nvlink core library has been initialized
 */
NvBool nvlink_lib_is_initialized(void);

/*
 * Report whether no devices are currently registered
 */
NvBool nvlink_lib_is_device_list_empty(void);
/************************************************************************************************/
/************************** NVLink library driver-side interface ********************************/
/***************** Manages device and link registration and un-registration *********************/
/************************************************************************************************/

/*
 * Associate a device with the NVLink Core.
 * While the call is in progress the calling driver must be able to handle
 * callbacks from the Core into the driver.
 */
NvlStatus nvlink_lib_register_device(nvlink_device *dev);

/*
 * Unassociate a device from the NVLink Core.
 * Any links of the device that are still registered are removed as well.
 * While the call is in progress the calling driver must be able to handle
 * callbacks from the Core into the driver.
 */
NvlStatus nvlink_lib_unregister_device(nvlink_device *dev);

/*
 * Associate a link with a device in the NVLink Core.
 * While the call is in progress the calling driver must be able to handle
 * callbacks from the Core into the driver.
 */
NvlStatus nvlink_lib_register_link(nvlink_device *dev, nvlink_link *link);

/*
 * Unassociate a link from its device in the NVLink Core.
 * While the call is in progress the calling driver must be able to handle
 * callbacks from the Core into the driver.
 */
NvlStatus nvlink_lib_unregister_link(nvlink_link *link);
/************************************************************************************************/
/******************************* NVLink link management functions *******************************/
/************************************************************************************************/

/*
 * Report whether the device has no links registered
 */
NvBool nvlink_lib_is_link_list_empty(nvlink_device *dev);

/*
 * Look up the link that corresponds to the given link number on the device
 */
NvlStatus nvlink_lib_get_link(nvlink_device *device, NvU32 link_id, nvlink_link **link);

/*
 * Make the given link endpoint the link master
 */
NvlStatus nvlink_lib_set_link_master(nvlink_link *link);

/*
 * Retrieve the link master associated with the given endpoint
 */
NvlStatus nvlink_lib_get_link_master(nvlink_link *link, nvlink_link **master);
/************************************************************************************************/
/*************************** NVLink topology discovery functions ********************************/
/************************************************************************************************/

/*
 * Get the connected remote endpoint information.
 * For the given link, returns the details of the endpoint it is connected
 * to. When no connection is associated with the link, the bConnected
 * member of conn_info will be NV_FALSE.
 *
 * Note: This routine does not start any link initialization or topology
 * discovery.
 */
NvlStatus nvlink_lib_get_remote_conn_info(nvlink_link *link, nvlink_conn_info *conn_info);

/*
 * Get the connected remote endpoint information.
 * For the given end of a link, returns the device and link information of
 * the remote end, together with a boolean that tells whether topology
 * detection was complete.
 */
NvlStatus nvlink_lib_discover_and_get_remote_conn_info(nvlink_link      *end,
                                                       nvlink_conn_info *conn_info,
                                                       NvU32             flags);
/************************************************************************************************/
/****************************** NVLink initialization functions *********************************/
/************************************************************************************************/

/*
 * Re-initialize the given link, taking it from OFF to SWCFG
 */
NvlStatus nvlink_lib_reinit_link_from_off_to_swcfg(nvlink_link *link, NvU32 flags);
/************************************************************************************************/
/********************************** NVLink training functions ***********************************/
/************************************************************************************************/

/*
 * Train the given set of links from SWCFG to ACTIVE state
 *    a. For low training latency  - the caller passes all links as an array
 *    b. For high training latency - the caller passes the links one by one
 */
NvlStatus nvlink_lib_train_links_from_swcfg_to_active(nvlink_link **links,
                                                      NvU32         linkCount,
                                                      NvU32         flags);

/*
 * Train the given set of links of a device from L2 to ACTIVE state
 */
NvlStatus nvlink_lib_train_links_from_L2_to_active(nvlink_device *dev,
                                                   NvU32          linkMask,
                                                   NvU32          flags);

/*
 * Retrain the given link from SWCFG to ACTIVE
 */
NvlStatus nvlink_lib_retrain_link_from_swcfg_to_active(nvlink_link *link, NvU32 flags);

/*
 * Save the seed data passed in from an endpoint driver
 */
NvlStatus nvlink_lib_save_training_seeds(nvlink_link * link, NvU32 * seedData);

/*
 * NOTE(review): this header gives no description for this function;
 * presumably it copies the stored seeds into seedDataCopy - confirm against
 * the implementation.
 */
NvlStatus nvlink_lib_copy_training_seeds(nvlink_link * link, NvU32 * seedDataCopy);

/*
 * Hand the stored seeds back to the endpoint driver
 */
void nvlink_lib_restore_training_seeds(nvlink_link * link, NvU32 * seedData);

/*
 * Check that the requested links have trained to active
 */
NvlStatus nvlink_lib_check_training_complete(nvlink_link **links, NvU32 linkCount);
/************************************************************************************************/
/********************************** NVLink shutdown functions ***********************************/
/************************************************************************************************/

/*
 * [CLEAN SHUTDOWN]
 * Shut down the given links of a device from active to L2 state
 */
NvlStatus nvlink_lib_powerdown_links_from_active_to_L2(nvlink_device *dev,
                                                       NvU32          linkMask,
                                                       NvU32          flags);

/*
 * [PSEUDO-CLEAN SHUTDOWN]
 * Shut down the given array of links from ACTIVE to OFF state
 */
NvlStatus nvlink_lib_powerdown_links_from_active_to_off(nvlink_link **links,
                                                        NvU32         numLinks,
                                                        NvU32         flags);

/*
 * Power down the given array of links from ACTIVE to SWCFG state
 */
NvlStatus nvlink_lib_powerdown_links_from_active_to_swcfg(nvlink_link **links,
                                                          NvU32         numLinks,
                                                          NvU32         flags);

/*
 * Reset the given array of links
 */
NvlStatus nvlink_lib_reset_links(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/*
 * Nvlink core library structure iterators
 *
 * Thin wrappers around nvListForEachEntry / nvListForEachEntry_safe.
 * The `head` and `dev` arguments are parenthesized before member access so
 * that any expression may be passed (for example `*pHead` or `&localDev`),
 * not only a plain identifier.
 *
 * Note: in the `head` variants the `node` argument also names the member of
 * `head` whose address is used as the list head, because the parameter is
 * substituted in `(head).node` as well.
 */
#define FOR_EACH_DEVICE_REGISTERED(dev, head, node) \
    nvListForEachEntry(dev, &(head).node, node)
#define FOR_EACH_LINK_REGISTERED(link, dev, node) \
    nvListForEachEntry(link, &(dev)->link_list, node)
#define FOR_EACH_LINK_REGISTERED_SAFE(link, next, dev, node) \
    nvListForEachEntry_safe(link, next, &(dev)->link_list, node)
#define FOR_EACH_CONNECTION(conn, head, node) \
    nvListForEachEntry(conn, &(head).node, node)
#ifdef __cplusplus
}
#endif
#endif // _NVLINK_H_

View File

@@ -0,0 +1,173 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NVLINK_COMMON_H_
#define _NVLINK_COMMON_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
#include "nvCpuUuid.h"
#include "nvlink_errors.h"
// Fallback definition of NULL, used only when no earlier header defined it
#ifndef NULL
#define NULL ((void *)0)
#endif
// Information about one PCI BAR of an nvlink device
struct nvlink_pci_bar_info
{
    NvU64  busAddress;
    NvU64  baseAddr;
    NvU64  barSize;
    NvU32  offset;
    void  *pBar;
};
// Number of entries in nvlink_pci_info.bars
#define MAX_NVLINK_BARS 2

// PCI information of an nvlink device
struct nvlink_pci_info
{
    // PCI address: domain, bus, device, function
    NvU32 domain;
    NvU8 bus;
    NvU8 device;
    NvU8 function;

    NvU32 pciDeviceId;

    // Interrupt line and whether the interrupt is hooked
    NvU32 irq;
    NvBool intHooked;

    // BAR descriptions
    struct nvlink_pci_bar_info bars[MAX_NVLINK_BARS];
};
// Detailed information about an nvlink device
struct nvlink_detailed_device_info
{
    char *deviceName;
    NvU64 deviceType;
    NvU8 *devUuid;
    NvBool bInitialized;

    // Device info of the endpoint driver. Opaque to the core lib; passed
    // from the endpoint driver to the core.
    void *dev_info;

    struct nvlink_pci_info *pciInfo;
};
// Parameters for registering an nvlink device
struct nvlink_device_register_params
{
    // Device info of the core lib. Opaque to the endpoint driver; passed
    // from the core lib to the endpoint driver.
    void **deviceHandle;

    char *driverName;

    struct nvlink_detailed_device_info *device_params;
};
// Detailed information about an nvlink link
struct nvlink_detailed_link_info
{
    // Core library device handle that was passed to the endpoint driver
    // during device registration
    void *deviceHandle;

    // Link info of the endpoint driver. Opaque to the core lib; passed
    // from the endpoint driver to the core.
    void *link_info;

    char *linkName;
    NvU32 linkNumber;
    NvU32 version;
    NvBool bAcCoupled;
    const void *link_handlers;
};
// Parameters for registering an nvlink link
struct nvlink_link_register_params
{
    // Link info of the core lib. Opaque to the endpoint driver; passed
    // from the core lib to the endpoint driver.
    void **linkHandle;

    struct nvlink_detailed_link_info *link_params;
};
// Device handle for an nvlink client
struct nvlink_device_handle
{
    NvU32                  linkMask;
    struct nvlink_pci_info pciInfo;
};
//
// printf-style format string for a PCI address (domain:bus:device.function)
// and the matching argument list. `info` must be a pointer to an object
// with domain, bus, device and function members.
//
#define NVLINK_PCI_DEV_FMT "%04x:%02x:%02x.%x"
#define NVLINK_PCI_DEV_FMT_ARGS(info) \
    (info)->domain, (info)->bus, (info)->device, (info)->function
// Information about an nvlink connection
struct nvlink_conn_info
{
    // PCI address: domain, bus, device, function
    NvU32 domain;
    NvU16 bus;
    NvU16 device;
    NvU16 function;

    NvU32 pciDeviceId;
    NvU8 devUuid[NV_UUID_LEN];
    NvU64 deviceType;
    NvU32 linkNumber;
    NvBool bConnected;
    NvU64 chipSid;
};
// Parameters of an nvlink ioctrl call
struct nvlink_ioctrl_params
{
    void  *osPrivate;
    NvU32  cmd;
    void  *buf;
    NvU32  size;
};
// Typedefs: each struct tag is also usable without the struct keyword

// PCI
typedef struct nvlink_pci_bar_info            nvlink_pci_bar_info;
typedef struct nvlink_pci_info                nvlink_pci_info;

// Device and link registration
typedef struct nvlink_detailed_device_info    nvlink_detailed_device_info;
typedef struct nvlink_detailed_link_info      nvlink_detailed_link_info;
typedef struct nvlink_device_register_params  nvlink_device_register_params;
typedef struct nvlink_link_register_params    nvlink_link_register_params;

// Connection and ioctrl
typedef struct nvlink_conn_info               nvlink_conn_info;
typedef struct nvlink_ioctrl_params           nvlink_ioctrl_params;
#ifdef __cplusplus
}
#endif
#endif //_NVLINK_COMMON_H_

View File

@@ -0,0 +1,47 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NVLINK_ERRORS_H_
#define _NVLINK_ERRORS_H_
// Status type returned by the nvlink library interfaces
typedef int NvlStatus;

//
// Status codes. Each replacement list is fully parenthesized so that the
// cast and its operand always act as one primary expression, e.g. in
// `sizeof NVL_SUCCESS` or next to postfix operators.
//
#define NVL_SUCCESS                          ((NvlStatus) 0)
#define NVL_BAD_ARGS                         ((NvlStatus) 1)
#define NVL_NO_MEM                           ((NvlStatus) 2)
#define NVL_NOT_FOUND                        ((NvlStatus) 3)
#define NVL_INITIALIZATION_PARTIAL_FAILURE   ((NvlStatus) 4)
#define NVL_INITIALIZATION_TOTAL_FAILURE     ((NvlStatus) 5)
#define NVL_PCI_ERROR                        ((NvlStatus) 6)
#define NVL_ERR_GENERIC                      ((NvlStatus) 7)
#define NVL_ERR_INVALID_STATE                ((NvlStatus) 8)
#define NVL_UNBOUND_DEVICE                   ((NvlStatus) 9)
#define NVL_MORE_PROCESSING_REQUIRED         ((NvlStatus)10)
#define NVL_IO_ERROR                         ((NvlStatus)11)
#define NVL_ERR_STATE_IN_USE                 ((NvlStatus)12)
#define NVL_ERR_NOT_SUPPORTED                ((NvlStatus)13)
#define NVL_ERR_NOT_IMPLEMENTED              ((NvlStatus)14)
#define NVL_ERR_INSUFFICIENT_PERMISSIONS     ((NvlStatus)15)
#define NVL_ERR_OPERATING_SYSTEM             ((NvlStatus)16)
#endif // _NVLINK_ERRORS_H_

View File

@@ -0,0 +1,53 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NVLINK_EXPORT_H_
#define _NVLINK_EXPORT_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvlink_common.h"
/*
 * Initialize the core lib and do everything required to access NVLINK
 * functionality on the current platform.
 */
NvlStatus nvlink_lib_initialize(void);

/*
 * Free any related resources, then unload the core lib.
 */
NvlStatus nvlink_lib_unload(void);

/*
 * Entry point for nvlink ioctl calls.
 */
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
#ifdef __cplusplus
}
#endif
#endif //_NVLINK_EXPORT_H_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,90 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NVLINK_LOCK_H_
#define _NVLINK_LOCK_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvlink_common.h"
/*
 * Allocate the top level lock.
 * Returns NVL_SUCCESS when the lock was allocated, NVL_ERR_GENERIC otherwise.
 */
NvlStatus nvlink_lib_top_lock_alloc(void);

/*
 * Free the top level lock.
 * Returns NVL_SUCCESS when the lock was freed, NVL_ERR_GENERIC otherwise.
 */
NvlStatus nvlink_lib_top_lock_free(void);

/*
 * Allocate the per-link lock.
 * Returns NVL_SUCCESS when the lock was allocated, NVL_ERR_GENERIC otherwise.
 */
NvlStatus nvlink_lib_link_lock_alloc(nvlink_link *link);

/*
 * Free the per-link lock.
 * Returns NVL_SUCCESS when the lock was freed, NVL_ERR_GENERIC otherwise.
 */
NvlStatus nvlink_lib_link_lock_free(nvlink_link *link);
/*
 * Acquire the top level lock.
 * Returns NVL_SUCCESS when the lock was acquired, NVL_ERR_STATE_IN_USE
 * otherwise.
 */
NvlStatus nvlink_lib_top_lock_acquire(void);

/*
 * Release the top level lock.
 * Returns NVL_SUCCESS when the lock was released, NVL_ERR_GENERIC otherwise.
 */
NvlStatus nvlink_lib_top_lock_release(void);
/*
 * Sort the array of links by (DBDF, link#), lowest to highest, and acquire
 * the link locks.
 * Returns NVL_SUCCESS when every link lock was acquired. If any link lock
 * could not be acquired, all link locks acquired so far are released and
 * NVL_ERR_STATE_IN_USE is returned.
 */
NvlStatus nvlink_lib_link_locks_acquire(nvlink_link **links, int numLinks);

/*
 * Loop over all the links and call nvlink_releaseLock(links[i]->linkLock).
 * Returns NVL_SUCCESS when every link lock was released. If any link lock
 * could not be released, NVL_ERR_GENERIC is returned.
 */
NvlStatus nvlink_lib_link_locks_release(nvlink_link **links, int numLinks);
#ifdef __cplusplus
}
#endif
#endif // _NVLINK_LOCK_H_

View File

@@ -0,0 +1,86 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NVLINK_OS_H_
#define _NVLINK_OS_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvlink_common.h"
//
// Free `x` through nvlink_free(). The argument is parenthesized before the
// cast so that an expression such as `p + 1` is evaluated first and only the
// result is converted to void *.
//
#define NVLINK_FREE(x) nvlink_free((void *)(x))
// Memory management functions

// Allocation and release
void *nvlink_malloc(NvLength);
void  nvlink_free(void *);

// Buffer fill, copy and compare
void *nvlink_memset(void *, int, NvLength);
void *nvlink_memcpy(void *, const void *, NvLength);
int   nvlink_memcmp(const void *, const void *, NvLength);

// 32-bit and 64-bit reads and writes through volatile pointers
NvU32 nvlink_memRd32(const volatile void *);
void  nvlink_memWr32(volatile void *, NvU32);
NvU64 nvlink_memRd64(const volatile void *);
void  nvlink_memWr64(volatile void *, NvU64);
// String management functions
char    *nvlink_strcpy(char *, const char *);
NvLength nvlink_strlen(const char *);
int      nvlink_strcmp(const char *, const char *);
int      nvlink_snprintf(char *, NvLength, const char *, ...);
// Locking support functions; locks are passed around as opaque void pointers
void  *nvlink_allocLock(void);
void   nvlink_acquireLock(void *);
NvBool nvlink_isLockOwner(void *);
void   nvlink_releaseLock(void *);
void   nvlink_freeLock(void *);
// Miscellaneous functions
void nvlink_assert(int expression);
void nvlink_sleep(unsigned int ms);
void nvlink_print(const char *, int, const char *, int, const char *, ...);
int  nvlink_is_admin(void);

// Capability functions
NvlStatus nvlink_acquire_fabric_mgmt_cap(void *osPrivate, NvU64 capDescriptor);
int       nvlink_is_fabric_manager(void *osPrivate);
// Debug levels, numbered from 0x0 (INFO) up to 0x4 (ERRORS).
#define NVLINK_DBG_LEVEL_INFO 0x0
#define NVLINK_DBG_LEVEL_SETUP 0x1
#define NVLINK_DBG_LEVEL_USERERRORS 0x2
#define NVLINK_DBG_LEVEL_WARNINGS 0x3
#define NVLINK_DBG_LEVEL_ERRORS 0x4
// Call-site location: source file, line number and function name.
#define NVLINK_DBG_WHERE __FILE__, __LINE__, __FUNCTION__
// Convenience bundles: the call-site location followed by one debug level.
// Each expands to four comma-separated arguments, matching the leading
// (const char *, int, const char *, int) parameters of nvlink_print().
#define NVLINK_DBG_INFO NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_INFO
#define NVLINK_DBG_SETUP NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_SETUP
#define NVLINK_DBG_USERERRORS NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_USERERRORS
#define NVLINK_DBG_WARNINGS NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_WARNINGS
#define NVLINK_DBG_ERRORS NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_ERRORS
#ifdef __cplusplus
}
#endif
#endif //_NVLINK_OS_H_

View File

@@ -0,0 +1,527 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
/**
 * Look up the intranode connection that a link participates in.
 *
 * Walks the library's intranode connection list and stops at the first
 * connection that has the link as either of its two endpoints.
 *
 * @param[in]  endpoint  NVLink link to search for
 * @param[out] conn      Receives the matching connection. It is left
 *                       untouched when no connection references the link,
 *                       so callers pre-initialize it (e.g. to NULL).
 */
void
nvlink_core_get_intranode_conn
(
    nvlink_link            *endpoint,
    nvlink_intranode_conn **conn
)
{
    nvlink_intranode_conn *cur = NULL;

    FOR_EACH_CONNECTION(cur, nvlinkLibCtx.nv_intraconn_head, node)
    {
        if ((endpoint == cur->end0) || (endpoint == cur->end1))
        {
            // First match wins
            *conn = cur;
            return;
        }
    }
}
/**
 * Look up the internode connection whose local end is the given link.
 *
 * Walks the library's internode connection list and stops at the first
 * connection whose local_end is the link.
 *
 * @param[in]  localLink  Local NVLink link to search for
 * @param[out] conn       Receives the matching connection. It is left
 *                        untouched when no connection references the link,
 *                        so callers pre-initialize it (e.g. to NULL).
 */
void
nvlink_core_get_internode_conn
(
    nvlink_link            *localLink,
    nvlink_internode_conn **conn
)
{
    nvlink_internode_conn *cur = NULL;

    FOR_EACH_CONNECTION(cur, nvlinkLibCtx.nv_interconn_head, node)
    {
        if (localLink == cur->local_end)
        {
            // First match wins
            *conn = cur;
            return;
        }
    }
}
/**
 * Record a new intranode connection between two links.
 *
 * If end0 already belongs to a connection, nothing is added; the existing
 * connection is only asserted to pair end0 with end1.
 *
 * @param[in] end0  NVLink link pointer for one end
 * @param[in] end1  NVLink link pointer for the other end (equal to end0 for
 *                  a loopback link)
 *
 * @return NVL_SUCCESS if the connection exists or was added,
 *         NVL_NO_MEM if the connection object could not be allocated
 */
NvlStatus
nvlink_core_add_intranode_conn
(
    nvlink_link *end0,
    nvlink_link *end1
)
{
    nvlink_intranode_conn *conn = NULL;

    // Nothing to add if end0 already has an intranode connection
    nvlink_core_get_intranode_conn(end0, &conn);
    if (conn != NULL)
    {
        // Sanity check: the peer recorded for end0 must be end1
        if (conn->end0 == end0)
        {
            nvlink_assert(conn->end1 == end1);
        }
        else
        {
            nvlink_assert(conn->end0 == end1);
        }

        return NVL_SUCCESS;
    }

    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
        "Adding new NVLink intranode connection between %s:%s and %s:%s\n",
        end0->dev->deviceName, end0->linkName,
        end1->dev->deviceName, end1->linkName));

    // Allocate and zero the connection object
    conn = (nvlink_intranode_conn *)nvlink_malloc(sizeof(*conn));
    if (conn == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
            "Adding NVLink intranode connection failed "
            "due to memory allocation error\n"));
        return NVL_NO_MEM;
    }
    nvlink_memset(conn, 0, sizeof(*conn));

    // Set up the list node and both endpoints, then publish the connection
    nvListInit(&conn->node);
    conn->end0 = end0;
    conn->end1 = end1;
    nvListAppend(&conn->node, &nvlinkLibCtx.nv_intraconn_head.node);

    // A loopback link contributes one connected endpoint, any other link two
    nvlinkLibCtx.connectedEndpoints += (end0 == end1) ? 1 : 2;

    return NVL_SUCCESS;
}
/**
 * Record a new internode connection for a local link.
 *
 * If the local link already has an internode connection, nothing is added.
 *
 * Note: As of now, no stats/count are kept for internode connections.
 *
 * @param[in] localLink       NVLink link pointer for the local end
 * @param[in] remoteEndPoint  Remote endpoint description; copied by value
 *
 * @return NVL_SUCCESS if the connection exists or was added,
 *         NVL_NO_MEM if the connection object could not be allocated
 */
NvlStatus
nvlink_core_add_internode_conn
(
    nvlink_link                 *localLink,
    nvlink_remote_endpoint_info *remoteEndPoint
)
{
    nvlink_internode_conn *conn = NULL;

    // Nothing to add if the local link already has an internode connection
    nvlink_core_get_internode_conn(localLink, &conn);
    if (conn != NULL)
    {
        return NVL_SUCCESS;
    }

    // Allocate and zero the connection object
    conn = (nvlink_internode_conn *)nvlink_malloc(sizeof(*conn));
    if (conn == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
            "Adding nvlink internode connection failed"
            " due to memory allocation error\n"));
        return NVL_NO_MEM;
    }
    nvlink_memset(conn, 0, sizeof(*conn));

    // Set up the list node and fill in both ends of the connection
    nvListInit(&conn->node);
    conn->local_end = localLink;
    nvlink_memcpy(&conn->remote_end, remoteEndPoint, sizeof(*remoteEndPoint));

    // Publish the connection on the internode list
    nvListAppend(&conn->node, &nvlinkLibCtx.nv_interconn_head.node);

    return NVL_SUCCESS;
}
/**
 * Unlink an intranode connection, update the endpoint counters and free it.
 *
 * @param[in] conn  NVLink connection pointer; freed by this call and must
 *                  not be used afterwards
 */
void
nvlink_core_remove_intranode_conn
(
    nvlink_intranode_conn *conn
)
{
    // Take the connection off the list of connections
    nvListDel(&conn->node);

    if (conn->end0 == conn->end1)
    {
        // Loopback link: one connected endpoint goes away, and the
        // notConnected count is left alone
        nvlinkLibCtx.connectedEndpoints -= 1;
    }
    else
    {
        // Regular link: two connected endpoints go away and the
        // notConnected count grows by one
        nvlinkLibCtx.connectedEndpoints    -= 2;
        nvlinkLibCtx.notConnectedEndpoints += 1;
    }

    nvlink_free((void *)conn);
}
/**
 * Remove and free the internode connection owned by a local link, if any.
 *
 * @param[in] localLink  NVLink link pointer whose connection is removed
 */
void
nvlink_core_remove_internode_conn
(
    nvlink_link *localLink
)
{
    nvlink_internode_conn *found = NULL;

    nvlink_core_get_internode_conn(localLink, &found);
    if (found == NULL)
    {
        // The link has no internode connection; nothing to do
        return;
    }

    nvListDel(&found->node);
    nvlink_free((void *)found);
}
/**
* Check if the given intranode connection is in the specified mode
*
* Each handled mode follows the same pattern: both ends in the mode is the
* good case, exactly one end in the mode is an inconsistent connection, and
* neither end in the mode is a plain "not in that mode" answer. For SAFE and
* HS the sublink states of both ends are checked as well.
*
* nvlink_core_check_link_state() and the tx/rx sublink variants are used as
* boolean predicates here (non-zero meaning "is in that state").
*
* @param[in] conn NVLink Connection pointer
* @param[in] linkMode Link mode to check for (NVLINK_LINKSTATE_*)
*
* return NVL_SUCCESS if the conn is in the given state
* return NVL_ERR_INVALID_STATE if only one end is in the given state, or if
* the sublinks do not match the link state
* return NVL_ERR_GENERIC if the conn is not in the given state
* return NVL_SUCCESS for any linkMode that the switch does not handle
*/
NvlStatus
nvlink_core_check_intranode_conn_state
(
nvlink_intranode_conn *conn,
NvU64 linkMode
)
{
switch (linkMode)
{
case NVLINK_LINKSTATE_OFF:
{
// Both ends OFF: the connection is in the requested state
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_OFF)) &&
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_OFF)))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: Link already in OFF state. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_SUCCESS;
}
// Check if only one end of connection is OFF
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_OFF)) ||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_OFF)))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link is in bad state. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_INVALID_STATE;
}
// Neither end is OFF (no message is printed for this case)
return NVL_ERR_GENERIC;
}
case NVLINK_LINKSTATE_RESET:
{
// Both ends in RESET: the connection is in the requested state
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_RESET)) &&
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_RESET)))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: Link already in RESET state. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_SUCCESS;
}
// Check if only one end of connection is RESET
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_RESET)) ||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_RESET)))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link is in bad state. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_INVALID_STATE;
}
// Neither end is in RESET (no message is printed for this case)
return NVL_ERR_GENERIC;
}
case NVLINK_LINKSTATE_SAFE:
{
// Check if both ends and their sublinks are already in SAFE mode
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_SAFE)) &&
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_SAFE)))
{
if ((nvlink_core_check_tx_sublink_state(conn->end0,
NVLINK_SUBLINK_STATE_TX_OFF)) &&
(nvlink_core_check_tx_sublink_state(conn->end1,
NVLINK_SUBLINK_STATE_TX_OFF)) &&
(nvlink_core_check_rx_sublink_state(conn->end0,
NVLINK_SUBLINK_STATE_RX_OFF)) &&
(nvlink_core_check_rx_sublink_state(conn->end1,
NVLINK_SUBLINK_STATE_RX_OFF)))
{
//
// Links report SAFE but all four sublinks are OFF: this is the
// state left behind by a pseudo-clean shutdown, so the
// connection is reported as not being in SAFE mode.
//
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: Link is not in SAFE mode. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_GENERIC;
}
// Otherwise every sublink on both ends must itself be in SAFE
else if (!((nvlink_core_check_tx_sublink_state(conn->end0,
NVLINK_SUBLINK_STATE_TX_SAFE)) &&
(nvlink_core_check_tx_sublink_state(conn->end1,
NVLINK_SUBLINK_STATE_TX_SAFE)) &&
(nvlink_core_check_rx_sublink_state(conn->end0,
NVLINK_SUBLINK_STATE_RX_SAFE)) &&
(nvlink_core_check_rx_sublink_state(conn->end1,
NVLINK_SUBLINK_STATE_RX_SAFE))))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Sublinks are in bad state. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_INVALID_STATE;
}
// Links and all sublinks are in SAFE
return NVL_SUCCESS;
}
// Check if only one end of connection is in SAFE mode
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_SAFE)) ||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_SAFE)))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link is in bad state. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_INVALID_STATE;
}
// Neither end is in SAFE
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: Link is not in SAFE mode. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_GENERIC;
}
case NVLINK_LINKSTATE_HS:
{
// Check if both ends and their sublinks are already in HS mode
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_HS)) &&
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_HS)))
{
// Every sublink on both ends must itself be in HS
if (!((nvlink_core_check_tx_sublink_state(conn->end0,
NVLINK_SUBLINK_STATE_TX_HS)) &&
(nvlink_core_check_tx_sublink_state(conn->end1,
NVLINK_SUBLINK_STATE_TX_HS)) &&
(nvlink_core_check_rx_sublink_state(conn->end0,
NVLINK_SUBLINK_STATE_RX_HS)) &&
(nvlink_core_check_rx_sublink_state(conn->end1,
NVLINK_SUBLINK_STATE_RX_HS))))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Sublinks are in bad state. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_INVALID_STATE;
}
// Links and all sublinks are in HS
return NVL_SUCCESS;
}
// Check if only one end of connection is in HS mode
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_HS)) ||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_HS)))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link is in bad state. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_INVALID_STATE;
}
// Neither end is in HS
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: Link is not in HIGH SPEED mode. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_GENERIC;
}
case NVLINK_LINKSTATE_SLEEP:
{
// Check if both ends of connection are already in SLEEP mode
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_SLEEP)) &&
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_SLEEP)))
{
return NVL_SUCCESS;
}
// Check if only one end of connection is in SLEEP mode
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_SLEEP)) ||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_SLEEP)))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link is in bad state. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_INVALID_STATE;
}
// Neither end is in SLEEP
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: Link is not in SLEEP mode. ",
__FUNCTION__));
nvlink_core_print_intranode_conn(conn);
return NVL_ERR_GENERIC;
}
}
// linkMode is not one of the modes handled above: reported as success
return NVL_SUCCESS;
}
/**
 * Fill an nvlink_conn_info structure from the remote link of an intranode
 * connection.
 *
 * The device UUID is copied only when the remote device has one; otherwise
 * conn_info->devUuid is left as the caller provided it.
 *
 * @param[in]  remote_end  NVLink link pointer of the remote endpoint
 * @param[out] conn_info   Receives the details of the remote endpoint
 */
void
nvlink_core_copy_intranode_conn_info
(
    nvlink_link      *remote_end,
    nvlink_conn_info *conn_info
)
{
    // Link-level information
    conn_info->linkNumber = remote_end->linkNumber;
    conn_info->chipSid    = remote_end->localSid;

    // Device type
    conn_info->deviceType = remote_end->dev->type;

    // PCI information of the remote device
    conn_info->pciDeviceId = remote_end->dev->pciInfo.pciDeviceId;
    conn_info->function    = remote_end->dev->pciInfo.function;
    conn_info->device      = remote_end->dev->pciInfo.device;
    conn_info->bus         = remote_end->dev->pciInfo.bus;
    conn_info->domain      = remote_end->dev->pciInfo.domain;

    // UUID of the remote device, when it has one
    if (remote_end->dev->uuid != NULL)
    {
        nvlink_memcpy(conn_info->devUuid, remote_end->dev->uuid, NV_UUID_LEN);
    }
}
/**
 * Fill an nvlink_conn_info structure from the remote endpoint description
 * of an internode connection.
 *
 * pciDeviceId is always reported as 0, and chipSid is not written by this
 * function.
 *
 * @param[in]  remote_end  Remote endpoint information
 * @param[out] conn_info   Receives the details of the remote endpoint
 */
void
nvlink_core_copy_internode_conn_info
(
    nvlink_remote_endpoint_info *remote_end,
    nvlink_conn_info            *conn_info
)
{
    // Link number and device type
    conn_info->linkNumber = remote_end->linkIndex;
    conn_info->deviceType = remote_end->devType;

    // PCI information of the remote device
    conn_info->pciDeviceId = 0;
    conn_info->function    = remote_end->pciInfo.function;
    conn_info->device      = remote_end->pciInfo.device;
    conn_info->bus         = remote_end->pciInfo.bus;
    conn_info->domain      = remote_end->pciInfo.domain;

    // UUID of the remote device
    nvlink_memcpy(conn_info->devUuid, remote_end->devUuid, NV_UUID_LEN);
}

View File

@@ -0,0 +1,383 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
static NvBool _nvlink_core_all_links_initialized(void);
static void _nvlink_core_discover_topology(void);
/**
 * Get the remote end of the link
 *
 * For a given end of a link, returns the other end it is connected to. When
 * new endpoints have been registered, the links are first moved to SWCFG
 * (if not all of them are initialized yet) and the topology is
 * re-discovered before the lookup.
 *
 * Note: This function shouldn't be called when external fabric management is
 *       enabled in the endpoint drivers. Unfortunately, there is no graceful
 *       way to know that in the NVLink driver beforehand (during module load).
 *
 * @param[in]  end         NVLink Link pointer
 * @param[out] remote_end  Remote endpoint of the connection; left untouched
 *                         when no connection is found or when the function
 *                         bails out early
 * @param[in]  flags       Flags forwarded to the OFF -> SWCFG link init
 */
void
nvlink_core_discover_and_get_remote_end
(
    nvlink_link  *end,
    nvlink_link **remote_end,
    NvU32         flags
)
{
    nvlink_intranode_conn *conn     = NULL;
    nvlink_device         *dev      = NULL;
    nvlink_link           *link     = NULL;
    NvU32                  numLinks = 0;
    nvlink_link          **linkList = NULL;

    // Scratch array big enough to hold every link in the system
    linkList = (nvlink_link **)nvlink_malloc(
                    sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
    if (linkList == NULL)
    {
        return;
    }

    if (nvlinkLibCtx.bNewEndpoints)
    {
        if (!_nvlink_core_all_links_initialized())
        {
            // Gather every registered link so they can be moved to SWCFG
            FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
            {
                FOR_EACH_LINK_REGISTERED(link, dev, node)
                {
                    if (numLinks >= NVLINK_MAX_SYSTEM_LINK_NUM)
                    {
                        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                            "%s: linkCount >= NVLINK_MAX_SYSTEM_LINK_NUM",
                            __FUNCTION__));
                        nvlink_assert(0);
                        goto cleanup;
                    }

                    linkList[numLinks] = link;
                    numLinks++;
                }
            }

            // Initialize the links to SWCFG mode
            nvlink_core_init_links_from_off_to_swcfg(linkList, numLinks, flags);
        }

        // Re-discover the nvlink topology
        _nvlink_core_discover_topology();
    }

    // Report the opposite end of the connection this endpoint belongs to
    nvlink_core_get_intranode_conn(end, &conn);
    if (conn != NULL)
    {
        if (conn->end0 == end)
        {
            *remote_end = conn->end1;
        }
        else
        {
            *remote_end = conn->end0;
        }
    }

cleanup:
    nvlink_free((void *)linkList);
}
/**
 * Discovery process to determine topology
 *
 * Involves sending and reading back AN0 packets/SID values. For every
 * registered link that does not yet belong to an intranode connection, a
 * discovery token is injected (or the SID values are used on NVLink 3.0+)
 * and all registered links are scanned for the matching peer. A match is
 * recorded as a new intranode connection.
 *
 * Side effects: resets and recomputes nvlinkLibCtx.notConnectedEndpoints,
 * increments packet_injection_retries on each probed link, and may add
 * entries to the intranode connection list.
 */
static void
_nvlink_core_discover_topology(void)
{
    nvlink_device         *dev0         = NULL;
    nvlink_device         *dev1         = NULL;
    nvlink_link           *end0         = NULL;
    nvlink_link           *end1         = NULL;
    nvlink_intranode_conn *conn         = NULL;
    NvU64                  linkMode     = NVLINK_LINKSTATE_OFF;
    NvBool                 isTokenFound = NV_FALSE;
    NvU64                  token        = 0;

    nvlinkLibCtx.notConnectedEndpoints = 0;

    FOR_EACH_DEVICE_REGISTERED(dev0, nvlinkLibCtx.nv_devicelist_head, node)
    {
        FOR_EACH_LINK_REGISTERED(end0, dev0, node)
        {
            //
            // If receiver detect failed for the link or if clocks could not be set
            // up for the link, then move to next link
            //
            if (!end0->bRxDetected || end0->bTxCommonModeFail)
                continue;

            // Links that already belong to a connection need no discovery
            conn = NULL;
            nvlink_core_get_intranode_conn(end0, &conn);
            if (conn != NULL)
            {
                continue;
            }

            if (end0->packet_injection_retries > NVLINK_MAX_NUM_PACKET_INJECTION_RETRIES)
            {
                NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
                    "%s: Packet injection retries reached for %s:%s.\n",
                    __FUNCTION__, end0->dev->deviceName, end0->linkName));
                nvlinkLibCtx.notConnectedEndpoints++;
                continue;
            }

            //
            // Query the link mode afresh for every link. If the query fails the
            // link is treated as OFF, so that the mode read for a previously
            // probed link is never reused for this one.
            //
            linkMode = NVLINK_LINKSTATE_OFF;
            if (end0->link_handlers->get_dl_link_mode(end0, &linkMode))
            {
                linkMode = NVLINK_LINKSTATE_OFF;
            }

            // Packet injection can only happen on links that are in SAFE or ACTIVE
            if (!((linkMode == NVLINK_LINKSTATE_SAFE) || (linkMode == NVLINK_LINKSTATE_HS)))
            {
                NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
                    "%s: Packet injection only works for links in SAFE or HS %s:%s.\n",
                    __FUNCTION__, end0->dev->deviceName, end0->linkName));
                nvlinkLibCtx.notConnectedEndpoints++;
                continue;
            }

            //
            // Send the AN0 packet
            // For Nvlink3.0, token mechanism is handled by Minion.
            // SW gets Sids values and so write_discovery_token is Stubbed for Nvlink 3.0
            // The token is written for pre-3.0 links, and for 3.0+ links whose
            // local or remote SID is still 0.
            //
            if ((end0->version < NVLINK_DEVICE_VERSION_30) ||
                ((end0->localSid == 0) || (end0->remoteSid == 0)))
            {
                end0->link_handlers->write_discovery_token(end0, end0->token);
            }

            end0->packet_injection_retries++;
            isTokenFound = NV_FALSE;

            FOR_EACH_DEVICE_REGISTERED(dev1, nvlinkLibCtx.nv_devicelist_head, node)
            {
                FOR_EACH_LINK_REGISTERED(end1, dev1, node)
                {
                    //
                    // If receiver detect failed for the link or if clocks could not be
                    // set up for the link, then move to next link
                    //
                    if (!end1->bRxDetected || end1->bTxCommonModeFail)
                        continue;

                    token = 0;

                    if ((end0->version >= NVLINK_DEVICE_VERSION_30) &&
                        (end0->localSid != 0) && (end0->remoteSid != 0))
                    {
                        // SID based matching: compare the remote SID/link id of end0
                        // with the local SID/link number of the candidate
                        if ((end0->remoteSid == end1->localSid) &&
                            (end0->remoteLinkId == end1->linkNumber))
                        {
                            // Make sure the below token check passes.
                            token = end0->token;
                        }
                    }
                    else
                    {
                        // Read the RX sublink for the AN0 packet
                        end1->link_handlers->read_discovery_token(end1, &token);
                    }

                    // If token matches, establish the connection
                    if (token == end0->token)
                    {
                        isTokenFound = NV_TRUE;

                        //
                        // If R4 tokens were used for NVLink3.0+, then mark initnegotiate
                        // passed, since ALT training won't get kicked off without it.
                        //
                        if ((end0->version >= NVLINK_DEVICE_VERSION_30) &&
                            ((end0->localSid == 0) || (end0->remoteSid == 0)))
                        {
                            end0->bInitnegotiateConfigGood = NV_TRUE;
                            end1->bInitnegotiateConfigGood = NV_TRUE;
                        }

                        // Add to the connections list
                        nvlink_core_add_intranode_conn(end0, end1);
                        break;
                    }
                }

                if (isTokenFound) break;
            }

            // Stop early once every endpoint that can be connected is connected
            if (nvlinkLibCtx.connectedEndpoints ==
                (nvlinkLibCtx.registeredEndpoints - nvlinkLibCtx.notConnectedEndpoints))
            {
                break;
            }
        }

        // Same early exit for the outer device loop
        if (nvlinkLibCtx.connectedEndpoints ==
            (nvlinkLibCtx.registeredEndpoints - nvlinkLibCtx.notConnectedEndpoints))
        {
            break;
        }
    }
}
/**
* Are all links trained or is there a need to re-attempt training ?
*
* Returns true if all links trained and no need to re-attempt training
* Returns false otherwise
*/
static NvBool
_nvlink_core_all_links_initialized(void)
{
nvlink_device *dev = NULL;
nvlink_link *link = NULL;
NvU64 linkMode, txMode, rxMode;
NvU32 txSubMode, rxSubMode;
if (nvlinkLibCtx.registeredEndpoints == 0)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: No links registered with nvlink core! Sleeping.\n",
__FUNCTION__));
return NV_TRUE;
}
nvlinkLibCtx.endpointsInFail = 0;
nvlinkLibCtx.endpointsInSafe = 0;
nvlinkLibCtx.endpointsInActive = 0;
//
// Get the current state of all endpoints. This determines
// if some of the endpoints are still not trained to SAFE
//
FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
{
FOR_EACH_LINK_REGISTERED(link, dev, node)
{
if (nvlinkLibCtx.bNewEndpoints)
{
link->safe_retries = 0;
link->packet_injection_retries = 0;
}
if (link->state == NVLINK_LINKSTATE_FAIL)
{
if (nvlinkLibCtx.bNewEndpoints)
{
//
// New endpoints were detected. There may be a chance that
// endpoints that failed previously may transition to safe
//
link->state = NVLINK_LINKSTATE_OFF;
}
else
{
nvlinkLibCtx.endpointsInFail++;
}
continue;
}
linkMode = NVLINK_LINKSTATE_OFF;
if (link->link_handlers->get_dl_link_mode(link, &linkMode))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Unable to get link mode for %s:%s",
__FUNCTION__, link->dev->deviceName, link->linkName));
continue;
}
if (linkMode == NVLINK_LINKSTATE_SAFE)
{
//
// Link is only truly in SAFE mode if link state and sublink state
// is in SAFE/SWCFG.
// After pseudo-clean shutdown, sublinks are in OFF, so they
// need to be retrained to SAFE
//
if (link->link_handlers->get_tx_mode(link, &txMode, &txSubMode))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Unable to get tx sublink mode for %s:%s",
__FUNCTION__, link->dev->deviceName, link->linkName));
}
if (link->link_handlers->get_rx_mode(link, &rxMode, &rxSubMode))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Unable to get rx sublink mode for %s:%s",
__FUNCTION__, link->dev->deviceName, link->linkName));
}
if ((txMode == NVLINK_SUBLINK_STATE_TX_OFF) ||
(rxMode == NVLINK_SUBLINK_STATE_RX_OFF))
{
continue;
}
link->bRxDetected = NV_TRUE;
nvlinkLibCtx.endpointsInSafe++;
continue;
}
if (linkMode == NVLINK_LINKSTATE_HS)
{
link->bRxDetected = NV_TRUE;
nvlinkLibCtx.endpointsInActive++;
continue;
}
}
}
// New endpoints have been considered
nvlinkLibCtx.bNewEndpoints = NV_FALSE;
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: Registered Links = %d, nvlinkLibCtx.endpointsInSafe = %d, "
" nvlinkLibCtx.endpointsInFail = %d, nvlinkLibCtx.endpointsInActive = %d\n",
__FUNCTION__,
nvlinkLibCtx.registeredEndpoints, nvlinkLibCtx.endpointsInSafe,
nvlinkLibCtx.endpointsInFail, nvlinkLibCtx.endpointsInActive));
// Determine if all links are currently trained
if ((nvlinkLibCtx.registeredEndpoints - nvlinkLibCtx.endpointsInFail -
nvlinkLibCtx.endpointsInSafe - nvlinkLibCtx.endpointsInActive) == 0)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: All connected links are in trained\n",
__FUNCTION__));
return NV_TRUE;
}
return NV_FALSE;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,692 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2017-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvVer.h"
#include "nvlink_os.h"
#include "nvlink_lib_ctrl.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
/**
 * Translate an NVLINK_DEVICE_TYPE_* value into the nvlink_device_type enum
 *
 * @param[in] type  NVLINK_DEVICE_TYPE_* value
 *
 * @return the matching nvlink_device_type_* value, or
 *         nvlink_device_type_unknown for any other input
 */
static nvlink_device_type
_nvlink_core_map_device_type
(
    NvU64 type
)
{
    switch (type)
    {
        case NVLINK_DEVICE_TYPE_IBMNPU:   return nvlink_device_type_ibmnpu;
        case NVLINK_DEVICE_TYPE_GPU:      return nvlink_device_type_gpu;
        case NVLINK_DEVICE_TYPE_NVSWITCH: return nvlink_device_type_nvswitch;
        default:                          break;
    }

    return nvlink_device_type_unknown;
}
/**
 * Translate the DL/TL link states into the nvlink_link_mode enum
 *
 * A TL state of NVLINK_LINKSTATE_CONTAIN takes precedence over whatever the
 * DL reports; otherwise the result is derived from the DL state alone.
 *
 * @param[in] dlState  NVLINK_LINKSTATE_* value reported by the DL
 * @param[in] tlState  NVLINK_LINKSTATE_* value reported by the TL
 *
 * @return the matching nvlink_link_mode_* value, or
 *         nvlink_link_mode_unknown for an unrecognized DL state
 */
static nvlink_link_mode
_nvlink_core_map_link_state
(
    NvU64 dlState,
    NvU64 tlState
)
{
    // If TL has entered contain, report contain regardless of the DL state
    if (tlState == NVLINK_LINKSTATE_CONTAIN)
    {
        return nvlink_link_mode_contain;
    }

    switch (dlState)
    {
        case NVLINK_LINKSTATE_OFF:           return nvlink_link_mode_off;
        case NVLINK_LINKSTATE_HS:            return nvlink_link_mode_active;
        case NVLINK_LINKSTATE_SAFE:          return nvlink_link_mode_swcfg;
        case NVLINK_LINKSTATE_FAULT:         return nvlink_link_mode_fault;
        case NVLINK_LINKSTATE_RECOVERY:      return nvlink_link_mode_recovery;
        case NVLINK_LINKSTATE_FAIL:          return nvlink_link_mode_fail;
        case NVLINK_LINKSTATE_DETECT:        return nvlink_link_mode_detect;
        case NVLINK_LINKSTATE_RESET:         return nvlink_link_mode_reset;
        case NVLINK_LINKSTATE_ENABLE_PM:     return nvlink_link_mode_enable_pm;
        case NVLINK_LINKSTATE_DISABLE_PM:    return nvlink_link_mode_disable_pm;
        case NVLINK_LINKSTATE_TRAFFIC_SETUP: return nvlink_link_mode_traffic_setup;
        default:                             break;
    }

    return nvlink_link_mode_unknown;
}
/**
 * Translate a TX sublink state into the nvlink_tx_sublink_mode enum
 *
 * @param[in] state  NVLINK_SUBLINK_STATE_TX_* value
 *
 * @return the matching nvlink_tx_sublink_mode_* value, or
 *         nvlink_tx_sublink_mode_unknown for any other input
 */
static nvlink_tx_sublink_mode
_nvlink_core_map_tx_sublink_state
(
    NvU64 state
)
{
    switch (state)
    {
        case NVLINK_SUBLINK_STATE_TX_HS:
            return nvlink_tx_sublink_mode_hs;
        case NVLINK_SUBLINK_STATE_TX_SINGLE_LANE:
            return nvlink_tx_sublink_mode_single_lane;
        case NVLINK_SUBLINK_STATE_TX_TRAIN:
            return nvlink_tx_sublink_mode_train;
        case NVLINK_SUBLINK_STATE_TX_SAFE:
            return nvlink_tx_sublink_mode_safe;
        case NVLINK_SUBLINK_STATE_TX_OFF:
            return nvlink_tx_sublink_mode_off;
        case NVLINK_SUBLINK_STATE_TX_COMMON_MODE:
            return nvlink_tx_sublink_mode_common_mode;
        case NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE:
            return nvlink_tx_sublink_mode_common_mode_disable;
        case NVLINK_SUBLINK_STATE_TX_DATA_READY:
            return nvlink_tx_sublink_mode_data_ready;
        case NVLINK_SUBLINK_STATE_TX_EQ:
            return nvlink_tx_sublink_mode_tx_eq;
        case NVLINK_SUBLINK_STATE_TX_PRBS_EN:
            return nvlink_tx_sublink_mode_pbrs_en;
        case NVLINK_SUBLINK_STATE_TX_POST_HS:
            return nvlink_tx_sublink_mode_post_hs;
        default:
            break;
    }

    return nvlink_tx_sublink_mode_unknown;
}
/**
 * Translate an RX sublink state into the nvlink_rx_sublink_mode enum
 *
 * @param[in] state  NVLINK_SUBLINK_STATE_RX_* value
 *
 * @return the matching nvlink_rx_sublink_mode_* value, or
 *         nvlink_rx_sublink_mode_unknown for any other input
 */
static nvlink_rx_sublink_mode
_nvlink_core_map_rx_sublink_state
(
    NvU64 state
)
{
    switch (state)
    {
        case NVLINK_SUBLINK_STATE_RX_HS:
            return nvlink_rx_sublink_mode_hs;
        case NVLINK_SUBLINK_STATE_RX_SINGLE_LANE:
            return nvlink_rx_sublink_mode_single_lane;
        case NVLINK_SUBLINK_STATE_RX_TRAIN:
            return nvlink_rx_sublink_mode_train;
        case NVLINK_SUBLINK_STATE_RX_SAFE:
            return nvlink_rx_sublink_mode_safe;
        case NVLINK_SUBLINK_STATE_RX_OFF:
            return nvlink_rx_sublink_mode_off;
        case NVLINK_SUBLINK_STATE_RX_RXCAL:
            return nvlink_rx_sublink_mode_rxcal;
        default:
            break;
    }

    return nvlink_rx_sublink_mode_unknown;
}
/**
 * Check if the link is already initialized
 *
 * Note: A link counts as initialized when its mode is SAFE (SWCFG) or
 *       HS (ACTIVE)
 *
 * @param[in] linkMode  Link state (NVLINK_LINKSTATE_*)
 *
 * @return NV_TRUE for NVLINK_LINKSTATE_SAFE or NVLINK_LINKSTATE_HS,
 *         NV_FALSE otherwise
 */
static NvBool
_nvlink_core_is_link_initialized
(
    NvU64 linkMode
)
{
    if ((linkMode != NVLINK_LINKSTATE_SAFE) &&
        (linkMode != NVLINK_LINKSTATE_HS))
    {
        return NV_FALSE;
    }

    return NV_TRUE;
}
/**
 * Get the mask of enabled links for the device
 *
 * Builds the mask by setting bit linkNumber for every link found on the
 * device's link list.
 *
 * @param[in] dev  nvlink_device pointer
 *
 * @return 64-bit mask with one bit set per link on dev->link_list
 */
static NvU64
_nvlink_core_get_enabled_link_mask
(
    nvlink_device *dev
)
{
    nvlink_link *cur  = NULL;
    NvU64        mask = 0x0;

    nvListForEachEntry(cur, &dev->link_list, node)
    {
        mask = mask | NVBIT64(cur->linkNumber);
    }

    return mask;
}
/**
 * Check if the device type is supported
 *
 * @param[in] devType  Device type (nvlink_device_type_* value)
 *
 * @return NV_TRUE for the ibmnpu, gpu and nvswitch device types,
 *         NV_FALSE for anything else
 */
NvBool
nvlink_core_is_supported_device_type
(
    NvU32 devType
)
{
    if (devType == nvlink_device_type_ibmnpu)
    {
        return NV_TRUE;
    }

    if (devType == nvlink_device_type_gpu)
    {
        return NV_TRUE;
    }

    if (devType == nvlink_device_type_nvswitch)
    {
        return NV_TRUE;
    }

    return NV_FALSE;
}
/**
 * Get the link and sublink states for the endpoint
 *
 * This is a best case effort to return the current state of the link to the
 * user as part of the ioctl call. Failures of the underlying callbacks are
 * only asserted on, not reported to the caller.
 *
 * @param[in]  link       nvlink_link *
 * @param[out] linkState  nvlink_link_state *; linkMode, txSubLinkMode and
 *                        rxSubLinkMode are filled in
 */
void
nvlink_core_get_endpoint_state
(
    nvlink_link       *link,
    nvlink_link_state *linkState
)
{
    NvlStatus status            = NVL_SUCCESS;
    NvU32     txSubLinkSubstate = NVLINK_SUBLINK_SUBSTATE_TX_STABLE;
    NvU32     rxSubLinkSubState = NVLINK_SUBLINK_SUBSTATE_RX_STABLE;
    NvU64     dlState           = NVLINK_LINKSTATE_INVALID;
    NvU64     tlState           = NVLINK_LINKSTATE_INVALID;

    //
    // TX and RX get their own variable, each starting out as INVALID, so
    // that a failed RX query cannot hand the TX state to the RX mapping.
    //
    NvU64     txState           = NVLINK_LINKSTATE_INVALID;
    NvU64     rxState           = NVLINK_LINKSTATE_INVALID;

    //
    // Typically, these calls should succeed unless the corresponding
    // HAL/Callbacks are not registered, which can happen during early
    // development cycle. Adding an assert to catch that in debug builds.
    //
    status = link->link_handlers->get_dl_link_mode(link, &dlState);
    nvlink_assert(status == NVL_SUCCESS);

    // The TL state is queried without checking the result
    link->link_handlers->get_tl_link_mode(link, &tlState);

    linkState->linkMode = _nvlink_core_map_link_state(dlState, tlState);

    status = link->link_handlers->get_tx_mode(link,
                                              &txState,
                                              &txSubLinkSubstate);
    nvlink_assert(status == NVL_SUCCESS);
    linkState->txSubLinkMode = _nvlink_core_map_tx_sublink_state(txState);

    status = link->link_handlers->get_rx_mode(link,
                                              &rxState,
                                              &rxSubLinkSubState);
    nvlink_assert(status == NVL_SUCCESS);
    linkState->rxSubLinkMode = _nvlink_core_map_rx_sublink_state(rxState);
}
/**
 * Look up a registered nvlink_device by node id and PCI DBDF
 *
 * @param[in]   devInfo  Node id and PCI information to match
 * @param[out]  dev      Receives the matching nvlink_device *, or NULL
 *                       when no registered device matches
 */
void
nvlink_core_get_device_by_devinfo
(
    nvlink_device_info  *devInfo,
    nvlink_device      **dev
)
{
    nvlink_device *candidate = NULL;

    FOR_EACH_DEVICE_REGISTERED(candidate, nvlinkLibCtx.nv_devicelist_head, node)
    {
        NvBool bSameNode = (candidate->nodeId == devInfo->nodeId) ? NV_TRUE : NV_FALSE;
        NvBool bSameDbdf = ((candidate->pciInfo.domain   == devInfo->pciInfo.domain) &&
                            (candidate->pciInfo.bus      == devInfo->pciInfo.bus)    &&
                            (candidate->pciInfo.device   == devInfo->pciInfo.device) &&
                            (candidate->pciInfo.function == devInfo->pciInfo.function))
                               ? NV_TRUE : NV_FALSE;

        if (bSameNode && bSameDbdf)
        {
            // First match wins
            *dev = candidate;
            return;
        }
    }

    // Walked the whole list without a match
    *dev = NULL;
}
/**
 * Look up a registered nvlink_link by node id, PCI DBDF and link number
 *
 * @param[in]   endPoint  Node id, PCI information and link index to match
 * @param[out]  link      Receives the matching nvlink_link *, or NULL when
 *                        no registered link matches
 */
void
nvlink_core_get_link_by_endpoint
(
    nvlink_endpoint  *endPoint,
    nvlink_link     **link
)
{
    nvlink_device *curDev  = NULL;
    nvlink_link   *curLink = NULL;

    FOR_EACH_DEVICE_REGISTERED(curDev, nvlinkLibCtx.nv_devicelist_head, node)
    {
        NvBool bSameDevice = ((curDev->nodeId           == endPoint->nodeId)         &&
                              (curDev->pciInfo.domain   == endPoint->pciInfo.domain) &&
                              (curDev->pciInfo.bus      == endPoint->pciInfo.bus)    &&
                              (curDev->pciInfo.device   == endPoint->pciInfo.device) &&
                              (curDev->pciInfo.function == endPoint->pciInfo.function))
                                 ? NV_TRUE : NV_FALSE;

        if (bSameDevice)
        {
            // Device matches: search its links for the requested index
            FOR_EACH_LINK_REGISTERED(curLink, curDev, node)
            {
                if (curLink->linkNumber == endPoint->linkIndex)
                {
                    *link = curLink;
                    return;
                }
            }
        }
    }

    // No device/link combination matched
    *link = NULL;
}
/**
 * Fill in the endpoint description (node id, PCI DBDF, link index) for a link
 *
 * @param[in]   connLink      nvlink_link * to describe
 * @param[out]  endPointInfo  Endpoint details for the link
 */
void
nvlink_core_copy_endpoint_info
(
    nvlink_link     *connLink,
    nvlink_endpoint *endPointInfo
)
{
    nvlink_device *owner = connLink->dev;

    // Identity of the owning device
    endPointInfo->nodeId           = owner->nodeId;
    endPointInfo->pciInfo.domain   = owner->pciInfo.domain;
    endPointInfo->pciInfo.bus      = owner->pciInfo.bus;
    endPointInfo->pciInfo.device   = owner->pciInfo.device;
    endPointInfo->pciInfo.function = owner->pciInfo.function;

    // Link number within that device
    endPointInfo->linkIndex        = connLink->linkNumber;
}
/**
 * Fill in the detailed device description for a device
 *
 * Copies the PCI DBDF, link count, mapped device type and enabled-link mask.
 * The UUID and device name are copied only when the device provides them.
 *
 * Note: at most NVLINK_DEVICE_NAME_LEN_MAX bytes of the name are copied and
 *       this function does not write a terminator after them.
 *
 * @param[in]   tmpDev   nvlink_device * to describe
 * @param[out]  devInfo  Device details
 */
void
nvlink_core_copy_device_info
(
    nvlink_device            *tmpDev,
    nvlink_detailed_dev_info *devInfo
)
{
    // PCI location
    devInfo->pciInfo.domain   = tmpDev->pciInfo.domain;
    devInfo->pciInfo.bus      = tmpDev->pciInfo.bus;
    devInfo->pciInfo.device   = tmpDev->pciInfo.device;
    devInfo->pciInfo.function = tmpDev->pciInfo.function;

    // Link inventory and device type
    devInfo->numLinks         = nvListCount(&tmpDev->link_list);
    devInfo->devType          = _nvlink_core_map_device_type(tmpDev->type);
    devInfo->enabledLinkMask  = _nvlink_core_get_enabled_link_mask(tmpDev);

    // UUID is optional
    if (tmpDev->uuid != NULL)
    {
        nvlink_memcpy(devInfo->devUuid, tmpDev->uuid, NVLINK_UUID_LEN);
    }

    // Device name is optional; clamp the copy to the maximum name length
    if (tmpDev->deviceName != NULL)
    {
        int bytesToCopy = nvlink_strlen(tmpDev->deviceName);

        if (bytesToCopy > NVLINK_DEVICE_NAME_LEN_MAX)
        {
            bytesToCopy = NVLINK_DEVICE_NAME_LEN_MAX;
        }

        nvlink_memcpy(devInfo->deviceName, tmpDev->deviceName, bytesToCopy);
    }
}
/**
 * Kick off the transition to SWCFG (SAFE) on the given array of links
 *
 * Links that did not pass RXDET, failed TX common mode, whose DL state
 * cannot be read, or that are already in SAFE/HS are skipped. For every
 * remaining link the SAFE transition is requested asynchronously; this
 * function does not wait for the transition to complete.
 *
 * @param[in]  links     Array of nvlink_link pointers
 * @param[in]  numLinks  Number of links in the array
 *
 * return NVL_SUCCESS always (individual link failures are not reported, so
 *        that the remaining healthy links can still be used)
 */
NvlStatus
nvlink_core_link_init_async
(
    nvlink_link **links,
    NvU32         numLinks
)
{
    NvU32 i;

    // Sanity check the links array for non-zero links
    nvlink_assert((links != NULL) && (numLinks > 0));

    for (i = 0; i < numLinks; i++)
    {
        NvlStatus status   = NVL_SUCCESS;
        NvU64     linkMode = NVLINK_LINKSTATE_OFF;

        if (!links[i]->bRxDetected || links[i]->bTxCommonModeFail)
        {
            // link did not pass RXDET or failed in common mode, don't do anything
            continue;
        }

        status = links[i]->link_handlers->get_dl_link_mode(links[i], &linkMode);
        if (status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to get link mode for %s:%s\n",
                __FUNCTION__, links[i]->dev->deviceName, links[i]->linkName));
            continue;
        }

        // TODO : Handle fault checking
        if (_nvlink_core_is_link_initialized(linkMode))
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
                "%s: %s:%s is already trained to SAFE or HIGH SPEED\n",
                __FUNCTION__, links[i]->dev->deviceName, links[i]->linkName));

            // link already in higher state. don't do anything
            continue;
        }

        //
        // Put the link in SAFE state. This is the asynchronous init entry
        // point, so only request the transition here instead of blocking on
        // each link in turn with a synchronous state change.
        //
        links[i]->link_handlers->set_dl_link_mode(links[i],
                                                  NVLINK_LINKSTATE_SAFE,
                                                  NVLINK_STATE_CHANGE_ASYNC);
    }

    //
    // We could have links which are faulty and cannot be initialized. But proceeding
    // the initialization sequence allows us to use other non-faulty links. Therefore
    // return success always.
    //
    return NVL_SUCCESS;
}
/**
 * Build the discovery token for the given link
 *
 * The token is link->token with its fabric node id field replaced by the
 * node id of the owning device. Per the original design note, link->token
 * is unique within a node, and stamping the node id into it makes the
 * result unique across nodes as well.
 *
 * @param[in]  link  nvlink_link pointer
 *
 * return NvU64 discovery token
 */
NvU64
nvlink_core_get_link_discovery_token
(
    nvlink_link *link
)
{
    // Bits of the token reserved for the fabric node id
    const NvU64 nodeIdField = (NvU64)NVLINK_FABRIC_NODE_ID_MASK << NVLINK_FABRIC_NODE_ID_POS;

    // This device's node id, shifted into that field
    const NvU64 nodeIdBits  = (NvU64)link->dev->nodeId << NVLINK_FABRIC_NODE_ID_POS;

    // Clear the field in the per-link token, then insert the node id
    return (link->token & ~nodeIdField) | nodeIdBits;
}
/**
 * Write the discovery token on the given link
 *
 * The token is only sent when the link's DL state is SAFE or HS.
 *
 * @param[in]  link   nvlink_link pointer
 * @param[in]  token  Discovery token to write
 *
 * return NVL_SUCCESS once the token has been handed to the link handler,
 *        the handler's error if the DL state could not be read, or
 *        NVL_ERR_INVALID_STATE if the link is not in SAFE/HS
 */
NvlStatus
nvlink_core_write_link_discovery_token
(
    nvlink_link *link,
    NvU64        token
)
{
    NvU64     curMode = NVLINK_LINKSTATE_OFF;
    NvlStatus rc      = link->link_handlers->get_dl_link_mode(link, &curMode);

    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Unable to get link mode for %s:%s\n",
            __FUNCTION__, link->dev->deviceName, link->linkName));

        return rc;
    }

    // Packet injection can only happen if link is in SWCFG/ACTIVE
    if (_nvlink_core_is_link_initialized(curMode))
    {
        // Send the token (AN0 packet)
        link->link_handlers->write_discovery_token(link, token);

        return NVL_SUCCESS;
    }

    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
        "%s: Packet injection only works for links in SAFE or HS %s:%s.\n",
        __FUNCTION__, link->dev->deviceName, link->linkName));

    return NVL_ERR_INVALID_STATE;
}
/**
 * Read the discovery token from the given link
 *
 * @param[in]  link  nvlink_link pointer
 *
 * return the token reported by the link handler, or 0 when the DL state
 *        cannot be read or the link is not in SAFE/HS
 */
NvU64
nvlink_core_read_link_discovery_token
(
    nvlink_link *link
)
{
    NvU64 curMode   = NVLINK_LINKSTATE_OFF;
    NvU64 readValue = 0;

    if (link->link_handlers->get_dl_link_mode(link, &curMode) != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Unable to get link mode for %s:%s\n",
            __FUNCTION__, link->dev->deviceName, link->linkName));

        return 0;
    }

    // Only query the token from links that are in SAFE/HS
    if (_nvlink_core_is_link_initialized(curMode))
    {
        link->link_handlers->read_discovery_token(link, &readValue);
    }

    return readValue;
}
/**
 * Detect the connection of srcLink by correlating tokens
 *
 * Walks every registered link that does not yet have an intranode
 * connection and adds a connection between srcLink and the first link
 * whose token matches writeToken.
 *
 * @param[in]  srcLink        nvlink_link pointer
 * @param[in]  writeToken     Token that was written on srcLink
 * @param[in]  skipReadToken  NV_TRUE: match on remote SID and link id
 *                            instead of reading the token from the link
 *
 */
void
nvlink_core_correlate_conn_by_token
(
    nvlink_link *srcLink,
    NvU64        writeToken,
    NvBool       skipReadToken
)
{
    nvlink_device *curDev    = NULL;
    nvlink_link   *candidate = NULL;

    // Deliberately kept across iterations, as in the SID-matching path it
    // is only updated when a candidate matches
    NvU64          seenToken = 0;

    FOR_EACH_DEVICE_REGISTERED(curDev, nvlinkLibCtx.nv_devicelist_head, node)
    {
        FOR_EACH_LINK_REGISTERED(candidate, curDev, node)
        {
            nvlink_intranode_conn *existing = NULL;

            // Links that already belong to a connection are not candidates
            nvlink_core_get_intranode_conn(candidate, &existing);
            if (existing != NULL)
            {
                continue;
            }

            if (!skipReadToken)
            {
                // Read the RX sublink for the AN0 packet
                seenToken = nvlink_core_read_link_discovery_token(candidate);
            }
            else if ((srcLink->remoteSid    == candidate->localSid) &&
                     (srcLink->remoteLinkId == candidate->linkNumber))
            {
                // SID and link id identify the peer; treat it as a token match
                seenToken = writeToken;
            }

            if (writeToken != seenToken)
            {
                continue;
            }

            // Tokens match: record the connection and stop searching
            nvlink_core_add_intranode_conn(srcLink, candidate);
            return;
        }
    }
}

View File

@@ -0,0 +1,446 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
/**
 * Check whether the given link is currently in the requested state
 *
 * OFF, RESET, SAFE and HS are compared against the DL link mode; SLEEP is
 * compared against the TL link mode. Any other requested state yields
 * NV_FALSE.
 *
 * @param[in]  link       NVLink link pointer
 * @param[in]  linkState  Target Link State
 *
 * return NV_TRUE if the link is in the given state, NV_FALSE if it is not
 *        or if the current mode could not be read
 */
NvBool
nvlink_core_check_link_state
(
    nvlink_link *link,
    NvU64        linkState
)
{
    NvU64 observedMode = NVLINK_LINKSTATE_OFF;

    switch (linkState)
    {
        case NVLINK_LINKSTATE_OFF:
        case NVLINK_LINKSTATE_RESET:
        case NVLINK_LINKSTATE_SAFE:
        case NVLINK_LINKSTATE_HS:
        {
            // These states are reported by the DL
            if (link->link_handlers->get_dl_link_mode(link, &observedMode) != NVL_SUCCESS)
            {
                NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                    "%s: Unable to get DL link mode for %s:%s\n",
                    __FUNCTION__, link->dev->deviceName, link->linkName));

                return NV_FALSE;
            }
            break;
        }

        case NVLINK_LINKSTATE_SLEEP:
        {
            // SLEEP is reported by the TL
            if (link->link_handlers->get_tl_link_mode(link, &observedMode) != NVL_SUCCESS)
            {
                NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                    "%s: Unable to get TL link mode for %s:%s\n",
                    __FUNCTION__, link->dev->deviceName, link->linkName));

                return NV_FALSE;
            }
            break;
        }

        default:
        {
            // Requested state is not one this helper knows how to check
            return NV_FALSE;
        }
    }

    return (observedMode == linkState) ? NV_TRUE : NV_FALSE;
}
/**
 * Check whether the tx sublink of the given link is currently in the
 * requested state.
 *
 * Only TX_OFF, TX_SAFE and TX_HS can be requested; a request for TX_HS is
 * also satisfied by TX_SINGLE_LANE. Any other requested state yields
 * NV_FALSE.
 *
 * @param[in]  link            NVLink link pointer
 * @param[in]  txSublinkState  Target Tx Sublink State
 *
 * return NV_TRUE if the tx sublink is in the given state, NV_FALSE if it
 *        is not or if the current mode could not be read
 */
NvBool
nvlink_core_check_tx_sublink_state
(
    nvlink_link *link,
    NvU64        txSublinkState
)
{
    NvU64 observedMode    = NVLINK_SUBLINK_STATE_TX_OFF;
    NvU32 observedSubMode = NVLINK_SUBLINK_SUBSTATE_TX_STABLE;

    if (link->link_handlers->get_tx_mode(link, &observedMode, &observedSubMode) != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Unable to get TX sublink mode for %s:%s\n",
            __FUNCTION__, link->dev->deviceName, link->linkName));

        return NV_FALSE;
    }

    if (txSublinkState == NVLINK_SUBLINK_STATE_TX_HS)
    {
        // Single-lane mode is accepted as high speed
        if ((observedMode == NVLINK_SUBLINK_STATE_TX_SINGLE_LANE) ||
            (observedMode == NVLINK_SUBLINK_STATE_TX_HS))
        {
            return NV_TRUE;
        }

        return NV_FALSE;
    }

    if ((txSublinkState == NVLINK_SUBLINK_STATE_TX_OFF) ||
        (txSublinkState == NVLINK_SUBLINK_STATE_TX_SAFE))
    {
        // OFF and SAFE require an exact match
        return (observedMode == txSublinkState) ? NV_TRUE : NV_FALSE;
    }

    // Unsupported target state
    return NV_FALSE;
}
/**
 * For the given link, check whether the rx sublink state is at the
 * requested state.
 *
 * Only RX_OFF, RX_SAFE and RX_HS can be requested; a request for RX_HS is
 * also satisfied by RX_SINGLE_LANE. Any other requested state yields
 * NV_FALSE.
 *
 * @param[in]  link            NVLink link pointer
 * @param[in]  rxSublinkState  Target Rx Sublink State
 *
 * return NV_TRUE if the rx sublink is in the given state, NV_FALSE if it
 *        is not or if the current mode could not be read
 */
NvBool
nvlink_core_check_rx_sublink_state
(
    nvlink_link *link,
    NvU64        rxSublinkState
)
{
    NvlStatus status = NVL_SUCCESS;

    NvU64 crntRxSublinkMode    = NVLINK_SUBLINK_STATE_RX_OFF;
    NvU32 crntRxSublinkSubMode = NVLINK_SUBLINK_SUBSTATE_RX_STABLE;

    status = link->link_handlers->get_rx_mode(link,
                                              &crntRxSublinkMode,
                                              &crntRxSublinkSubMode);
    if (status != NVL_SUCCESS)
    {
        // This is the RX query; the message must not claim a TX failure
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Unable to get RX sublink mode for %s:%s\n",
            __FUNCTION__, link->dev->deviceName, link->linkName));
        return NV_FALSE;
    }

    switch (rxSublinkState)
    {
        case NVLINK_SUBLINK_STATE_RX_OFF:
        {
            if (crntRxSublinkMode == NVLINK_SUBLINK_STATE_RX_OFF)
            {
                return NV_TRUE;
            }
            break;
        }

        case NVLINK_SUBLINK_STATE_RX_SAFE:
        {
            if (crntRxSublinkMode == NVLINK_SUBLINK_STATE_RX_SAFE)
            {
                return NV_TRUE;
            }
            break;
        }

        case NVLINK_SUBLINK_STATE_RX_HS:
        {
            // Single-lane mode is accepted as high speed
            if ((crntRxSublinkMode == NVLINK_SUBLINK_STATE_RX_HS) ||
                (crntRxSublinkMode == NVLINK_SUBLINK_STATE_RX_SINGLE_LANE))
            {
                return NV_TRUE;
            }
            break;
        }

        default:
        {
            // Unsupported target state
            break;
        }
    }

    // return false for default case or the states are not matching
    return NV_FALSE;
}
/**
 * Poll the link to reach the specified state upto the given timeout.
 * Link state transition is considered failed once timeout occurs.
 *
 * The DL link mode is read once up front; while it differs from the target
 * the function calls nvlink_sleep(1), consumes one unit of the timeout and
 * reads the mode again. The status returned by the handler is not checked.
 *
 * @param[in]  link       NVLink link pointer
 * @param[in]  linkState  Target Link state
 * @param[in]  timeout    Maximum number of nvlink_sleep(1) intervals to
 *                        wait. 0 means "do not wait": the call fails right
 *                        away unless the link is already in the target state
 *
 * return NVL_SUCCESS if the link transitioned to the target state,
 *        NVL_ERR_INVALID_STATE if the timeout expired first
 */
NvlStatus
nvlink_core_poll_link_state
(
    nvlink_link *link,
    NvU64        linkState,
    NvU32        timeout
)
{
    NvU64 currentLinkState = ~((NvU64)0);

    link->link_handlers->get_dl_link_mode(link, &currentLinkState);

    while (currentLinkState != linkState)
    {
        //
        // Only sleep while there is budget left. A zero timeout must not be
        // decremented: the counter is unsigned, so it would wrap around and
        // turn "no waiting" into roughly 2^32 polling iterations.
        //
        if (timeout != 0)
        {
            nvlink_sleep(1);

            timeout--;
        }

        if (timeout == 0)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Timeout occured while polling on link.\n",
                __FUNCTION__));

            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Link info: device: %s link: %s link state "
                "expected: 0x%08llx actual: 0x%08llx.\n",
                __FUNCTION__, link->dev->deviceName, link->linkName,
                linkState, currentLinkState));

            return NVL_ERR_INVALID_STATE;
        }

        link->link_handlers->get_dl_link_mode(link, &currentLinkState);
    }

    return NVL_SUCCESS;
}
/**
 * Poll the local tx sublink and then the remote rx sublink until each
 * reaches its requested state/substate, or the timeout expires.
 *
 * Either link pointer may be NULL, in which case that side is not polled.
 * The rx side is only polled when the tx side (if given) succeeded. The
 * same timeout value is handed to each poll.
 *
 * @param[in]  localTxSubLink           Local  NVLink pointer
 * @param[in]  localTxSubLinkState      Local  Tx Sublink State
 * @param[in]  localTxSubLinkSubState   Local  Tx Sublink Substate
 * @param[in]  remoteRxSubLink          Remote NVLink pointer
 * @param[in]  remoteRxSubLinkState     Remote Rx Sublink State
 * @param[in]  remoteRxSubLinkSubState  Remote Rx Sublink Substate
 * @param[in]  timeout                  Timeout
 *
 * return NVL_SUCCESS if every polled sublink transitioned to the given
 *        state, otherwise the status of the poll that failed
 */
NvlStatus
nvlink_core_poll_sublink_state
(
    nvlink_link *localTxSubLink,
    NvU64        localTxSubLinkState,
    NvU32        localTxSubLinkSubState,
    nvlink_link *remoteRxSubLink,
    NvU64        remoteRxSubLinkState,
    NvU32        remoteRxSubLinkSubState,
    NvU32        timeout
)
{
    // Tx side first, when a link was supplied for it
    if (localTxSubLink)
    {
        NvlStatus txStatus = nvlink_core_poll_tx_sublink_state(localTxSubLink,
                                                               localTxSubLinkState,
                                                               localTxSubLinkSubState,
                                                               timeout);
        if (txStatus != NVL_SUCCESS)
        {
            // Tx polling failed, so the rx side is not polled at all
            return txStatus;
        }
    }

    // Nothing to poll on the rx side
    if (!remoteRxSubLink)
    {
        return NVL_SUCCESS;
    }

    return nvlink_core_poll_rx_sublink_state(remoteRxSubLink,
                                             remoteRxSubLinkState,
                                             remoteRxSubLinkSubState,
                                             timeout);
}
/**
 * Poll for the tx sublink to reach the specified state upto the given
 * timeout. Sublink state transition is considered failed once timeout
 * occurs.
 *
 * The tx mode is read once up front; while the state or substate differs
 * from the target the function calls nvlink_sleep(1), consumes one unit of
 * the timeout and reads the mode again. The status returned by the handler
 * is not checked.
 *
 * @param[in]  link               NVLink pointer
 * @param[in]  txSublinkState     Tx Sublink State
 * @param[in]  txSublinkSubState  Tx Sublink Substate
 * @param[in]  timeout            Maximum number of nvlink_sleep(1) intervals
 *                                to wait. 0 means "do not wait": the call
 *                                fails right away unless the sublink is
 *                                already in the target state
 *
 * return NVL_SUCCESS if the tx sublink transitioned to the target state,
 *        NVL_ERR_INVALID_STATE if the timeout expired first
 */
NvlStatus
nvlink_core_poll_tx_sublink_state
(
    nvlink_link *link,
    NvU64        txSublinkState,
    NvU32        txSublinkSubState,
    NvU32        timeout
)
{
    NvU64 currentTxSublinkState    = ~((NvU64)0);
    NvU32 currentTxSublinkSubState = ~((NvU32)0);

    link->link_handlers->get_tx_mode(link,
                                     &currentTxSublinkState,
                                     &currentTxSublinkSubState);

    while (!((currentTxSublinkState == txSublinkState) &&
             (currentTxSublinkSubState == txSublinkSubState)))
    {
        //
        // Only sleep while there is budget left. A zero timeout must not be
        // decremented: the counter is unsigned, so it would wrap around and
        // turn "no waiting" into roughly 2^32 polling iterations.
        //
        if (timeout != 0)
        {
            nvlink_sleep(1);

            timeout--;
        }

        if (timeout == 0)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Timeout occured while polling on link.\n",
                __FUNCTION__));

            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Link info: device: %s link: %s txsublink state"
                " expected: 0x%08llx actual: 0x%08llx.\n",
                __FUNCTION__, link->dev->deviceName, link->linkName,
                txSublinkState, currentTxSublinkState));

            return NVL_ERR_INVALID_STATE;
        }

        link->link_handlers->get_tx_mode(link,
                                         &currentTxSublinkState,
                                         &currentTxSublinkSubState);
    }

    return NVL_SUCCESS;
}
/**
 * Poll for the rx sublink to reach the specified state upto the given
 * timeout. Sublink state transition is considered failed once timeout
 * occurs.
 *
 * The rx mode is read once up front; while the state or substate differs
 * from the target the function calls nvlink_sleep(1), consumes one unit of
 * the timeout and reads the mode again. The status returned by the handler
 * is not checked.
 *
 * @param[in]  link               NVLink pointer
 * @param[in]  rxSublinkState     Rx Sublink State
 * @param[in]  rxSublinkSubState  Rx Sublink Substate
 * @param[in]  timeout            Maximum number of nvlink_sleep(1) intervals
 *                                to wait. 0 means "do not wait": the call
 *                                fails right away unless the sublink is
 *                                already in the target state
 *
 * return NVL_SUCCESS if the rx sublink transitioned to the target state,
 *        NVL_ERR_INVALID_STATE if the timeout expired first
 */
NvlStatus
nvlink_core_poll_rx_sublink_state
(
    nvlink_link *link,
    NvU64        rxSublinkState,
    NvU32        rxSublinkSubState,
    NvU32        timeout
)
{
    NvU64 currentRxSublinkState    = ~((NvU64)0);
    NvU32 currentRxSublinkSubState = ~((NvU32)0);

    link->link_handlers->get_rx_mode(link,
                                     &currentRxSublinkState,
                                     &currentRxSublinkSubState);

    while (!((currentRxSublinkState == rxSublinkState) &&
             (currentRxSublinkSubState == rxSublinkSubState)))
    {
        //
        // Only sleep while there is budget left. A zero timeout must not be
        // decremented: the counter is unsigned, so it would wrap around and
        // turn "no waiting" into roughly 2^32 polling iterations.
        //
        if (timeout != 0)
        {
            nvlink_sleep(1);

            timeout--;
        }

        if (timeout == 0)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Timeout occured while polling on link.\n",
                __FUNCTION__));

            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Link info: device: %s link: %s rxsublink state "
                "expected: 0x%08llx actual: 0x%08llx.\n",
                __FUNCTION__, link->dev->deviceName, link->linkName,
                rxSublinkState, currentRxSublinkState));

            return NVL_ERR_INVALID_STATE;
        }

        link->link_handlers->get_rx_mode(link,
                                         &currentRxSublinkState,
                                         &currentRxSublinkSubState);
    }

    return NVL_SUCCESS;
}

View File

@@ -0,0 +1,114 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
static void _nvlink_core_print_link(nvlink_link *);
/**
 * Log the DL link mode and the tx/rx sublink modes of a single nvlink_link
 *
 * The modes are queried through the link handlers; handler status codes
 * are not checked, so a failed query is logged with the value 0 it was
 * initialized to (unless the handler wrote something before failing).
 *
 * @param[in]  link  NVLink Link pointer
 */
void
nvlink_core_print_link_state
(
    nvlink_link *link
)
{
    NvU64 dlMode    = 0;
    NvU64 txMode    = 0;
    NvU32 txSubMode = 0;
    NvU64 rxMode    = 0;
    NvU32 rxSubMode = 0;

    // Snapshot the three modes in DL, TX, RX order
    link->link_handlers->get_dl_link_mode(link, &dlMode);
    link->link_handlers->get_tx_mode(link, &txMode, &txSubMode);
    link->link_handlers->get_rx_mode(link, &rxMode, &rxSubMode);

    // Substates are queried but not part of the log line
    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
        "%s: %s:%s linkMode: %lld txSublinkMode: %lld rxSublinkMode: %lld\n",
        __FUNCTION__,
        link->dev->deviceName, link->linkName,
        dlMode, txMode, rxMode));
}
/**
 * Log both endpoints of a single nvlink intranode connection
 *
 * The output has the form "<end0> <---> <end1>" followed by a newline.
 * A NULL connection is silently ignored.
 *
 * @param[in]  conn  NVLink connection pointer
 */
void
nvlink_core_print_intranode_conn
(
    nvlink_intranode_conn *conn
)
{
    if (conn == NULL)
    {
        return;
    }

    // First endpoint, separator, second endpoint, end of line
    _nvlink_core_print_link(conn->end0);

    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, " <---> "));

    _nvlink_core_print_link(conn->end1);

    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "\n"));
}
/**
 * Print link type and device information of a single nvlink_link
 *
 * The output has the form
 *     <TYPE>(<pci device id>): <domain>:<bus>:<device>.<function> <link name>
 * where <TYPE> is omitted for device types that are not listed below.
 *
 * @param[in]  link  NVLink Link pointer
 */
static void
_nvlink_core_print_link
(
    nvlink_link *link
)
{
    switch (link->dev->type)
    {
        case NVLINK_DEVICE_TYPE_GPU:
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "NVGPU"));
            break;
        case NVLINK_DEVICE_TYPE_IBMNPU:
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "IBMNPU"));
            break;
        case NVLINK_DEVICE_TYPE_NVSWITCH:
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "NVSWITCH"));
            break;
        case NVLINK_DEVICE_TYPE_EBRIDGE:
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "EBRIDGE"));
            break;
        default:
            // Unknown device type: no type prefix is printed
            break;
    }

    //
    // The value in parentheses is the PCI device ID. The PCI slot (device)
    // number is already printed as part of the domain:bus:device.function
    // tuple, so it must not be repeated here.
    //
    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
        "(%x): %04x:%02x:%02x.%x %s",
        link->dev->pciInfo.pciDeviceId,
        link->dev->pciInfo.domain,
        link->dev->pciInfo.bus,
        link->dev->pciInfo.device,
        link->dev->pciInfo.function,
        link->linkName));
}

View File

@@ -0,0 +1,845 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
static void _nvlink_core_clear_link_state(nvlink_link *);
/**
* [CLEAN SHUTDOWN]
*
* Shutdown given intranode connections from active to L2 state
*
* @param[in] conns Array of connections to transition to L2
* @param[in] connCount Number of connections in the array
* @param[in] flags Flags to track if training is sync/async
*
* return NVL_SUCCESS if the connections transitioned to L2 successfully
*/
NvlStatus
nvlink_core_powerdown_intranode_conns_from_active_to_L2
(
nvlink_intranode_conn **conns,
NvU32 connCount,
NvU32 flags
)
{
NvlStatus status = NVL_SUCCESS;
NvU64 linkMode = NVLINK_LINKSTATE_OFF;
NvU32 version;
NvU32 i;
if ((conns == NULL) || (connCount == 0))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: No connections to exit L2\n",
__FUNCTION__));
return NVL_ERR_GENERIC;
}
// Set the version. Currently, only one version is supported on a chip
version = conns[0]->end0->version;
/**************** Start the L2 entry sequence for the connections ***************/
// NVLink 3.0 and beyond, link needs to be ACTIVE before it can be transitioned to L2
if ((version >= NVLINK_DEVICE_VERSION_30) && (connCount > 0))
{
for (i = 0; i < connCount; i++)
{
status = nvlink_core_check_intranode_conn_state(conns[i], NVLINK_LINKSTATE_HS);
if ((status == NVL_SUCCESS) || (status == NVL_ERR_INVALID_STATE))
{
continue;
}
// We can train connections to HS only when they are already in SAFE
status = nvlink_core_check_intranode_conn_state(conns[i], NVLINK_LINKSTATE_SAFE);
if (status != NVL_SUCCESS)
{
continue;
}
if (nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT(&conns[i], 1, flags))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
"%s: Failed to train connection to ACTIVE.\n",
__FUNCTION__));
return NVL_ERR_GENERIC;
}
}
}
// STEP 0: Disable HeartBeat on the endpoints of all connections
for (i = 0; i < connCount; i++)
{
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
NVLINK_LINKSTATE_DISABLE_HEARTBEAT,
flags);
// Only send if not in loopback
if (conns[i]->end0 != conns[i]->end1)
{
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
NVLINK_LINKSTATE_DISABLE_HEARTBEAT,
flags);
}
}
// STEP 1: Disable PM on the endpoints of all connections
for (i = 0; i < connCount; i++)
{
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
NVLINK_LINKSTATE_DISABLE_PM,
flags);
// Only send if not in loopback
if (conns[i]->end0 != conns[i]->end1)
{
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
NVLINK_LINKSTATE_DISABLE_PM,
flags);
}
}
// Get link state on all endpoints. This ensures that NVLINK_LINKSTATE_DISABLE_PM completes
if (flags == NVLINK_STATE_CHANGE_ASYNC)
{
for (i = 0; i < connCount; i++)
{
status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode);
if ((status != NVL_SUCCESS) ||
(linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link %s:%s is not in good state after sending DISABLE PM\n",
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
}
status = conns[i]->end1->link_handlers->get_dl_link_mode(conns[i]->end1, &linkMode);
if ((status != NVL_SUCCESS) ||
(linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link %s:%s is not in good state after sending DISABLE PM\n",
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
}
}
}
// Check for each connection, if both the ends and their sublinks are in HS mode
for (i = 0; i < connCount; i++)
{
status = nvlink_core_check_intranode_conn_state(conns[i], NVLINK_LINKSTATE_HS);
if (status == NVL_ERR_INVALID_STATE)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link %s:%s - Link %s:%s is not in good state\n",
__FUNCTION__,
conns[i]->end0->dev->deviceName, conns[i]->end0->linkName,
conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
}
else if (status == NVL_SUCCESS)
{
// STEP 2: Change link state from ACTIVE to SWCFG on all endpoints
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
NVLINK_LINKSTATE_SAFE,
flags);
// Only send if not in loopback
if (conns[i]->end0 != conns[i]->end1)
{
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
NVLINK_LINKSTATE_SAFE,
flags);
}
}
}
//
// All the endpoints should now either be in SWCFG or transitioning to SWCFG. Poll for all
// endpoints to reach SWCFG. If any endpoint does not transition to SWCFG, return error
//
for (i = 0; i < connCount; i++)
{
// Wait for the end0 to go to SWCFG
status = nvlink_core_poll_link_state(conns[i]->end0,
NVLINK_LINKSTATE_SAFE,
NVLINK_TRANSITION_SAFE_TIMEOUT);
if (status != NVL_SUCCESS)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Unable to set endpoint %s:%s in SWCFG\n",
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
}
// Wait for the end1 to go to SWCFG
status = nvlink_core_poll_link_state(conns[i]->end1,
NVLINK_LINKSTATE_SAFE,
NVLINK_TRANSITION_SAFE_TIMEOUT);
if (status != NVL_SUCCESS)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Unable to set endpoint %s:%s in SWCFG\n",
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
}
}
// STEP 3: Change sub-link state to SAFE on all endpoints
for (i = 0; i < connCount; i++)
{
conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0,
NVLINK_SUBLINK_STATE_TX_SAFE,
flags);
// Only send if not in loopback
if (conns[i]->end0 != conns[i]->end1)
{
conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1,
NVLINK_SUBLINK_STATE_TX_SAFE,
flags);
}
}
// Poll for all endpoints sub-link state to reach SAFE
for (i = 0; i < connCount; i++)
{
// Wait for sublinks to go to SAFE
status = nvlink_core_poll_sublink_state(conns[i]->end0,
NVLINK_SUBLINK_STATE_TX_SAFE,
NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
conns[i]->end1,
NVLINK_SUBLINK_STATE_RX_SAFE,
NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
NVLINK_TRANSITION_SAFE_TIMEOUT);
if (status != NVL_SUCCESS)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Unable to set sublinks to SAFE\n",
__FUNCTION__));
}
status = nvlink_core_poll_sublink_state(conns[i]->end1,
NVLINK_SUBLINK_STATE_TX_SAFE,
NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
conns[i]->end0,
NVLINK_SUBLINK_STATE_RX_SAFE,
NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
NVLINK_TRANSITION_SAFE_TIMEOUT);
if (status != NVL_SUCCESS)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Unable to set sublinks to SAFE\n",
__FUNCTION__));
}
}
// STEP 4: Save link state on all the endpoints
for (i = 0; i < connCount; i++)
{
if (!conns[i]->end0->bStateSaved)
{
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
NVLINK_LINKSTATE_SAVE_STATE,
flags);
}
if (!conns[i]->end1->bStateSaved)
{
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
NVLINK_LINKSTATE_SAVE_STATE,
flags);
}
}
// Get link state on all endpoints. This ensures that NVLINK_LINKSTATE_SAVE_STATE completes
if (flags == NVLINK_STATE_CHANGE_ASYNC)
{
for (i = 0; i < connCount; i++)
{
status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode);
if ((status != NVL_SUCCESS) ||
(linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link %s:%s is not in good state after sending SAVESTATE command\n",
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
}
status = conns[i]->end1->link_handlers->get_dl_link_mode(conns[i]->end1, &linkMode);
if ((status != NVL_SUCCESS) ||
(linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT))
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link %s:%s is not in good state after sending SAVESTATE command\n",
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
}
}
}
// STEP 5: Trigger the sleep request on all the endpoints
for (i = 0; i < connCount; i++)
{
//
// Send SLEEP request on one end of connection if not in loopback.
// Don't poll, since transition will happen when both ends get the request
//
if (conns[i]->end0 != conns[i]->end1)
{
conns[i]->end0->link_handlers->set_tl_link_mode(conns[i]->end0,
NVLINK_LINKSTATE_SLEEP,
NVLINK_STATE_CHANGE_ASYNC);
}
// Send SLEEP request on both ends and poll for completion
conns[i]->end1->link_handlers->set_tl_link_mode(conns[i]->end1,
NVLINK_LINKSTATE_SLEEP,
NVLINK_STATE_CHANGE_SYNC);
conns[i]->end0->link_handlers->set_tl_link_mode(conns[i]->end0,
NVLINK_LINKSTATE_SLEEP,
NVLINK_STATE_CHANGE_SYNC);
}
// Finally check the connection states
for (i = 0; i < connCount; i++)
{
status = nvlink_core_check_intranode_conn_state(conns[i], NVLINK_LINKSTATE_SLEEP);
if (status != NVL_SUCCESS)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Link %s:%s - Link %s:%s is not in good state after sending SLEEP request\n",
__FUNCTION__,
conns[i]->end0->dev->deviceName, conns[i]->end0->linkName,
conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
}
// Update the link and sublink states in the core library
conns[i]->end0->state = NVLINK_LINKSTATE_SLEEP;
conns[i]->end1->state = NVLINK_LINKSTATE_SLEEP;
conns[i]->end0->tx_sublink_state = NVLINK_SUBLINK_STATE_TX_OFF;
conns[i]->end1->tx_sublink_state = NVLINK_SUBLINK_STATE_TX_OFF;
conns[i]->end0->rx_sublink_state = NVLINK_SUBLINK_STATE_RX_OFF;
conns[i]->end1->rx_sublink_state = NVLINK_SUBLINK_STATE_RX_OFF;
// Update power state transition status for the connection
conns[i]->end0->powerStateTransitionStatus = nvlink_power_state_in_L2;
conns[i]->end1->powerStateTransitionStatus = nvlink_power_state_in_L2;
}
/***************** End of L2 entry sequence for the connections ****************/
//
// Note that status is squashed, since the expectation is that we soldier on if any link fails
// during the transition to L2 state
//
return NVL_SUCCESS;
}
/**
 * [PSEUDO-CLEAN SHUTDOWN]
 *
 * Shutdown the given array of intranode connections from ACTIVE to OFF state
 *
 * The sequence implemented below is:
 *   1. For every connection, disable power management and request the
 *      SAFE (SWCFG) link state on both endpoints.
 *   2. For every connection, poll each endpoint for SAFE, record the result
 *      in the endpoint's inSWCFG field, and request TX sublink SAFE on the
 *      endpoints that reached SWCFG.
 *   3. For every connection, poll the sublinks for SAFE, then disable error
 *      detection, disable lanes, shut down lanes, turn the link OFF and
 *      clear the core library's software state for both endpoints.
 *
 * A connection whose end0 and end1 are the same link is a loopback
 * connection; the set-mode requests are then issued only once.
 *
 * Failures of individual steps are logged but never abort the sequence.
 *
 * @param[in]  conns      Array of connections to shutdown
 * @param[in]  connCount  Number of connections in the array
 * @param[in]  flags      Flags to track if shutdown is sync/async
 *
 * return NVL_ERR_GENERIC if conns is NULL or connCount is 0,
 *        NVL_SUCCESS otherwise (per-link failures are squashed)
 */
NvlStatus
nvlink_core_powerdown_intranode_conns_from_active_to_off
(
    nvlink_intranode_conn **conns,
    NvU32                   connCount,
    NvU32                   flags
)
{
    NvlStatus status = NVL_SUCCESS;
    NvU32     i;

    if ((conns == NULL) || (connCount == 0))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No connections to shutdown\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    for (i = 0; i < connCount; i++)
    {
        // Disable Power Management before moving link out of Active
        conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
                                                        NVLINK_LINKSTATE_DISABLE_PM,
                                                        flags);

        // Skip the second request when both ends are the same link (loopback)
        if (conns[i]->end0 != conns[i]->end1)
        {
            conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
                                                            NVLINK_LINKSTATE_DISABLE_PM,
                                                            flags);
        }

        // Move both ends to SWCFG
        conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
                                                        NVLINK_LINKSTATE_SAFE,
                                                        flags);

        if (conns[i]->end0 != conns[i]->end1)
        {
            conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
                                                            NVLINK_LINKSTATE_SAFE,
                                                            flags);
        }
    }

    // Poll for links to reach SWCFG & initiate sublinks to SAFE state
    for (i = 0; i < connCount; i++)
    {
        // Wait for the end0 to go to SWCFG
        status = nvlink_core_poll_link_state(conns[i]->end0,
                                             NVLINK_LINKSTATE_SAFE,
                                             NVLINK_TRANSITION_SAFE_TIMEOUT);
        if (status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to set endpoint %s:%s in SWCFG\n",
                __FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));

            // to track Failure
            conns[i]->end0->inSWCFG = NV_FALSE;
        }
        else
        {
            conns[i]->end0->inSWCFG = NV_TRUE;
        }

        // Wait for the end1 to go to SWCFG
        status = nvlink_core_poll_link_state(conns[i]->end1,
                                             NVLINK_LINKSTATE_SAFE,
                                             NVLINK_TRANSITION_SAFE_TIMEOUT);
        if (status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to set endpoint %s:%s in SWCFG\n",
                __FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));

            // to track Failure
            conns[i]->end1->inSWCFG = NV_FALSE;
        }
        else
        {
            conns[i]->end1->inSWCFG = NV_TRUE;
        }

        // Change each sublink state to SAFE, but only on ends that reached SWCFG
        if (conns[i]->end0->inSWCFG == NV_TRUE)
        {
            conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0,
                                                       NVLINK_SUBLINK_STATE_TX_SAFE,
                                                       flags);
        }

        if (conns[i]->end0 != conns[i]->end1 && conns[i]->end1->inSWCFG == NV_TRUE)
        {
            conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1,
                                                       NVLINK_SUBLINK_STATE_TX_SAFE,
                                                       flags);
        }
    }

    // Poll for sublinks to reach SAFE state
    for (i = 0; i < connCount; i++)
    {
        // Wait for sublinks to go to SAFE (end0 TX paired with end1 RX)
        if (conns[i]->end0->inSWCFG == NV_TRUE)
        {
            status = nvlink_core_poll_sublink_state(conns[i]->end0,
                                                    NVLINK_SUBLINK_STATE_TX_SAFE,
                                                    NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
                                                    conns[i]->end1,
                                                    NVLINK_SUBLINK_STATE_RX_SAFE,
                                                    NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
                                                    NVLINK_TRANSITION_SAFE_TIMEOUT);
        }

        //
        // When the poll above was skipped, status is stale, but the inSWCFG
        // test alone makes the condition true in that case
        //
        if (status != NVL_SUCCESS || conns[i]->end0->inSWCFG == NV_FALSE)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to set sublinks to SAFE\n",
                __FUNCTION__));
        }

        // Same check in the other direction (end1 TX paired with end0 RX)
        if (conns[i]->end1->inSWCFG == NV_TRUE)
        {
            status = nvlink_core_poll_sublink_state(conns[i]->end1,
                                                    NVLINK_SUBLINK_STATE_TX_SAFE,
                                                    NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
                                                    conns[i]->end0,
                                                    NVLINK_SUBLINK_STATE_RX_SAFE,
                                                    NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
                                                    NVLINK_TRANSITION_SAFE_TIMEOUT);
        }

        if (status != NVL_SUCCESS || conns[i]->end1->inSWCFG == NV_FALSE)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to set sublinks to SAFE\n",
                __FUNCTION__));
        }

        //
        // Disable error detect on both sides of the link
        //
        conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
                                                        NVLINK_LINKSTATE_DISABLE_ERR_DETECT,
                                                        flags);

        if (conns[i]->end0 != conns[i]->end1)
        {
            conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
                                                            NVLINK_LINKSTATE_DISABLE_ERR_DETECT,
                                                            flags);
        }

        //
        // Disable Lanes on both sides of the link
        //
        status = conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
                                                                 NVLINK_LINKSTATE_LANE_DISABLE,
                                                                 flags);
        if (status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to disable lanes for link %s:%s\n",
                __FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
        }

        if (conns[i]->end0 != conns[i]->end1)
        {
            status = conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
                                                                     NVLINK_LINKSTATE_LANE_DISABLE,
                                                                     flags);
            if (status != NVL_SUCCESS)
            {
                NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                    "%s: Unable to disable lanes for link %s:%s\n",
                    __FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
            }
        }

        //
        // Shutdown Lanes on both sides of the link
        //
        status = conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
                                                                 NVLINK_LINKSTATE_LANE_SHUTDOWN,
                                                                 flags);
        if (status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to shutdown lanes for link %s:%s\n",
                __FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
        }

        if (conns[i]->end0 != conns[i]->end1)
        {
            status = conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
                                                                     NVLINK_LINKSTATE_LANE_SHUTDOWN,
                                                                     flags);
            if (status != NVL_SUCCESS)
            {
                NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                    "%s: Unable to shutdown lanes for link %s:%s\n",
                    __FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
            }
        }

        conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, NVLINK_LINKSTATE_OFF, flags);

        // Link becomes in-accessible after its turned off. Check if this is a loopback connection
        if (conns[i]->end0 != conns[i]->end1)
        {
            conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, NVLINK_LINKSTATE_OFF, flags);
        }

        // Reset the software tracking state of both endpoints to OFF
        _nvlink_core_clear_link_state(conns[i]->end0);
        _nvlink_core_clear_link_state(conns[i]->end1);
    }

    //
    // Squash status. If any side of link does not respond the link is
    // shutdown unilaterally
    //
    return NVL_SUCCESS;
}
/**
 * Power down the given array of intranode connections from ACTIVE to SWCFG state
 *
 * The sequence implemented below is:
 *   1. For every connection, disable power management and request the
 *      SAFE (SWCFG) link state on both endpoints.
 *   2. For every connection, poll each endpoint for SAFE and, when the poll
 *      succeeds, request TX sublink SAFE on that endpoint.
 *   3. For every connection, poll the sublinks for SAFE in both directions.
 *   4. Log every connection.
 *
 * A connection whose end0 and end1 are the same link is a loopback
 * connection; the set-mode requests and the link-state poll are then
 * issued only once.
 *
 * Failures of individual steps are logged but never abort the sequence.
 *
 * @param[in]  conns      Array of connections to shutdown
 * @param[in]  connCount  Number of connections in the array
 * @param[in]  flags      Flags to track if shutdown is sync/async
 *
 * return NVL_ERR_GENERIC if conns is NULL or connCount is 0,
 *        NVL_SUCCESS otherwise (per-link failures are squashed)
 */
NvlStatus
nvlink_core_powerdown_intranode_conns_from_active_to_swcfg
(
    nvlink_intranode_conn **conns,
    NvU32                   connCount,
    NvU32                   flags
)
{
    NvlStatus status = NVL_SUCCESS;
    NvU32     i;

    if ((conns == NULL) || (connCount == 0))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No connections to shutdown\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    for (i = 0; i < connCount; i++)
    {
        // Disable Power Management before moving link out of Active
        conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
                                                        NVLINK_LINKSTATE_DISABLE_PM,
                                                        flags);

        // Skip the second request when both ends are the same link (loopback)
        if (conns[i]->end0 != conns[i]->end1)
        {
            conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
                                                            NVLINK_LINKSTATE_DISABLE_PM,
                                                            flags);
        }

        // Move both ends to SWCFG
        conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
                                                        NVLINK_LINKSTATE_SAFE,
                                                        flags);

        if (conns[i]->end0 != conns[i]->end1)
        {
            conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
                                                            NVLINK_LINKSTATE_SAFE,
                                                            flags);
        }
    }

    //
    // Poll _SAFE state for connections and set corresponding sublinks to _SAFE
    //
    for (i = 0; i < connCount; i++)
    {
        // Wait for the end0 to go to SWCFG
        status = nvlink_core_poll_link_state(conns[i]->end0,
                                             NVLINK_LINKSTATE_SAFE,
                                             NVLINK_TRANSITION_SAFE_TIMEOUT);
        if (status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to set endpoint %s:%s in SWCFG\n",
                __FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
        }
        else
        {
            // Change each sublink state to SAFE
            conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0,
                                                       NVLINK_SUBLINK_STATE_TX_SAFE,
                                                       flags);
        }

        if (conns[i]->end0 != conns[i]->end1)
        {
            // Wait for the end1 to go to SWCFG
            status = nvlink_core_poll_link_state(conns[i]->end1,
                                                 NVLINK_LINKSTATE_SAFE,
                                                 NVLINK_TRANSITION_SAFE_TIMEOUT);
            if (status != NVL_SUCCESS)
            {
                NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                    "%s: Unable to set endpoint %s:%s in SWCFG\n",
                    __FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
            }
            else
            {
                // Change each sublink state to SAFE
                conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1,
                                                           NVLINK_SUBLINK_STATE_TX_SAFE,
                                                           flags);
            }
        }
    }

    // Wait for sublinks to go to SAFE
    for (i = 0; i < connCount; i++)
    {
        // end0 TX paired with end1 RX
        status = nvlink_core_poll_sublink_state(conns[i]->end0,
                                                NVLINK_SUBLINK_STATE_TX_SAFE,
                                                NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
                                                conns[i]->end1,
                                                NVLINK_SUBLINK_STATE_RX_SAFE,
                                                NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
                                                NVLINK_TRANSITION_SAFE_TIMEOUT);
        if (status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to set sublinks to SAFE (TX:RX)\n",
                __FUNCTION__));

            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: sublinks (%s:%s) (%s:%s)\n",
                __FUNCTION__,
                conns[i]->end0->dev->deviceName, conns[i]->end0->linkName,
                conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
        }

        // end1 TX paired with end0 RX
        status = nvlink_core_poll_sublink_state(conns[i]->end1,
                                                NVLINK_SUBLINK_STATE_TX_SAFE,
                                                NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
                                                conns[i]->end0,
                                                NVLINK_SUBLINK_STATE_RX_SAFE,
                                                NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
                                                NVLINK_TRANSITION_SAFE_TIMEOUT);
        if (status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Unable to set sublinks to SAFE (RX:TX)\n",
                __FUNCTION__));

            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: sublinks (%s:%s) (%s:%s)\n",
                __FUNCTION__,
                conns[i]->end0->dev->deviceName, conns[i]->end0->linkName,
                conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
        }
    }

    //
    // Update tracking info
    // NOTE(review): this message is logged for every connection, whether or
    // not the polls above succeeded. It is deliberately left without a
    // trailing newline; presumably nvlink_core_print_intranode_conn()
    // completes the line - confirm.
    //
    for (i = 0; i < connCount; i++)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
            "%s: Connection is in SAFE mode. ",
            __FUNCTION__));

        nvlink_core_print_intranode_conn(conns[i]);
    }

    //
    // Squash status. If any side of link does not respond the link is
    // shutdown unilaterally
    //
    return NVL_SUCCESS;
}
/**
 * Reset the given array of intranode connections
 *
 * For every connection, a RESET request is sent to the endpoints and the
 * core library's software state for both endpoints is then cleared back
 * to OFF. The return values of the reset handlers are not checked.
 *
 * @param[in]  conns      Array of connections to reset
 * @param[in]  connCount  Number of connections in the array
 * @param[in]  flags      Flags passed through to the link-mode handlers
 *
 * return NVL_ERR_GENERIC if conns is NULL or connCount is 0,
 *        NVL_SUCCESS otherwise
 */
NvlStatus
nvlink_core_reset_intranode_conns
(
    nvlink_intranode_conn **conns,
    NvU32                   connCount,
    NvU32                   flags
)
{
    NvU32 i;

    if ((conns == NULL) || (connCount == 0))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No connections to reset\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    for (i = 0; i < connCount; i++)
    {
        //
        // Reset both ends of this connection.
        // This path should enable/init those link endpoints as well.
        //
        // NVLink3.0 + uses the TL link reset
        //
        if (conns[i]->end0->version >= NVLINK_DEVICE_VERSION_30)
        {
            conns[i]->end0->link_handlers->set_tl_link_mode(conns[i]->end0,
                                                            NVLINK_LINKSTATE_RESET,
                                                            flags);

            // Skip the second request when both ends are the same link (loopback)
            if (conns[i]->end0 != conns[i]->end1)
            {
                conns[i]->end1->link_handlers->set_tl_link_mode(conns[i]->end1,
                                                                NVLINK_LINKSTATE_RESET,
                                                                flags);
            }
        }
        else
        {
            //
            // NOTE(review): unlike the TL path above, the DL path resets end1
            // without a loopback check, so a loopback link receives the
            // request twice. Confirm whether this is intentional.
            //
            conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
                                                            NVLINK_LINKSTATE_RESET,
                                                            flags);
            conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
                                                            NVLINK_LINKSTATE_RESET,
                                                            flags);
        }

        // Reset the software tracking state of both endpoints to OFF
        _nvlink_core_clear_link_state(conns[i]->end0);
        _nvlink_core_clear_link_state(conns[i]->end1);
    }

    return NVL_SUCCESS;
}
/**
 * Clears Core Library State
 *
 * Returns the software bookkeeping kept for a link to its initial values:
 * the tracked link/sublink states go back to OFF and the per-link
 * training flags are cleared so the corresponding steps run again.
 * Only fields of the nvlink_link structure are written.
 *
 * @param[in]  link  NVLink Link pointer
 */
static void
_nvlink_core_clear_link_state
(
    nvlink_link *link
)
{
    // SW view of the link and both sublinks: everything is OFF again
    link->rx_sublink_state = NVLINK_SUBLINK_STATE_RX_OFF;
    link->tx_sublink_state = NVLINK_SUBLINK_STATE_TX_OFF;
    link->state            = NVLINK_LINKSTATE_OFF;

    // Forget earlier results so each of these steps has to happen again:
    link->bSafeTransitionFail      = NV_FALSE;  // SAFE transition
    link->bTxCommonModeFail        = NV_FALSE;  // TxCommonMode
    link->bInitnegotiateConfigGood = NV_FALSE;  // INITNEGOTIATE
    link->bRxDetected              = NV_FALSE;  // Receiver Detect
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,281 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
#include "nvlink_lock.h"
/**
 * Get the connected remote endpoint information
 *
 * Looks up the intranode and internode connections associated with the
 * given link and copies the remote endpoint details into conn_info. An
 * intranode connection takes precedence when both exist. If the link has
 * no connection, conn_info->bConnected is left as NV_FALSE.
 *
 * The top-level lock is held for the whole lookup, and the per-link locks
 * of the involved links are held while the information is copied.
 *
 * Note: This routine will not initiate any link initialization or topology
 *       discovery.
 *
 * @param[in]   link       NVLink Link pointer
 * @param[out]  conn_info  Details of remote endpoint
 *
 * return NVL_SUCCESS on success, NVL_NO_MEM if the scratch link array
 *        cannot be allocated, NVL_ERR_INVALID_STATE if the link array
 *        would overflow, or the lock acquisition error
 */
NvlStatus
nvlink_lib_get_remote_conn_info
(
    nvlink_link      *link,
    nvlink_conn_info *conn_info
)
{
    NvlStatus              status    = NVL_SUCCESS;
    nvlink_link           *peer      = NULL;
    nvlink_intranode_conn *intraConn = NULL;
    nvlink_internode_conn *interConn = NULL;
    NvU32                  lockCount = 0;

    // Scratch array of the links whose per-link locks must be taken
    nvlink_link **lockSet = (nvlink_link **)nvlink_malloc(
                            sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);

    if (lockSet == NULL)
    {
        return NVL_NO_MEM;
    }

    // Until a connection is found, report the link as not connected
    conn_info->bConnected = NV_FALSE;

    // The connection lists may only be walked under the top-level lock
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        goto free_lock_set;
    }

    // Intranode lookup: both ends of the connection need to be locked
    nvlink_core_get_intranode_conn(link, &intraConn);
    if (intraConn != NULL)
    {
        // The remote end is whichever end of the connection is not 'link'
        if (intraConn->end0 == link)
        {
            peer = intraConn->end1;
        }
        else
        {
            peer = intraConn->end0;
        }

        conn_info->bConnected = NV_TRUE;

        if ((lockCount + 1) >= NVLINK_MAX_SYSTEM_LINK_NUM)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: numLinks >= NVLINK_MAX_SYSTEM_LINK_NUM",
                __FUNCTION__));

            nvlink_assert(0);

            status = NVL_ERR_INVALID_STATE;
            goto release_top_lock;
        }

        lockSet[lockCount++] = link;
        lockSet[lockCount++] = peer;
    }

    //
    // Internode lookup (multi-node systems): only the local link is
    // added to the lock set
    //
    nvlink_core_get_internode_conn(link, &interConn);
    if (interConn != NULL)
    {
        conn_info->bConnected = NV_TRUE;

        lockSet[lockCount++] = link;
    }

    // Take the per-link locks before reading any endpoint details
    status = nvlink_lib_link_locks_acquire(lockSet, lockCount);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        goto release_top_lock;
    }

    // An intranode connection wins over an internode one
    if (intraConn != NULL)
    {
        nvlink_core_copy_intranode_conn_info(peer, conn_info);
    }
    else if (interConn != NULL)
    {
        nvlink_core_copy_internode_conn_info(&interConn->remote_end,
                                             conn_info);
    }

    nvlink_lib_link_locks_release(lockSet, lockCount);

release_top_lock:
    nvlink_lib_top_lock_release();

free_lock_set:
    nvlink_free((void *)lockSet);

    return status;
}
/**
 * Get the connected remote endpoint information
 *
 * For a given link, return the remote endpoint it is connected to.
 *
 * The per-link locks of every link registered with the core library are
 * acquired (under the top-level lock) before discovery is triggered, and
 * both sets of locks are held until the result has been copied out.
 *
 * conn_info->bConnected is set to NV_FALSE before any lock is taken, so it
 * holds a defined value on every return path after the scratch array has
 * been allocated, including the error paths. This matches
 * nvlink_lib_get_remote_conn_info().
 *
 * Note: This routine triggers topology discovery on the set of
 *       links registered in the core library
 *
 * @param[in]   end        NVLink Link pointer
 * @param[out]  conn_info  Details of remote endpoint
 * @param[in]   flags      Flags
 *
 * return NVL_SUCCESS on success, NVL_NO_MEM if the scratch link array
 *        cannot be allocated, NVL_ERR_INVALID_STATE if more than
 *        NVLINK_MAX_SYSTEM_LINK_NUM links are registered, or the lock
 *        acquisition error
 */
NvlStatus
nvlink_lib_discover_and_get_remote_conn_info
(
    nvlink_link      *end,
    nvlink_conn_info *conn_info,
    NvU32             flags
)
{
    NvlStatus      status     = NVL_SUCCESS;
    nvlink_link   *link       = NULL;
    nvlink_link   *remote_end = NULL;
    nvlink_device *dev        = NULL;
    NvU32          numLinks   = 0;

    nvlink_link  **links = (nvlink_link **)nvlink_malloc(
                            sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
    if (links == NULL)
    {
        return NVL_NO_MEM;
    }

    //
    // Initialize connected state to false before taking any lock, so the
    // caller never sees an uninitialized value when a later step fails
    //
    conn_info->bConnected = NV_FALSE;

    // Acquire the top-level lock
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        nvlink_free((void *)links);
        return status;
    }

    //
    // Top-level lock is now acquired. Proceed to traversing the device
    // and link lists
    //
    FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
    {
        FOR_EACH_LINK_REGISTERED(link, dev, node)
        {
            if (numLinks >= NVLINK_MAX_SYSTEM_LINK_NUM)
            {
                NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                    "%s: numLinks >= NVLINK_MAX_SYSTEM_LINK_NUM",
                    __FUNCTION__));

                nvlink_assert(0);

                // Release the top-level lock and free links
                nvlink_lib_top_lock_release();
                nvlink_free((void *)links);
                return NVL_ERR_INVALID_STATE;
            }

            links[numLinks] = link;
            numLinks++;
        }
    }

    // Acquire the per-link locks
    status = nvlink_lib_link_locks_acquire(links, numLinks);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        // Release the top-level lock
        nvlink_lib_top_lock_release();
        nvlink_free((void *)links);
        return status;
    }

    // Get the remote_end of the link
    nvlink_core_discover_and_get_remote_end(end, &remote_end, flags);

    if (remote_end)
    {
        // mark the connected state
        conn_info->bConnected = NV_TRUE;
        nvlink_core_copy_intranode_conn_info(remote_end, conn_info);
    }

    // Release the per-link locks
    nvlink_lib_link_locks_release(links, numLinks);

    // Release the top-level lock
    nvlink_lib_top_lock_release();

    // links is known to be non-NULL here (checked right after allocation)
    nvlink_free((void *)links);

    return NVL_SUCCESS;
}

View File

@@ -0,0 +1,118 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
#include "nvlink_lock.h"
/**
 * Re-Initialize a given link from OFF to SWCFG
 *
 * Finds the intranode connection the link belongs to, takes the per-link
 * locks of both endpoints of that connection and runs the OFF -> SWCFG
 * initialization on the pair. The top-level lock is only held while the
 * connection is looked up and the per-link locks are acquired.
 *
 * @param[in]  link   NVLink Link pointer
 * @param[in]  flags  Flag to track if the initialization is sync/async
 *
 * return NVL_SUCCESS once the initialization has been run on both
 *        endpoints, NVL_ERR_GENERIC if link is NULL or has no intranode
 *        connection, or the lock acquisition error
 */
NvlStatus
nvlink_lib_reinit_link_from_off_to_swcfg
(
    nvlink_link *link,
    NvU32        flags
)
{
    nvlink_link           *endpoints[2] = {0};
    nvlink_intranode_conn *conn         = NULL;
    NvlStatus              status       = NVL_SUCCESS;

    if (link == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad link pointer specified.\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    // The connection list may only be walked under the top-level lock
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        return status;
    }

    nvlink_core_get_intranode_conn(link, &conn);
    if (conn == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No connection was found for this link.\n",
            __FUNCTION__));

        nvlink_lib_top_lock_release();
        return NVL_ERR_GENERIC;
    }

    // Both endpoints of the connection take part in the re-initialization
    endpoints[0] = conn->end0;
    endpoints[1] = conn->end1;

    status = nvlink_lib_link_locks_acquire(endpoints, 2);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        nvlink_lib_top_lock_release();
        return status;
    }

    //
    // The per-link locks are held and the connection list is no longer
    // needed, so the top-level lock can be dropped before the actual work
    //
    nvlink_lib_top_lock_release();

    nvlink_core_init_links_from_off_to_swcfg(endpoints, 2, flags);

    nvlink_lib_link_locks_release(endpoints, 2);

    return NVL_SUCCESS;
}

View File

@@ -0,0 +1,323 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
#include "nvlink_lock.h"
/**
 * TODO: Rework this function to acquire locks and update callers
 *
 * Check if the device has no links registered
 *
 * The device's link list is inspected without taking any lock.
 *
 * @param[in]  dev  NVLink Device pointer
 *
 * return NV_TRUE if the device has no links registered
 */
NvBool
nvlink_lib_is_link_list_empty
(
    nvlink_device *dev
)
{
    return nvListIsEmpty(&dev->link_list);
}
/**
 * Get the link associated with the given link id.
 *
 * Walks the device's registered links under the top-level lock and hands
 * back the first one whose linkNumber equals link_id. *link is set to
 * NULL whenever no match is returned (after argument validation).
 *
 * @param[in]   device   NVLink Device Pointer
 * @param[in]   link_id  Link Id of the given link
 * @param[out]  link     NVLink Link pointer
 *
 * return NVL_SUCCESS on success, -NVL_BAD_ARGS if device or link is NULL,
 *        -NVL_NOT_FOUND if no registered link has the given id, or the
 *        top-level lock acquisition error
 */
NvlStatus
nvlink_lib_get_link
(
    nvlink_device  *device,
    NvU32           link_id,
    nvlink_link   **link
)
{
    NvlStatus    status    = -NVL_NOT_FOUND;
    nvlink_link *candidate = NULL;

    if ((device == NULL) || (link == NULL))
    {
        return -NVL_BAD_ARGS;
    }

    // Nothing found yet
    *link = NULL;

    // The link list may only be walked under the top-level lock
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        return status;
    }

    // The lock call overwrote status; assume "not found" until a match shows up
    status = -NVL_NOT_FOUND;

    FOR_EACH_LINK_REGISTERED(candidate, device, node)
    {
        if (candidate->linkNumber == link_id)
        {
            status = NVL_SUCCESS;
            *link  = candidate;
            break;
        }
    }

    nvlink_lib_top_lock_release();

    return status;
}
/**
 * Set the given link as the link master.
 * This requires that the remote end of the link is known, and that it
 * hasn't set itself to be the master.
 *
 * Calling this on a link that is already the master is a no-op that
 * reports success.
 *
 * Note: This function is used by RM to set master attribute to a link
 *       in order to handle GPU lock inversion problem while servicing
 *       link interrupts(re-training). With external fabric management
 *       enabled, we don't have the issue. Also we don't have to worry
 *       about the inter-node connections which are managed by FM.
 *
 * @param[in]  link  NVLink Link pointer
 *
 * return NVL_SUCCESS if the master was set (or was already set),
 *        NVL_ERR_GENERIC if link is NULL, NVL_ERR_INVALID_STATE if the
 *        remote end is unknown or is already the master, or the lock
 *        acquisition error
 */
NvlStatus
nvlink_lib_set_link_master
(
    nvlink_link *link
)
{
    NvlStatus              status     = NVL_SUCCESS;
    nvlink_intranode_conn *conn       = NULL;
    nvlink_link           *peer       = NULL;
    nvlink_link           *lockSet[2] = {0};
    NvU32                  lockCount  = 0;

    if (link == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad link pointer specified.\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    // The connection list may only be walked under the top-level lock
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        return status;
    }

    // The link itself is always locked; its peer only if it is connected
    lockSet[lockCount++] = link;

    nvlink_core_get_intranode_conn(link, &conn);
    if (conn != NULL)
    {
        // The remote end is whichever end of the connection is not 'link'
        if (conn->end0 == link)
        {
            peer = conn->end1;
        }
        else
        {
            peer = conn->end0;
        }

        lockSet[lockCount++] = peer;
    }

    status = nvlink_lib_link_locks_acquire(lockSet, lockCount);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        nvlink_lib_top_lock_release();
        return status;
    }

    //
    // The per-link locks are held and the connection list is no longer
    // needed, so the top-level lock can be dropped
    //
    nvlink_lib_top_lock_release();

    // Nothing to do (status stays NVL_SUCCESS) if this link is already the master
    if (!link->master)
    {
        // Only claim the master when the peer exists and hasn't claimed it
        if ((peer != NULL) && !peer->master)
        {
            link->master = NV_TRUE;
        }
        else
        {
            status = NVL_ERR_INVALID_STATE;
        }
    }

    nvlink_lib_link_locks_release(lockSet, lockCount);

    return status;
}
/**
 * Get the link master associated with the given link.
 * This may be the given link, or it may be the remote end.
 *
 * If the given link has its master flag set, it is returned. Otherwise
 * the remote end of its intranode connection is returned; when the remote
 * end is not known, *master is set to NULL and an error is returned.
 *
 * NOTE(review): the remote end is handed back without looking at its own
 * master flag, so the "no master is assigned" case is not reported as an
 * error by this code - confirm that callers expect this.
 *
 * @param[in]   link    NVLink Link pointer
 * @param[out]  master  Master endpoint for the link
 *
 * return NVL_SUCCESS if the master was found, NVL_ERR_GENERIC if link or
 *        master is NULL, NVL_ERR_INVALID_STATE if the link is not the
 *        master and has no known remote end, or the lock acquisition error
 */
NvlStatus
nvlink_lib_get_link_master
(
    nvlink_link  *link,
    nvlink_link **master
)
{
    NvlStatus              status     = NVL_SUCCESS;
    nvlink_intranode_conn *conn       = NULL;
    nvlink_link           *peer       = NULL;
    nvlink_link           *lockSet[2] = {0};
    NvU32                  lockCount  = 0;

    if ((link == NULL) || (master == NULL))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad link pointer specified.\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    // The connection list may only be walked under the top-level lock
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        return status;
    }

    // The link itself is always locked; its peer only if it is connected
    lockSet[lockCount++] = link;

    nvlink_core_get_intranode_conn(link, &conn);
    if (conn != NULL)
    {
        // The remote end is whichever end of the connection is not 'link'
        if (conn->end0 == link)
        {
            peer = conn->end1;
        }
        else
        {
            peer = conn->end0;
        }

        lockSet[lockCount++] = peer;
    }

    status = nvlink_lib_link_locks_acquire(lockSet, lockCount);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        nvlink_lib_top_lock_release();
        return status;
    }

    //
    // The per-link locks are held and the connection list is no longer
    // needed, so the top-level lock can be dropped
    //
    nvlink_lib_top_lock_release();

    if (link->master)
    {
        *master = link;
    }
    else
    {
        // Fall back to the remote end; it is an error if there is none
        *master = peer;

        if (peer == NULL)
        {
            status = NVL_ERR_INVALID_STATE;
        }
    }

    nvlink_lib_link_locks_release(lockSet, lockCount);

    return status;
}

View File

@@ -0,0 +1,508 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
#include "nvlink_lock.h"
// Forward declarations of the file-local registration lookup helpers defined later in this file
static NvBool _nvlink_lib_is_device_registered(nvlink_device *);
static NvBool _nvlink_lib_is_link_registered(nvlink_device *, nvlink_link *);
/**
 * Register a device with the NVLink Core Library
 *
 * The device is stamped with its deviceId (derived from the pointer value)
 * and the library's fabric node id, then appended to the global device list
 * unless a device with the same deviceId is already on that list.
 *
 * @param[in]  dev  NVLink Device pointer
 *
 * return NVL_SUCCESS if the device is registered successfully,
 *        NVL_ERR_GENERIC for a NULL pointer or an already registered device,
 *        or the error returned while acquiring the top-level lock
 */
NvlStatus
nvlink_lib_register_device
(
    nvlink_device *dev
)
{
    NvlStatus rc = NVL_SUCCESS;

    if (dev == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad device pointer\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // The device list is only touched with the top-level lock held
    rc = nvlink_lib_top_lock_acquire();
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));
        return rc;
    }

    // The device pointer doubles as the device id
    dev->deviceId = (NvU64)(NvUPtr)dev;

    // Tag the device with the fabric node id of this library instance
    dev->nodeId = nvlinkLibCtx.nodeId;

    if (_nvlink_lib_is_device_registered(dev))
    {
        // Duplicate registration is reported as an error
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
            "%s: %s is already registered in nvlink core\n",
            __FUNCTION__, dev->deviceName));

        rc = NVL_ERR_GENERIC;
    }
    else
    {
        // Fresh device: set up its link list and list node ...
        nvListInit(&dev->link_list);
        nvListInit(&dev->node);

        // ... and hook it into the global device list
        nvListAppend(&dev->node, &nvlinkLibCtx.nv_devicelist_head.node);

        rc = NVL_SUCCESS;
    }

    nvlink_lib_top_lock_release();

    return rc;
}
/**
 * Unregister a device from the NVLink Core Library
 *
 * Every link still on the device's link list is unregistered first (its
 * connections are removed, its per-link lock is freed and its remove handler
 * is called), then the device itself is taken off the device list.
 *
 * @param[in]  dev  NVLink Device pointer
 *
 * return NVL_SUCCESS if the device is un-registered successfully,
 *        NVL_ERR_GENERIC for a NULL pointer, or the error returned by a
 *        failed lock acquisition
 */
NvlStatus
nvlink_lib_unregister_device
(
    nvlink_device *dev
)
{
    nvlink_link *cur  = NULL;
    nvlink_link *next = NULL;
    NvlStatus    rc   = NVL_SUCCESS;

    if (dev == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad device pointer\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // The top-level lock is held for the whole teardown
    rc = nvlink_lib_top_lock_acquire();
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));
        return rc;
    }

    // Safe iteration: the current link is unlinked inside the loop body
    FOR_EACH_LINK_REGISTERED_SAFE(cur, next, dev, node)
    {
        // Per-link scratch state: at most the link and its partner get locked
        nvlink_link           *lockSet[2]   = {0};
        NvU32                  lockCount    = 0;
        nvlink_intranode_conn *intraConn    = NULL;
        nvlink_internode_conn *interConn    = NULL;
        NvBool                 wasConnected = NV_FALSE;

        lockSet[lockCount++] = cur;

        // An intranode connection means the remote end must be locked too
        nvlink_core_get_intranode_conn(cur, &intraConn);
        if (intraConn != NULL)
        {
            wasConnected = NV_TRUE;

            lockSet[lockCount++] = (intraConn->end0 == cur) ? intraConn->end1
                                                            : intraConn->end0;
        }

        //
        // An internode connection only involves the local end, which is
        // already in the lock set, so it is just detected here
        //
        nvlink_core_get_internode_conn(cur, &interConn);

        rc = nvlink_lib_link_locks_acquire(lockSet, lockCount);
        if (rc != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Failed to acquire per-link locks\n",
                __FUNCTION__));

            nvlink_lib_top_lock_release();
            return rc;
        }

        // Drop whatever connections reference this link
        if (intraConn != NULL)
        {
            nvlink_core_remove_intranode_conn(intraConn);
        }

        if (interConn != NULL)
        {
            nvlink_core_remove_internode_conn(cur);
        }

        // Unlink from the device's link list
        nvListDel(&cur->node);

        // Release the per-link locks, then free the lock of the removed link
        nvlink_lib_link_locks_release(lockSet, lockCount);
        nvlink_lib_link_lock_free(cur);

        // Notify the link's owner that it has been removed
        cur->link_handlers->remove(cur);

        // A link that had no intranode connection was counted as not connected
        if (!wasConnected)
        {
            nvlinkLibCtx.notConnectedEndpoints =
                nvlinkLibCtx.notConnectedEndpoints - 1;
        }

        nvlinkLibCtx.registeredEndpoints--;
    }

    // Finally take the device itself off the device list
    nvListDel(&dev->node);

    nvlink_lib_top_lock_release();

    return NVL_SUCCESS;
}
/**
 * Register a link with a device in the NVLink Core Library
 *
 * A per-link lock is allocated up front. The link is then added to the
 * device's link list under the top-level lock, unless a link with the same
 * linkId is already on that list.
 *
 * @param[in]  dev   NVLink Device pointer
 * @param[in]  link  NVLink Link pointer
 *
 * return NVL_SUCCESS if the link is registered successfully,
 *        NVL_ERR_GENERIC for a NULL pointer or an already registered link,
 *        or the error returned by the lock allocation/acquisition
 */
NvlStatus
nvlink_lib_register_link
(
    nvlink_device *dev,
    nvlink_link   *link
)
{
    NvlStatus rc = NVL_SUCCESS;

    if ((dev == NULL) || (link == NULL))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad device or link pointer\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // The per-link lock is allocated before the top-level lock is taken
    rc = nvlink_lib_link_lock_alloc(link);
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to alloc per-link lock\n",
            __FUNCTION__));
        return rc;
    }

    rc = nvlink_lib_top_lock_acquire();
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        //
        // A retry of this function allocates the per-link lock again,
        // so the one allocated above is freed here
        //
        nvlink_lib_link_lock_free(link);
        return rc;
    }

    // The link pointer doubles as the link id
    link->linkId = (NvU64)(NvUPtr) link;

    if (_nvlink_lib_is_link_registered(dev, link))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
            "%s: %s: %s is already registered in nvlink core\n",
            __FUNCTION__, link->dev->deviceName, link->linkName));

        rc = NVL_ERR_GENERIC;

        //
        // No new link was added, so the lock allocated above is freed.
        // NOTE(review): whether this alloc/free pair disturbs the lock of the
        // already registered link depends on nvlink_lib_link_lock_alloc(),
        // which is not visible here - confirm.
        //
        nvlink_lib_link_lock_free(link);
    }
    else
    {
        // Prepare the list node and the token (also the pointer value)
        nvListInit(&link->node);
        link->token = (NvU64)(NvUPtr) link;

        // Attach the link to its device and notify the link's owner
        nvListAppend(&link->node, &dev->link_list);
        link->link_handlers->add(link);

        // Start with clean training retry counters
        link->safe_retries             = 0;
        link->packet_injection_retries = 0;

        // Account for the new endpoint and flag that one was added
        nvlinkLibCtx.registeredEndpoints++;
        nvlinkLibCtx.bNewEndpoints = NV_TRUE;

        rc = NVL_SUCCESS;
    }

    nvlink_lib_top_lock_release();

    return rc;
}
/**
 * Unregister a link from its device in the NVLink Core Library
 *
 * Removes the intranode/internode connections that reference the link, takes
 * the link off its device's link list, frees its per-link lock, calls its
 * remove handler and updates the endpoint counters.
 *
 * @param[in]  link  NVLink Link pointer
 *
 * return NVL_SUCCESS if the link is un-registered successfully,
 *        NVL_ERR_GENERIC for a NULL pointer, or the error returned by a
 *        failed lock acquisition
 */
NvlStatus
nvlink_lib_unregister_link
(
    nvlink_link *link
)
{
    // At most two endpoints are locked: the link and its partner
    nvlink_link           *lockSet[2]   = {0};
    NvU32                  lockCount    = 0;
    nvlink_intranode_conn *intraConn    = NULL;
    nvlink_internode_conn *interConn    = NULL;
    NvBool                 wasConnected = NV_FALSE;
    NvlStatus              rc           = NVL_SUCCESS;

    if (link == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad link pointer\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // The top-level lock is held for the whole teardown
    rc = nvlink_lib_top_lock_acquire();
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));
        return rc;
    }

    lockSet[lockCount++] = link;

    // An intranode connection means the remote end must be locked too
    nvlink_core_get_intranode_conn(link, &intraConn);
    if (intraConn != NULL)
    {
        wasConnected = NV_TRUE;

        lockSet[lockCount++] = (intraConn->end0 == link) ? intraConn->end1
                                                         : intraConn->end0;
    }

    //
    // An internode connection only involves the local end, which is
    // already in the lock set, so it is just detected here
    //
    nvlink_core_get_internode_conn(link, &interConn);

    rc = nvlink_lib_link_locks_acquire(lockSet, lockCount);
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        nvlink_lib_top_lock_release();
        return rc;
    }

    // Drop whatever connections reference this link
    if (intraConn != NULL)
    {
        nvlink_core_remove_intranode_conn(intraConn);
    }

    if (interConn != NULL)
    {
        nvlink_core_remove_internode_conn(link);
    }

    // Unlink from the device's link list
    nvListDel(&link->node);

    // Release the per-link locks, then free the lock of the removed link
    nvlink_lib_link_locks_release(lockSet, lockCount);
    nvlink_lib_link_lock_free(link);

    // Notify the link's owner that it has been removed
    link->link_handlers->remove(link);

    // A link that had no intranode connection was counted as not connected
    if (!wasConnected)
    {
        nvlinkLibCtx.notConnectedEndpoints =
            nvlinkLibCtx.notConnectedEndpoints - 1;
    }

    nvlinkLibCtx.registeredEndpoints--;

    nvlink_lib_top_lock_release();

    return NVL_SUCCESS;
}
/**
 * Look up a device on the core library's list of registered devices
 *
 * Devices are matched by deviceId.
 *
 * @param[in]  dev  NVLink Device pointer
 *
 * return NV_TRUE if a device with the same deviceId is on the list,
 *        NV_FALSE otherwise
 */
static NvBool
_nvlink_lib_is_device_registered
(
    nvlink_device *dev
)
{
    nvlink_device *candidate = NULL;
    NvBool         bFound    = NV_FALSE;

    FOR_EACH_DEVICE_REGISTERED(candidate, nvlinkLibCtx.nv_devicelist_head, node)
    {
        if (candidate->deviceId == dev->deviceId)
        {
            // First match settles the answer
            bFound = NV_TRUE;
            break;
        }
    }

    return bFound;
}
/**
 * Look up a link on a device's list of registered links
 *
 * Links are matched by linkId.
 *
 * @param[in]  dev   NVLink Device pointer
 * @param[in]  link  NVLink Link pointer
 *
 * return NV_TRUE if a link with the same linkId is on the device's list,
 *        NV_FALSE otherwise
 */
static NvBool
_nvlink_lib_is_link_registered
(
    nvlink_device *dev,
    nvlink_link   *link
)
{
    nvlink_link *candidate = NULL;
    NvBool       bFound    = NV_FALSE;

    FOR_EACH_LINK_REGISTERED(candidate, dev, node)
    {
        if (candidate->linkId == link->linkId)
        {
            // First match settles the answer
            bFound = NV_TRUE;
            break;
        }
    }

    return bFound;
}

View File

@@ -0,0 +1,777 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
#include "nvlink_lock.h"
/**
 * [CLEAN SHUTDOWN]
 *
 * Shutdown given links of a device from active to L2 state
 *
 * Every link selected by linkMask must have an intranode connection. A
 * connection is handed to the core powerdown routine only once both of its
 * endpoints are marked as entering L2; links whose remote end has not asked
 * for the transition yet are left pending.
 *
 * param[in]  dev       NVLink Device pointer
 * param[in]  linkMask  Mask of links to be shutdown
 * param[in]  flags     Flags to track if the transition is sync/async
 *
 * return NVL_SUCCESS if the links transition to L2,
 *        NVL_MORE_PROCESSING_REQUIRED if some links are still waiting for
 *        their remote end to request L2, or an error status otherwise
 */
NvlStatus
nvlink_lib_powerdown_links_from_active_to_L2
(
    nvlink_device *dev,
    NvU32          linkMask,
    NvU32          flags
)
{
    NvlStatus               status        = NVL_SUCCESS;
    nvlink_link            *link          = NULL;
    nvlink_intranode_conn **conns         = NULL;
    nvlink_intranode_conn  *conn          = NULL;
    NvU32                   numLinks      = 0;
    NvU32                   numConns      = 0;
    NvU32                   connCount     = 0;
    NvU32                   i;
    NvU32                   lockLinkCount = 0;
    nvlink_link           **lockLinks     = NULL;

    if (dev == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad device pointer specified.\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // Allocate space for the list of endpoints to lock
    lockLinks = (nvlink_link **)nvlink_malloc(
                            sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
    if (lockLinks == NULL)
    {
        return NVL_NO_MEM;
    }

    // Allocate space for the connection list
    conns = (nvlink_intranode_conn **)nvlink_malloc(
                            sizeof(nvlink_intranode_conn *) * NVLINK_MAX_SYSTEM_LINK_NUM);
    if (conns == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to allocate space for connections list\n",
            __FUNCTION__));

        nvlink_free((void *)lockLinks);
        return NVL_ERR_GENERIC;
    }

    // Clear the whole connection list; the size matches the allocation above
    nvlink_memset(conns, 0,
                  sizeof(nvlink_intranode_conn *) * NVLINK_MAX_SYSTEM_LINK_NUM);

    // Acquire the top-level lock
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        goto nvlink_lib_powerdown_links_from_active_to_L2_end;
    }

    //
    // Top-level lock is now acquired. Proceed to traversing the device
    // and link lists and connection lists
    //

    // Get the array of link endpoints whose lock needs to be acquired
    FOR_EACH_LINK_REGISTERED(link, dev, node)
    {
        //
        // Unsigned shift: a signed "1 << 31" is undefined behavior.
        // NOTE(review): linkNumber is assumed to be below 32 for a 32-bit
        // mask - confirm against the link numbering of supported devices.
        //
        if (!(linkMask & (1U << link->linkNumber)))
        {
            continue;
        }

        // Get the connection associated with the link
        conn = NULL;
        nvlink_core_get_intranode_conn(link, &conn);

        if (conn == NULL)
        {
            //
            // Could not find the connection for the link. Release the
            // top-level lock and return
            //
            nvlink_lib_top_lock_release();

            status = NVL_ERR_GENERIC;
            goto nvlink_lib_powerdown_links_from_active_to_L2_end;
        }
        else if ((lockLinkCount + 1) >= NVLINK_MAX_SYSTEM_LINK_NUM)
        {
            //
            // Two entries are appended below, so both lockLinkCount and
            // lockLinkCount + 1 must be valid indices. The counter checked
            // here has to be lockLinkCount: numLinks is not updated until
            // the last loop of this function.
            //
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: lockLinkCount >= NVLINK_MAX_SYSTEM_LINK_NUM\n",
                __FUNCTION__));

            nvlink_assert(0);

            // Release the top-level lock
            nvlink_lib_top_lock_release();

            status = NVL_ERR_INVALID_STATE;
            goto nvlink_lib_powerdown_links_from_active_to_L2_end;
        }

        // Capture both endpoints of the connection
        lockLinks[lockLinkCount] = conn->end0;
        lockLinkCount++;

        lockLinks[lockLinkCount] = conn->end1;
        lockLinkCount++;
    }

    // Acquire the per-link locks for all links captured
    status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        // Release the top-level lock
        nvlink_lib_top_lock_release();

        status = NVL_ERR_GENERIC;
        goto nvlink_lib_powerdown_links_from_active_to_L2_end;
    }

    // Filter the connections which are already in SLEEP
    FOR_EACH_LINK_REGISTERED(link, dev, node)
    {
        if (!(linkMask & (1U << link->linkNumber)))
        {
            continue;
        }

        // If the link received a L2 exit request, but never exited L2
        if (link->powerStateTransitionStatus == nvlink_power_state_exiting_L2)
        {
            // Update the power state transition status
            link->powerStateTransitionStatus = nvlink_power_state_in_L2;
            continue;
        }

        // Get the connection associated with the link
        conn = NULL;
        nvlink_core_get_intranode_conn(link, &conn);

        // Check the connection state to verify if the link is already in SLEEP
        status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_SLEEP);
        if (status == NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
                "%s: Link is already in sleep %s: %s.\n",
                __FUNCTION__, link->dev->deviceName, link->linkName));
            continue;
        }

        // Link is not in SLEEP. Update power state transition status for the link
        link->powerStateTransitionStatus = nvlink_power_state_entering_L2;
    }

    // Collect the connections whose endpoints both desire to enter SLEEP
    FOR_EACH_LINK_REGISTERED(link, dev, node)
    {
        if (!(linkMask & (1U << link->linkNumber)))
        {
            continue;
        }

        // Check if the link desires to enter SLEEP
        if (link->powerStateTransitionStatus == nvlink_power_state_entering_L2)
        {
            // Get the connection associated with the link
            conn = NULL;
            nvlink_core_get_intranode_conn(link, &conn);

            // The connection will enter SLEEP only when both its endpoints desire to enter SLEEP
            if ((conn->end0->powerStateTransitionStatus == nvlink_power_state_entering_L2) &&
                (conn->end1->powerStateTransitionStatus == nvlink_power_state_entering_L2))
            {
                // Increment the #connections considered for entering L2
                numConns++;

                // Check if the connection is already included in the list
                for (i = 0; i < connCount; i++)
                {
                    if (conns[i] == conn)
                    {
                        break;
                    }
                }

                // If this is a new connection, add it to the list
                if (i == connCount)
                {
                    conns[connCount] = conn;
                    connCount++;
                }
            }

            // Count every link that desires to enter SLEEP
            numLinks++;
        }
    }

    //
    // All the required per-link locks are successfully acquired
    // The connection list traversal is also complete now
    // Release the top level-lock
    //
    nvlink_lib_top_lock_release();

    // Clear the status variable
    status = NVL_SUCCESS;

    if (connCount > 0)
    {
        status = nvlink_core_powerdown_intranode_conns_from_active_to_L2(conns, connCount, flags);
    }

    if (status == NVL_SUCCESS)
    {
        //
        // If some links are waiting on the remote end to request sleep,
        // update status to NVL_MORE_PROCESSING_REQUIRED
        //
        status = (numLinks != numConns ? NVL_MORE_PROCESSING_REQUIRED : NVL_SUCCESS);
    }

    // Release the per-link locks
    nvlink_lib_link_locks_release(lockLinks, lockLinkCount);

nvlink_lib_powerdown_links_from_active_to_L2_end:

    if (conns != NULL)
    {
        nvlink_free((void *)conns);
    }

    if (lockLinks != NULL)
    {
        nvlink_free((void *)lockLinks);
    }

    return status;
}
/**
 * [PSEUDO-CLEAN SHUTDOWN]
 *
 * Shutdown the given array of links from ACTIVE to OFF state
 *
 * Links without an intranode connection are skipped, and so are connections
 * already reported to be in SLEEP, OFF or RESET.
 *
 * param[in]  links     Array of links to shutdown
 * param[in]  numLinks  Number of links to be shutdown
 * param[in]  flags     Flags to track if the transition is sync/async
 *
 * return NVL_ERR_GENERIC / NVL_NO_MEM when the arguments are bad or the
 *        scratch arrays cannot be allocated; NVL_SUCCESS in every other
 *        case, because later failures are deliberately squashed
 */
NvlStatus
nvlink_lib_powerdown_links_from_active_to_off
(
    nvlink_link **links,
    NvU32         numLinks,
    NvU32         flags
)
{
    nvlink_intranode_conn **pendingConns = NULL;
    nvlink_link           **lockSet      = NULL;
    nvlink_intranode_conn  *curConn      = NULL;
    NvU32                   pendingCount = 0;
    NvU32                   lockCount    = 0;
    NvU32                   idx;
    NvlStatus               rc           = NVL_SUCCESS;

    if ((links == NULL) || (numLinks == 0))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No links to shutdown\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // Room for both endpoints of every link's connection
    lockSet = (nvlink_link **)nvlink_malloc(sizeof(*lockSet) * (2 * numLinks));
    if (lockSet == NULL)
    {
        return NVL_NO_MEM;
    }

    // Room for one connection per link
    pendingConns = (nvlink_intranode_conn **)nvlink_malloc(
                                    sizeof(*pendingConns) * numLinks);
    if (pendingConns == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to allocate space for connections list\n",
            __FUNCTION__));

        nvlink_free((void *)lockSet);
        return NVL_ERR_GENERIC;
    }

    nvlink_memset(pendingConns, 0, sizeof(*pendingConns) * numLinks);

    // Connection lookups below happen with the top-level lock held
    rc = nvlink_lib_top_lock_acquire();
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        goto powerdown_to_off_cleanup;
    }

    // Pass 1: gather local and remote endpoints that need their lock taken
    for (idx = 0; idx < numLinks; idx++)
    {
        curConn = NULL;
        nvlink_core_get_intranode_conn(links[idx], &curConn);

        if (curConn != NULL)
        {
            lockSet[lockCount++] = curConn->end0;
            lockSet[lockCount++] = curConn->end1;
        }
    }

    rc = nvlink_lib_link_locks_acquire(lockSet, lockCount);
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        nvlink_lib_top_lock_release();
        goto powerdown_to_off_cleanup;
    }

    // Pass 2: keep only connections not already in SLEEP, OFF or RESET
    for (idx = 0; idx < numLinks; idx++)
    {
        curConn = NULL;
        nvlink_core_get_intranode_conn(links[idx], &curConn);

        if (curConn == NULL)
        {
            continue;
        }

        //
        // The states are probed in the order SLEEP, OFF, RESET and the
        // probing stops at the first state that matches
        //
        if ((nvlink_core_check_intranode_conn_state(curConn, NVLINK_LINKSTATE_SLEEP) == NVL_SUCCESS) ||
            (nvlink_core_check_intranode_conn_state(curConn, NVLINK_LINKSTATE_OFF)   == NVL_SUCCESS) ||
            (nvlink_core_check_intranode_conn_state(curConn, NVLINK_LINKSTATE_RESET) == NVL_SUCCESS))
        {
            continue;
        }

        pendingConns[pendingCount++] = curConn;
    }

    // Per-link locks are held and the traversal is done: drop the top-level lock
    nvlink_lib_top_lock_release();

    if (pendingCount > 0)
    {
        // The result is intentionally ignored, see the squash note below
        (void)nvlink_core_powerdown_intranode_conns_from_active_to_off(pendingConns,
                                                                      pendingCount,
                                                                      flags);
    }

    nvlink_lib_link_locks_release(lockSet, lockCount);

powerdown_to_off_cleanup:

    if (pendingConns != NULL)
    {
        nvlink_free((void *)pendingConns);
    }

    if (lockSet != NULL)
    {
        nvlink_free((void *)lockSet);
    }

    //
    // Status is squashed: if either side of a link does not respond, the
    // link is shut down unilaterally.
    // NOTE(review): lock acquisition failures above are squashed as well.
    //
    return NVL_SUCCESS;
}
/**
 * Power down the given array of links from ACTIVE to SWCFG state
 *
 * Links without an intranode connection are skipped, and so are connections
 * already reported to be in SAFE mode.
 *
 * param[in]  links     Array of links to shutdown
 * param[in]  numLinks  Number of links to be shutdown
 * param[in]  flags     Flags to track if the transition is sync/async
 *
 * return NVL_ERR_GENERIC / NVL_NO_MEM when the arguments are bad or the
 *        scratch arrays cannot be allocated; NVL_SUCCESS in every other
 *        case, because later failures are deliberately squashed
 */
NvlStatus
nvlink_lib_powerdown_links_from_active_to_swcfg
(
    nvlink_link **links,
    NvU32         numLinks,
    NvU32         flags
)
{
    nvlink_intranode_conn **pendingConns = NULL;
    nvlink_link           **lockSet      = NULL;
    nvlink_intranode_conn  *curConn      = NULL;
    NvU32                   pendingCount = 0;
    NvU32                   lockCount    = 0;
    NvU32                   idx;
    NvlStatus               rc           = NVL_SUCCESS;

    if ((links == NULL) || (numLinks == 0))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No links to shutdown\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // Room for both endpoints of every link's connection
    lockSet = (nvlink_link **)nvlink_malloc(sizeof(*lockSet) * (2 * numLinks));
    if (lockSet == NULL)
    {
        return NVL_NO_MEM;
    }

    // Room for one connection per link
    pendingConns = (nvlink_intranode_conn **)nvlink_malloc(
                                    sizeof(*pendingConns) * numLinks);
    if (pendingConns == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to allocate space for connections list\n",
            __FUNCTION__));

        nvlink_free((void *)lockSet);
        return NVL_ERR_GENERIC;
    }

    nvlink_memset(pendingConns, 0, sizeof(*pendingConns) * numLinks);

    // Connection lookups below happen with the top-level lock held
    rc = nvlink_lib_top_lock_acquire();
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        goto powerdown_to_swcfg_cleanup;
    }

    // Pass 1: gather local and remote endpoints that need their lock taken
    for (idx = 0; idx < numLinks; idx++)
    {
        curConn = NULL;
        nvlink_core_get_intranode_conn(links[idx], &curConn);

        if (curConn != NULL)
        {
            lockSet[lockCount++] = curConn->end0;
            lockSet[lockCount++] = curConn->end1;
        }
    }

    rc = nvlink_lib_link_locks_acquire(lockSet, lockCount);
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        nvlink_lib_top_lock_release();
        goto powerdown_to_swcfg_cleanup;
    }

    // Pass 2: keep only connections that are not already in SAFE mode
    for (idx = 0; idx < numLinks; idx++)
    {
        curConn = NULL;
        nvlink_core_get_intranode_conn(links[idx], &curConn);

        if (curConn == NULL)
        {
            continue;
        }

        if (nvlink_core_check_intranode_conn_state(curConn, NVLINK_LINKSTATE_SAFE) == NVL_SUCCESS)
        {
            continue;
        }

        pendingConns[pendingCount++] = curConn;
    }

    // Per-link locks are held and the traversal is done: drop the top-level lock
    nvlink_lib_top_lock_release();

    if (pendingCount > 0)
    {
        // The result is intentionally ignored, see the squash note below
        (void)nvlink_core_powerdown_intranode_conns_from_active_to_swcfg(pendingConns,
                                                                        pendingCount,
                                                                        flags);
    }

    nvlink_lib_link_locks_release(lockSet, lockCount);

powerdown_to_swcfg_cleanup:

    if (pendingConns != NULL)
    {
        nvlink_free((void *)pendingConns);
    }

    if (lockSet != NULL)
    {
        nvlink_free((void *)lockSet);
    }

    //
    // Status is squashed: if either side of a link does not respond, the
    // link is shut down unilaterally.
    // NOTE(review): lock acquisition failures above are squashed as well.
    //
    return NVL_SUCCESS;
}
/**
 * Reset the given array of links
 *
 * Links without an intranode connection are skipped. For the remaining
 * links both endpoints of the connection are locked and the connections are
 * handed to the core reset routine.
 *
 * param[in]  links     Array of links to be reset
 * param[in]  numLinks  Number of links to be reset
 * param[in]  flags     Flags to track if the transition is sync/async
 *
 * return NVL_SUCCESS if the links were reset successfully
 */
NvlStatus
nvlink_lib_reset_links
(
    nvlink_link **links,
    NvU32         numLinks,
    NvU32         flags
)
{
    nvlink_intranode_conn **resetConns = NULL;
    nvlink_link           **lockSet    = NULL;
    nvlink_intranode_conn  *curConn    = NULL;
    NvU32                   resetCount = 0;
    NvU32                   lockCount  = 0;
    NvU32                   idx;
    NvlStatus               rc         = NVL_SUCCESS;

    if ((links == NULL) || (numLinks == 0))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No links to reset\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // Room for both endpoints of every link's connection
    lockSet = (nvlink_link **)nvlink_malloc(sizeof(*lockSet) * (2 * numLinks));
    if (lockSet == NULL)
    {
        return NVL_NO_MEM;
    }

    // Room for one connection per link
    resetConns = (nvlink_intranode_conn **)nvlink_malloc(
                                    sizeof(*resetConns) * numLinks);
    if (resetConns == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to allocate space for connections list\n",
            __FUNCTION__));

        nvlink_free((void *)lockSet);
        return NVL_ERR_GENERIC;
    }

    nvlink_memset(resetConns, 0, sizeof(*resetConns) * numLinks);

    // Connection lookups below happen with the top-level lock held
    rc = nvlink_lib_top_lock_acquire();
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        goto reset_links_cleanup;
    }

    // Gather the connections to reset and the endpoints that must be locked
    for (idx = 0; idx < numLinks; idx++)
    {
        curConn = NULL;
        nvlink_core_get_intranode_conn(links[idx], &curConn);

        if (curConn != NULL)
        {
            lockSet[lockCount++] = curConn->end0;
            lockSet[lockCount++] = curConn->end1;

            resetConns[resetCount++] = curConn;
        }
    }

    rc = nvlink_lib_link_locks_acquire(lockSet, lockCount);
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        nvlink_lib_top_lock_release();
        goto reset_links_cleanup;
    }

    // Per-link locks are held and the traversal is done: drop the top-level lock
    nvlink_lib_top_lock_release();

    // With nothing to reset, rc keeps the NVL_SUCCESS of the lock acquisition
    if (resetCount > 0)
    {
        rc = nvlink_core_reset_intranode_conns(resetConns, resetCount, flags);
    }

    nvlink_lib_link_locks_release(lockSet, lockCount);

reset_links_cleanup:

    if (resetConns != NULL)
    {
        nvlink_free((void *)resetConns);
    }

    if (lockSet != NULL)
    {
        nvlink_free((void *)lockSet);
    }

    return rc;
}

View File

@@ -0,0 +1,826 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
#include "nvlink_lock.h"
/**
 * Check whether a group of links have completed training
 *
 * As visible here, the function acquires the top-level lock and the per-link
 * locks of the given links and releases them again; the returned status
 * reflects only argument validation, allocation and locking.
 *
 * @param[in]  links      List of NVLink Link pointers
 * @param[in]  linkCount  Count of #links
 *
 * return NVL_SUCCESS if all locks were acquired and released,
 *        an error status otherwise
 */
NvlStatus
nvlink_lib_check_training_complete
(
    nvlink_link **links,
    NvU32         linkCount
)
{
    nvlink_link **lockSet   = NULL;
    NvU32         lockCount = 0;
    NvU32         idx;
    NvlStatus     rc        = NVL_SUCCESS;

    if ((links == NULL) || (linkCount == 0))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad link pointer or linkCount!\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // Scratch array for the links whose lock is taken
    lockSet = (nvlink_link **)nvlink_malloc(
                        sizeof(*lockSet) * NVLINK_MAX_SYSTEM_LINK_NUM);
    if (lockSet == NULL)
    {
        return NVL_NO_MEM;
    }

    rc = nvlink_lib_top_lock_acquire();
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        goto check_training_complete_cleanup;
    }

    // Copy the caller's links into the lock set, guarding its capacity
    for (idx = 0; idx < linkCount; idx++)
    {
        if ((lockCount + 1) >= NVLINK_MAX_SYSTEM_LINK_NUM)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: lockLinkCount >= NVLINK_MAX_SYSTEM_LINK_NUM",
                __FUNCTION__));

            nvlink_assert(0);

            nvlink_lib_top_lock_release();

            rc = NVL_ERR_INVALID_STATE;
            goto check_training_complete_cleanup;
        }

        lockSet[lockCount++] = links[idx];
    }

    rc = nvlink_lib_link_locks_acquire(lockSet, lockCount);
    if (rc != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        nvlink_lib_top_lock_release();
        goto check_training_complete_cleanup;
    }

    // Per-link locks are held: drop the top-level lock, then the link locks
    nvlink_lib_top_lock_release();

    nvlink_lib_link_locks_release(lockSet, lockCount);

check_training_complete_cleanup:

    if (lockSet != NULL)
    {
        nvlink_free((void *)lockSet);
    }

    return rc;
}
/**
 * Train a given set of links from SWCFG to ACTIVE state
 *
 * Both endpoints of every requested link's intranode connection are locked.
 * The connections that are in SAFE (and, for NVLink3.0+ endpoints, whose
 * link received CONFIG_GOOD) are then collected without duplicates and
 * handed to the ALT or legacy training sequence.
 *
 * Note: For training the links one by one - it is the responsibility of
 *       the caller to call this function every time for each link
 *
 * @param[in]  links      List of NVLink Link pointers
 * @param[in]  linkCount  Count of #links
 * @param[in]  flags      Flag to track whether training is sync/async
 *
 * return NVL_SUCCESS if the link state transition was a success
 */
NvlStatus
nvlink_lib_train_links_from_swcfg_to_active
(
    nvlink_link **links,
    NvU32         linkCount,
    NvU32         flags
)
{
    NvlStatus               status     = NVL_SUCCESS;
    nvlink_intranode_conn **trainConns = NULL;
    nvlink_intranode_conn  *curConn    = NULL;
    nvlink_link           **heldLinks  = NULL;
    NvU32                   numTrain   = 0;
    NvU32                   numHeld    = 0;
    NvU32                   idx;
    NvU32                   slot;

    if (links == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad link pointer\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // Two lock slots per requested link: one for each end of its connection
    heldLinks = (nvlink_link **)nvlink_malloc(sizeof(nvlink_link *) * (2 * linkCount));
    if (heldLinks == NULL)
    {
        return NVL_NO_MEM;
    }

    // At most one connection per requested link can end up in the list
    trainConns = (nvlink_intranode_conn **)nvlink_malloc(
                     sizeof(nvlink_intranode_conn *) * linkCount);
    if (trainConns == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to allocate space for connections list\n",
            __FUNCTION__));

        status = NVL_ERR_GENERIC;
        goto train_swcfg_to_active_cleanup;
    }

    nvlink_memset(trainConns, 0, sizeof(nvlink_intranode_conn *) * linkCount);

    // Take the top-level lock before walking the connection lists
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        goto train_swcfg_to_active_cleanup;
    }

    // Gather the local and remote endpoint of every requested link
    for (idx = 0; idx < linkCount; idx++)
    {
        curConn = NULL;
        nvlink_core_get_intranode_conn(links[idx], &curConn);

        if (curConn == NULL)
        {
            // No connection for this link: drop the top-level lock and bail out
            nvlink_lib_top_lock_release();
            status = NVL_ERR_GENERIC;
            goto train_swcfg_to_active_cleanup;
        }

        heldLinks[numHeld++] = curConn->end0;
        heldLinks[numHeld++] = curConn->end1;
    }

    // Lock every endpoint gathered above
    status = nvlink_lib_link_locks_acquire(heldLinks, numHeld);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        // Release the top-level lock
        nvlink_lib_top_lock_release();

        goto train_swcfg_to_active_cleanup;
    }

    // Build the de-duplicated list of connections that need training
    for (idx = 0; idx < linkCount; idx++)
    {
        curConn = NULL;
        nvlink_core_get_intranode_conn(links[idx], &curConn);

        // Don't train links that didn't receive CONFIG_GOOD (NVLINK3+)
        if (((curConn->end0->version >= NVLINK_DEVICE_VERSION_30) ||
             (curConn->end1->version >= NVLINK_DEVICE_VERSION_30)) &&
            (!links[idx]->bInitnegotiateConfigGood))
        {
            continue;
        }

        // Nothing to do when the HS check returns success or invalid state
        status = nvlink_core_check_intranode_conn_state(curConn, NVLINK_LINKSTATE_HS);
        if ((status != NVL_SUCCESS) && (status != NVL_ERR_INVALID_STATE))
        {
            // Only connections already in SAFE can be trained to HS
            status = nvlink_core_check_intranode_conn_state(curConn, NVLINK_LINKSTATE_SAFE);
            if (status == NVL_SUCCESS)
            {
                // Look for the connection among those already collected
                for (slot = 0; slot < numTrain; slot++)
                {
                    if (trainConns[slot] == curConn)
                    {
                        break;
                    }
                }

                // Not found: append it
                if (slot == numTrain)
                {
                    trainConns[numTrain] = curConn;
                    numTrain++;
                }
            }
        }
    }

    //
    // Per-link locks are held and the connection list is final, so the
    // top-level lock is no longer needed
    //
    nvlink_lib_top_lock_release();

    if (numTrain > 0)
    {
        //
        // NOTE(review): the sequence is selected from the last connection
        // looked up in the loop above, not from an entry of trainConns
        //
        if ((curConn->end0->version < NVLINK_DEVICE_VERSION_30) &&
            (curConn->end1->version < NVLINK_DEVICE_VERSION_30))
        {
            status = nvlink_core_train_intranode_conns_from_swcfg_to_active_legacy(trainConns,
                                                                                   numTrain,
                                                                                   flags);
        }
        else
        {
            status = nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT(trainConns,
                                                                                numTrain,
                                                                                flags);
        }
    }

    // Release the per-link locks
    nvlink_lib_link_locks_release(heldLinks, numHeld);

train_swcfg_to_active_cleanup:

    if (trainConns != NULL)
    {
        nvlink_free((void *)trainConns);
    }

    if (heldLinks != NULL)
    {
        nvlink_free((void *)heldLinks);
    }

    return status;
}
/**
 * Check whether a link number is selected by a 32-bit link mask
 *
 * The shift is performed on an unsigned operand and only for link numbers
 * below 32, so the test is well defined for every link number. Link numbers
 * that do not fit in the mask are reported as not selected.
 *
 * @param[in]  linkMask    Mask of selected links
 * @param[in]  linkNumber  Link number to test
 *
 * return non-zero if the link is selected by the mask, 0 otherwise
 */
static int
_nvlink_lib_is_link_in_mask
(
    NvU32 linkMask,
    NvU32 linkNumber
)
{
    if (linkNumber >= 32)
    {
        return 0;
    }

    return (linkMask & ((NvU32)1 << linkNumber)) != 0;
}

/**
 * Train a given set of links of a device from L2 to ACTIVE state
 *
 * Only links whose power state ends up as exiting-L2 are considered, and a
 * connection is trained only when both of its endpoints are exiting L2.
 *
 * @param[in]  dev       NVLink Device pointer
 * @param[in]  linkMask  Mask of links to be trained
 * @param[in]  flags     Flags to track if the transition is sync/async
 *
 * return NVL_SUCCESS if the links train to ACTIVE
 *        NVL_MORE_PROCESSING_REQUIRED if training succeeded but some links
 *        are still waiting on their remote end to exit L2
 */
NvlStatus
nvlink_lib_train_links_from_L2_to_active
(
    nvlink_device *dev,
    NvU32          linkMask,
    NvU32          flags
)
{
    NvlStatus               status        = NVL_SUCCESS;
    nvlink_link            *link          = NULL;
    nvlink_intranode_conn **conns         = NULL;
    nvlink_intranode_conn  *conn          = NULL;
    NvU32                   numLinks      = 0;
    NvU32                   numConns      = 0;
    NvU32                   connCount     = 0;
    NvU32                   i;
    nvlink_link           **lockLinks     = NULL;
    NvU32                   lockLinkCount = 0;

    if (dev == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad device pointer specified.\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    lockLinks = (nvlink_link **)nvlink_malloc(
                    sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
    if (lockLinks == NULL)
    {
        return NVL_NO_MEM;
    }

    // Allocate space for the connection list
    conns = (nvlink_intranode_conn **)nvlink_malloc(
                sizeof(nvlink_intranode_conn *) * NVLINK_MAX_SYSTEM_LINK_NUM);
    if (conns == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to allocate space for connections list\n",
            __FUNCTION__));

        status = NVL_ERR_GENERIC;
        goto nvlink_lib_train_links_from_L2_to_active_end;
    }

    // Zero the whole connection list; the size must match the allocation above
    nvlink_memset(conns, 0, sizeof(nvlink_intranode_conn *) * NVLINK_MAX_SYSTEM_LINK_NUM);

    // Acquire the top-level lock
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        goto nvlink_lib_train_links_from_L2_to_active_end;
    }

    //
    // Top-level lock is now acquired. Proceed to traversing the device
    // and link lists and connection lists
    //

    // Get the array of link endpoints whose lock needs to be acquired
    FOR_EACH_LINK_REGISTERED(link, dev, node)
    {
        if (!_nvlink_lib_is_link_in_mask(linkMask, link->linkNumber))
        {
            continue;
        }

        // Get the connection associated with the link
        conn = NULL;
        nvlink_core_get_intranode_conn(link, &conn);

        if (conn == NULL)
        {
            //
            // Could not find the connection for the link. Release the
            // top-level lock and return
            //
            nvlink_lib_top_lock_release();

            status = NVL_ERR_GENERIC;
            goto nvlink_lib_train_links_from_L2_to_active_end;
        }

        lockLinks[lockLinkCount] = conn->end0;
        lockLinkCount++;

        lockLinks[lockLinkCount] = conn->end1;
        lockLinkCount++;
    }

    // Acquire the per-link locks for all links captured
    status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        // Release the top-level lock
        nvlink_lib_top_lock_release();

        goto nvlink_lib_train_links_from_L2_to_active_end;
    }

    // Pass 1: decide, per selected link, whether it should exit L2
    FOR_EACH_LINK_REGISTERED(link, dev, node)
    {
        if (!_nvlink_lib_is_link_in_mask(linkMask, link->linkNumber))
        {
            continue;
        }

        // If the link received a L2 entry request, but never entered L2
        if (link->powerStateTransitionStatus == nvlink_power_state_entering_L2)
        {
            // Update the power state transition status
            link->powerStateTransitionStatus = nvlink_power_state_in_L0;
            continue;
        }

        // Get the connection associated with the link
        conn = NULL;
        nvlink_core_get_intranode_conn(link, &conn);

        // Check the connection state to verify if the link is already in HS
        status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_HS);
        if (status == NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
                "%s: Link is not in sleep %s: %s.\n",
                __FUNCTION__, link->dev->deviceName, link->linkName));
            continue;
        }

        // Check the connection state to verify if the link is already in SAFE
        status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_SAFE);
        if (status == NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Link is not in sleep %s: %s.\n",
                __FUNCTION__, link->dev->deviceName, link->linkName));
            continue;
        }

        // Mark the power state transition for the link
        link->powerStateTransitionStatus = nvlink_power_state_exiting_L2;
    }

    // Pass 2: collect the connections whose both endpoints are exiting L2
    FOR_EACH_LINK_REGISTERED(link, dev, node)
    {
        if (!_nvlink_lib_is_link_in_mask(linkMask, link->linkNumber))
        {
            continue;
        }

        if (link->powerStateTransitionStatus == nvlink_power_state_exiting_L2)
        {
            // Get the connection associated with the link
            conn = NULL;
            nvlink_core_get_intranode_conn(link, &conn);

            // Verify if both the endpoints desire to exit SLEEP
            if ((conn->end0->powerStateTransitionStatus == nvlink_power_state_exiting_L2) &&
                (conn->end1->powerStateTransitionStatus == nvlink_power_state_exiting_L2))
            {
                // Increment the #connections considered for exiting L2
                numConns++;

                // Check if the connection is already included in the list
                for (i = 0; i < connCount; i++)
                {
                    if (conns[i] == conn)
                        break;
                }

                // If this is a new connection, add it to the list
                if (i == connCount)
                {
                    conns[connCount] = conn;
                    connCount++;
                }
            }

            // Increment the #links considered for exiting L2
            numLinks++;
        }
    }

    //
    // All the required per-link locks are successfully acquired
    // The connection list traversal is also complete now
    // Release the top level-lock
    //
    nvlink_lib_top_lock_release();

    // Clear the status variable; it still holds the last state-check result
    status = NVL_SUCCESS;

    if (connCount > 0)
    {
        status = nvlink_core_train_intranode_conns_from_from_L2_to_active(conns, connCount, flags);
    }

    if (status == NVL_SUCCESS)
    {
        //
        // If some links are waiting on the remote end to exit sleep,
        // update status to NVL_MORE_PROCESSING_REQUIRED
        //
        status = (numLinks != numConns ? NVL_MORE_PROCESSING_REQUIRED : NVL_SUCCESS);
    }

    // Release the per-link locks
    nvlink_lib_link_locks_release(lockLinks, lockLinkCount);

nvlink_lib_train_links_from_L2_to_active_end:

    if (conns != NULL)
    {
        nvlink_free((void *)conns);
    }

    if (lockLinks != NULL)
    {
        nvlink_free((void *)lockLinks);
    }

    return status;
}
/**
 * Retrain a given link from SWCFG to ACTIVE
 *
 * The link's intranode connection is looked up under the top-level lock and
 * both of its endpoints are locked. Training is attempted only when the
 * connection is in SAFE; NVLink3.0+ connections additionally require
 * CONFIG_GOOD on both endpoints.
 *
 * @param[in]  link   NVLink Link pointer
 * @param[in]  flags  Flag to track if the training is sync/async
 *
 * return NVL_SUCCESS if the training was successful
 */
NvlStatus
nvlink_lib_retrain_link_from_swcfg_to_active
(
    nvlink_link *link,
    NvU32        flags
)
{
    NvlStatus              status       = NVL_SUCCESS;
    nvlink_intranode_conn *conn         = NULL;
    nvlink_intranode_conn *connList[1]  = { NULL };
    nvlink_link           *endpoints[2] = { NULL, NULL };

    if (link == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad link pointer specified.\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // The connection lookup below needs the top-level lock
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        return status;
    }

    // Find the connection this link belongs to
    nvlink_core_get_intranode_conn(link, &conn);
    if (conn == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No connection was found for %s: %s.\n",
            __FUNCTION__, link->dev->deviceName, link->linkName));

        // Nothing to train: drop the top-level lock and return
        nvlink_lib_top_lock_release();

        return NVL_ERR_GENERIC;
    }

    // One connection to train, two endpoints to lock
    connList[0]  = conn;
    endpoints[0] = conn->end0;
    endpoints[1] = conn->end1;

    status = nvlink_lib_link_locks_acquire(endpoints, 2);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link locks\n",
            __FUNCTION__));

        // Release the top-level lock
        nvlink_lib_top_lock_release();

        return status;
    }

    // Both per-link locks are held; the top-level lock is no longer needed
    nvlink_lib_top_lock_release();

    // A success or invalid-state result from the HS check is returned as is
    status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_HS);
    if ((status != NVL_SUCCESS) && (status != NVL_ERR_INVALID_STATE))
    {
        // Only a connection that is already in SAFE can be trained to HS
        status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_SAFE);
        if (status == NVL_SUCCESS)
        {
            if ((conn->end0->version < NVLINK_DEVICE_VERSION_30) &&
                (conn->end1->version < NVLINK_DEVICE_VERSION_30))
            {
                // Legacy training for pre-NVLink3.0
                status = nvlink_core_train_intranode_conns_from_swcfg_to_active_legacy(connList, 0x1, flags);
            }
            else if (conn->end0->bInitnegotiateConfigGood &&
                     conn->end1->bInitnegotiateConfigGood)
            {
                // ALT training for NVLink3.0+
                status = nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT(connList, 0x1, flags);
            }
            else
            {
                // NVLink3.0+ endpoint without CONFIG_GOOD: do not train
                status = NVL_ERR_GENERIC;
            }
        }
    }

    // Single release point for the per-link locks
    nvlink_lib_link_locks_release(endpoints, 2);

    return status;
}
/**
 * Save training seeds into the link structure
 *
 * seedData[0] is treated as the seed count and must not exceed
 * NVLINK_MAX_SEED_NUM. The copy always transfers sizeof(link->seedData)
 * bytes, so the caller's buffer must be at least that large.
 *
 * @param[in]  link      NVLink Link pointer
 * @param[in]  seedData  Training seed information
 *
 * return NVL_SUCCESS if the seed saving was successful
 *        NVL_ERR_GENERIC if link or seedData is NULL, or the count is out of bounds
 *        otherwise the status of the failed per-link lock acquisition
 */
NvlStatus
nvlink_lib_save_training_seeds
(
    nvlink_link *link,
    NvU32       *seedData
)
{
    NvlStatus status = NVL_SUCCESS;
    NvU32     size;

    // The link is dereferenced by every path below, including the error prints
    if (link == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad link pointer specified.\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // Check to make sure we are given a buffer of data
    if (seedData == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No seed data given to store %s: %s.\n",
            __FUNCTION__, link->dev->deviceName, link->linkName));
        return NVL_ERR_GENERIC;
    }

    size = seedData[0];

    // Check to make sure the size is not out of bounds
    if (size > NVLINK_MAX_SEED_NUM)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad data, size of %u out of bounds %s: %s.\n",
            __FUNCTION__, size, link->dev->deviceName, link->linkName));
        return NVL_ERR_GENERIC;
    }

    // Acquire the per-link lock
    status = nvlink_lib_link_locks_acquire(&link, 1);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link lock\n",
            __FUNCTION__));

        return status;
    }

    // Always using corelib defined structures for size
    nvlink_memcpy(link->seedData, seedData, sizeof(link->seedData));

    // Release the per-link locks
    nvlink_lib_link_locks_release(&link, 1);

    return NVL_SUCCESS;
}
/**
 * Copy training seeds from the link structure
 *
 * link->seedData[0] is treated as the seed count and must not exceed
 * NVLINK_MAX_SEED_NUM. The copy always transfers sizeof(link->seedData)
 * bytes, so seedDataCopy must be at least that large.
 *
 * @param[in]   link          NVLink Link pointer
 * @param[out]  seedDataCopy  Buffer that receives the training seed information
 *
 * return NVL_SUCCESS if the seed copy was successful
 *        NVL_ERR_GENERIC if link or seedDataCopy is NULL, or the stored count
 *        is out of bounds
 *        otherwise the status of the failed per-link lock acquisition
 */
NvlStatus
nvlink_lib_copy_training_seeds
(
    nvlink_link *link,
    NvU32       *seedDataCopy
)
{
    NvlStatus status = NVL_SUCCESS;
    NvU32     size;

    // The link is dereferenced by every path below, including the error prints
    if (link == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad link pointer specified.\n",
            __FUNCTION__));
        return NVL_ERR_GENERIC;
    }

    // Check to make sure we are given a buffer to copy into
    if (seedDataCopy == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: No seed data structure given to store into %s: %s.\n",
            __FUNCTION__, link->dev->deviceName, link->linkName));
        return NVL_ERR_GENERIC;
    }

    // NOTE(review): the stored count is read before the per-link lock is taken
    size = link->seedData[0];

    // Check to make sure the size is not out of bounds
    if (size > NVLINK_MAX_SEED_NUM)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Bad data, size of %u out of bounds %s: %s.\n",
            __FUNCTION__, size, link->dev->deviceName, link->linkName));
        return NVL_ERR_GENERIC;
    }

    // Acquire the per-link lock
    status = nvlink_lib_link_locks_acquire(&link, 1);
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire per-link lock\n",
            __FUNCTION__));

        return status;
    }

    // Always copy the full corelib-defined seed array
    nvlink_memcpy(seedDataCopy, link->seedData, sizeof(link->seedData));

    // Release the per-link locks
    nvlink_lib_link_locks_release(&link, 1);

    return NVL_SUCCESS;
}

View File

@@ -0,0 +1,90 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NVLINK_CTX_H_
#define _NVLINK_CTX_H_
//
// Link transition times in ms.
// TODO: Review with HW for optimal transition times.
// NOTE(review): only LINK_TRANSITION_TIMEOUT_IN_MS carries its unit in the
// name; the other three are taken to be milliseconds per the line above.
//
#define LINK_TRANSITION_TIME_OFF 1
#define LINK_TRANSITION_TIME_SAFE 5
#define LINK_TRANSITION_TIME_HS 500
#define LINK_TRANSITION_TIMEOUT_IN_MS 2000
/*
* State of the NVLink core library: the top-level lock, the heads of the
* device and connection lists, endpoint counters and the fabric node id.
*/
typedef struct
{
/*
* Lock for all core lib structures except nvlink_link structures
*/
void *topLevelLock;
/*
* Head of the device-list (embedded by value, not a pointer)
*/
nvlink_device nv_devicelist_head;
/*
* Head of the established intranode nvlink connections list
*/
nvlink_intranode_conn nv_intraconn_head;
/*
* Head of the added internode nvlink connections list
*/
nvlink_internode_conn nv_interconn_head;
/*
* Topology information
* registeredEndpoints : #Endpoints registered in the core library
* connectedEndpoints : #Endpoints whose remote has been determined
* notConnectedEndpoints: #Endpoints whose remote has not been determined
* bNewEndpoints : NOTE(review): not described in the original comment;
* presumably flags endpoints registered since the last
* topology pass - confirm against the users of this field
*/
NvU32 registeredEndpoints;
NvU32 connectedEndpoints;
NvU32 notConnectedEndpoints;
NvBool bNewEndpoints;
/*
* Endpoint count in different link states
* endpointsInSafe : #Endpoints in SAFE state
* endpointsInFail : #Endpoints that failed to transition to ACTIVE
* endpointsInActive: #Endpoints in ACTIVE
*/
NvU32 endpointsInSafe;
NvU32 endpointsInFail;
NvU32 endpointsInActive;
/*
* Fabric node id set by ioctl interface. This id will be assigned to each
* nvlink device during registration and matched for endpoint look-up on
* ioctls, which operate on endpoints.
*/
NvU16 nodeId;
}nvlink_lib_context;
/*
* Declaration of the library context instance; the definition lives in a
* source file that includes this header.
*/
extern nvlink_lib_context nvlinkLibCtx;
#endif //_NVLINK_CTX_H_

View File

@@ -0,0 +1,365 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2017-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NVLINK_HELPER_H_
#define _NVLINK_HELPER_H_
//
// fabric node id will be used as MSB 16 bits of the link token value to
// generate a unique token for discovering connections
//
#define NVLINK_FABRIC_NODE_ID_MASK 0xFFFF
#define NVLINK_FABRIC_NODE_ID_POS 48
/**
* Check if the device type is supported
*/
NvBool nvlink_core_is_supported_device_type(NvU32 devType);
/**
* Get the link and sublink states for the endpoint
*/
void nvlink_core_get_endpoint_state(nvlink_link *link, nvlink_link_state *linkState);
/**
* Get the nvlink_device * from the PCI DBDF
*/
void nvlink_core_get_device_by_devinfo(nvlink_device_info *devInfo, nvlink_device **dev);
/**
* Get the nvlink_link * from the PCI DBDF and link#
*/
void nvlink_core_get_link_by_endpoint(nvlink_endpoint *endPoint, nvlink_link **link);
/**
* Given the nvlink_link ptr, copy the endpoint details for the link
*/
void nvlink_core_copy_endpoint_info(nvlink_link *connLink, nvlink_endpoint *endPointInfo);
/**
* Given the nvlink_device ptr, copy the device details
*/
void nvlink_core_copy_device_info(nvlink_device *tmpDev, nvlink_detailed_dev_info *devInfo);
/************************************************************************************************/
/****************************** NVLink initialization functions *********************************/
/************************************************************************************************/
/**
* Kick-off INITPHASE1 on the given array of links
*/
NvlStatus nvlink_core_initphase1(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/**
* Kick-off INITRXTERM on the given array of links
*/
NvlStatus nvlink_core_rx_init_term(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/**
* Kick-off receiver detect on the given array of links
*/
NvlStatus nvlink_core_set_rx_detect(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/**
* Get receiver detect status on the given array of links
*/
NvlStatus nvlink_core_get_rx_detect(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/**
* Enable TX common mode on the given array of links
*/
NvlStatus nvlink_core_enable_common_mode(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/**
* Disable TX common mode on the given array of links
*/
NvlStatus nvlink_core_disable_common_mode(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/**
* Perform RX calibration on the given array of links
*/
NvlStatus nvlink_core_calibrate_links(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/**
* Enable data on the given array of links
*/
NvlStatus nvlink_core_enable_data(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/**
* Transition to SWCFG on the given array of links
*/
NvlStatus nvlink_core_link_init_async(nvlink_link **links, NvU32 numLinks);
/**
* Poll on SAFE/SWCFG on the given link
*/
NvlStatus nvlink_core_wait_for_link_init(nvlink_link *link);
/**
* Initialize all the endpoints from OFF to SWCFG state
*/
void nvlink_core_init_links_from_off_to_swcfg(nvlink_link **pLinks,
NvU32 numLinks,
NvU32 flags);
/**
* Send INITNEGOTIATE command on the given array of links
*/
NvlStatus nvlink_core_initnegotiate(nvlink_link **links, NvU32 numLinks, NvU32 flags);
/************************************************************************************************/
/*************************** NVLink topology discovery functions ********************************/
/************************************************************************************************/
/**
* Generate a discovery token for the given link
*/
NvU64 nvlink_core_get_link_discovery_token(nvlink_link *link);
/**
* Write the discovery token for the given link
*/
NvlStatus nvlink_core_write_link_discovery_token(nvlink_link *link, NvU64 token);
/**
* Read the discovery token for the given link
*/
NvU64 nvlink_core_read_link_discovery_token(nvlink_link *link);
/**
* Detect the connection by correlating the tokens
*/
void nvlink_core_correlate_conn_by_token(nvlink_link *srcLink, NvU64 writeToken, NvBool skipReadToken);
/**
* For a given end of a link, returns the other end it is connected to.
*/
void nvlink_core_discover_and_get_remote_end(nvlink_link *end,
nvlink_link **remote_end,
NvU32 flags);
/************************************************************************************************/
/********************************** NVLink training functions ***********************************/
/************************************************************************************************/
/**
* Train the internode connection link from SWCFG to ACTIVE
*/
NvlStatus nvlink_core_train_internode_conns_from_swcfg_to_active(nvlink_internode_conn **conns,
NvU32 connCount,
NvU32 *isMasterEnd,
NvU32 flags);
/**
* Train the internode connection sublink to enter high speed
*/
NvlStatus nvlink_core_train_internode_conn_sublink_from_safe_to_hs(nvlink_internode_conn *conn,
NvU32 flags);
/**
* Train a given set of intranode connections from L2 to ACTIVE state
*/
NvlStatus nvlink_core_train_intranode_conns_from_from_L2_to_active(nvlink_intranode_conn **conns,
NvU32 connCount,
NvU32 flags);
/**
* Train intranode connections associated with a list of links to HS using ALT sequence
*/
NvlStatus nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT(nvlink_intranode_conn **conns,
NvU32 connCount,
NvU32 flags);
/**
* Train a single intranode connection associated with a list of links to HS using legacy
* pre-Ampere sequence
*/
NvlStatus nvlink_core_train_intranode_conns_from_swcfg_to_active_legacy(nvlink_intranode_conn **conns,
NvU32 connCount,
NvU32 flags);
/************************************************************************************************/
/********************************** NVLink shutdown functions ***********************************/
/************************************************************************************************/
/**
* [CLEAN SHUTDOWN]
*
* Shutdown given intranode connections from active to L2 state
*/
NvlStatus nvlink_core_powerdown_intranode_conns_from_active_to_L2(nvlink_intranode_conn **conns,
NvU32 connCount,
NvU32 flags);
/**
* [PSEUDO-CLEAN SHUTDOWN]
*
* Shutdown the given array of intranode connections from ACTIVE to OFF state
*/
NvlStatus nvlink_core_powerdown_intranode_conns_from_active_to_off(nvlink_intranode_conn **conns,
NvU32 connCount,
NvU32 flags);
/**
* Power down the given array of intranode connections from ACTIVE to SWCFG state
*/
NvlStatus nvlink_core_powerdown_intranode_conns_from_active_to_swcfg(nvlink_intranode_conn **conns,
NvU32 connCount,
NvU32 flags);
/**
* Reset the given array of intranode connections
*/
NvlStatus nvlink_core_reset_intranode_conns(nvlink_intranode_conn **conns,
NvU32 connCount,
NvU32 flags);
/************************************************************************************************/
/**************************** NVLink connection management functions ****************************/
/************************************************************************************************/
/**
* For a given link, return the associated internode connection
*/
void nvlink_core_get_internode_conn(nvlink_link *localLink,
nvlink_internode_conn **conn);
/**
* Add a new internode connection to the list of connections
*/
NvlStatus nvlink_core_add_internode_conn(nvlink_link *localLink,
nvlink_remote_endpoint_info *remoteEndPoint);
/**
* For a given link, delete the associated internode connection
*/
void nvlink_core_remove_internode_conn(nvlink_link *localLink);
/**
* For a given link, return the associated intranode connection
*/
void nvlink_core_get_intranode_conn(nvlink_link *endpoint,
nvlink_intranode_conn **conn);
/**
* Add a new intranode connection to the list of intranode connections
*/
NvlStatus nvlink_core_add_intranode_conn(nvlink_link *end0, nvlink_link *end1);
/**
* Remove the connection from the list of intranode connections
*/
void nvlink_core_remove_intranode_conn(nvlink_intranode_conn *conn);
/**
* Check if the given intranode connection is in the specified mode
*/
NvlStatus nvlink_core_check_intranode_conn_state(nvlink_intranode_conn *conn,
NvU64 linkMode);
/**
* Copy the intranode connection's remote endpoint information into the nvlink_conn_info
* structure passed in
*/
void nvlink_core_copy_intranode_conn_info(nvlink_link *remote_end,
nvlink_conn_info *conn_info);
/**
* Copy the internode connection's remote endpoint information into the nvlink_conn_info
* structure passed in
*/
void nvlink_core_copy_internode_conn_info(nvlink_remote_endpoint_info *remote_end,
nvlink_conn_info *conn_info);
/************************************************************************************************/
/******************************* NVLink link management functions *******************************/
/************************************************************************************************/
/**
* For the given link, check whether the link state is at the requested state
*/
NvBool nvlink_core_check_link_state(nvlink_link *link, NvU64 linkState);
/**
* For the given link, check whether the tx sublink state is at the requested state
*/
NvBool nvlink_core_check_tx_sublink_state(nvlink_link *link, NvU64 txSublinkState);
/**
* For the given link, check whether the rx sublink state is at the requested state
*/
NvBool nvlink_core_check_rx_sublink_state(nvlink_link *link, NvU64 rxSublinkState);
/**
* Poll for the link to reach the particular state up to the given timeout. The link
* state transition is considered failed once timeout occurs
*/
NvlStatus nvlink_core_poll_link_state(nvlink_link *link,
NvU64 linkState,
NvU32 timeout);
/**
* Poll for a given timeout period for a sublink to reach the particular state. The
* sublink state transition is considered failed once timeout occurs
*/
NvlStatus nvlink_core_poll_sublink_state(nvlink_link *localTxSubLink,
NvU64 localTxSubLinkState,
NvU32 localTxSubLinkSubtate,
nvlink_link *remoteRxSubLink,
NvU64 remoteRxSubLinkState,
NvU32 remoteRxSubLinkSubstate,
NvU32 timeout);
/**
* Poll for the tx sublink to reach the specified state up to the given timeout. The
* sublink state transition is considered failed once timeout occurs
*/
NvlStatus nvlink_core_poll_tx_sublink_state(nvlink_link *link,
NvU64 txSublinkState,
NvU32 txSublinkSubState,
NvU32 timeout);
/**
* Poll for the rx sublink to reach the specified state up to the given timeout. The
* sublink state transition is considered failed once timeout occurs
*/
NvlStatus nvlink_core_poll_rx_sublink_state(nvlink_link *link,
NvU64 rxSublinkState,
NvU32 rxSublinkSubState,
NvU32 timeout);
/************************************************************************************************/
/****************** Nvlink print functions for devices/links/connections ************************/
/************************************************************************************************/
/**
 * Print the state of the given link
 *
 * @param link  Link whose state is printed
 */
void nvlink_core_print_link_state(nvlink_link *link);
/**
 * Print the given intranode connection
 *
 * @param conn  Intranode connection to print
 */
void nvlink_core_print_intranode_conn(nvlink_intranode_conn *conn);
#endif //_NVLINK_HELPER_H_

View File

@@ -0,0 +1,163 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "nvlink_ctx.h"
#include "nvlink_helper.h"
#include "nvlink_lock.h"
//
// Global context of the nvlink core library. It starts out zeroed, so the
// device list head reads as "not initialized" until nvlink_lib_initialize()
// sets it up.
//
nvlink_lib_context nvlinkLibCtx = {0};
/*
 * Initialize the nvlink core library
 *
 * Allocates the top-level lock and, while holding it, sets up the library
 * context: the device list head, the intranode and internode connection list
 * heads, the endpoint counters and the fabric node id. The call does nothing
 * when the device list head is already marked as initialized.
 *
 * return NVL_SUCCESS if the library is initialized successfully (or was
 * already initialized), else the status of the top-level lock operation
 * that failed
 */
NvlStatus
nvlink_lib_initialize(void)
{
    NvlStatus lock_status = NVL_SUCCESS;

    if (nvlinkLibCtx.nv_devicelist_head.initialized == 0)
    {
        // Allocate top-level lock
        lock_status = nvlink_lib_top_lock_alloc();
        if (lock_status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Failed to allocate top-level lock\n",
                __FUNCTION__));

            return lock_status;
        }

        // Acquire top-level lock
        lock_status = nvlink_lib_top_lock_acquire();
        if (lock_status != NVL_SUCCESS)
        {
            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
                "%s: Failed to acquire top-level lock\n",
                __FUNCTION__));

            //
            // Give the lock allocated above back before bailing out. Nothing
            // else frees it on this path, and nvlink_lib_top_lock_alloc()
            // refuses to run while a lock is still allocated, so leaving it
            // behind would make every later initialization attempt fail.
            //
            (void)nvlink_lib_top_lock_free();

            return lock_status;
        }

        // Top-level lock is now acquired

        // Initialize the device list head
        nvListInit(&nvlinkLibCtx.nv_devicelist_head.link_list);
        nvListInit(&nvlinkLibCtx.nv_devicelist_head.node);
        nvlinkLibCtx.nv_devicelist_head.initialized = 1;

        // Initialize the intranode connection list head
        nvListInit(&nvlinkLibCtx.nv_intraconn_head.node);

        // Initialize the internode connection list head
        nvListInit(&nvlinkLibCtx.nv_interconn_head.node);

        // Initialize registered and connected links to 0
        nvlinkLibCtx.registeredEndpoints   = 0;
        nvlinkLibCtx.connectedEndpoints    = 0;
        nvlinkLibCtx.notConnectedEndpoints = 0;

        //
        // Initialize fabric node id to max value until set
        // by ioctl interface
        //
        nvlinkLibCtx.nodeId = NV_U16_MAX ;

        // Release top-level lock
        nvlink_lib_top_lock_release();
    }

    return NVL_SUCCESS;
}
/*
 * Unload the nvlink core library
 *
 * Does nothing when the library is not initialized. Otherwise the top-level
 * lock is taken, the "initialized" marker on the device list head is cleared
 * if no devices are registered any more, and the top-level lock is released
 * and freed.
 *
 * NOTE(review): the top-level lock is freed even when devices are still
 * registered and the library therefore stays marked as initialized - confirm
 * that this is intended.
 *
 * return NVL_SUCCESS if the library is unloaded successfully, else the status
 * reported when the top-level lock could not be acquired
 */
NvlStatus
nvlink_lib_unload(void)
{
    NvlStatus status = NVL_SUCCESS;

    // Nothing to tear down when the library was never brought up
    if (!nvlink_lib_is_initialized())
    {
        return NVL_SUCCESS;
    }

    // Take the top-level lock for the duration of the teardown
    status = nvlink_lib_top_lock_acquire();
    if (status != NVL_SUCCESS)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to acquire top-level lock\n",
            __FUNCTION__));

        return status;
    }

    // Only drop the "initialized" marker once every device is gone
    if (nvlink_lib_is_device_list_empty())
    {
        nvlinkLibCtx.nv_devicelist_head.initialized = 0;
    }

    // Give up the top-level lock and destroy it
    nvlink_lib_top_lock_release();
    nvlink_lib_top_lock_free();

    return NVL_SUCCESS;
}
/*
* Check if the nvlink core library is initialized
*
* return NV_TRUE if the core library is already initialized
*/
NvBool
nvlink_lib_is_initialized(void)
{
return nvlinkLibCtx.nv_devicelist_head.initialized;
}
/*
 * Check whether the list of registered devices is empty
 *
 * return NV_TRUE if no devices are registered in the core library,
 * NV_FALSE if at least one device is on the device list
 */
NvBool
nvlink_lib_is_device_list_empty(void)
{
    return nvListIsEmpty(&nvlinkLibCtx.nv_devicelist_head.node);
}

View File

@@ -0,0 +1,488 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvlink.h"
#include "nvtypes.h"
#include "nvlink_export.h"
#include "nvlink_os.h"
#include "nvlink_ctx.h"
#include "nvlink_helper.h"
#include "nvlink_lock.h"
//
// Only enabling locking for testing purposes at the moment.
// Disabled at all other times.
//
// While this is non-zero, every lock routine in this file returns
// NVL_SUCCESS right away without touching any lock.
//
#define LOCKING_DISABLED 1
// Sorts an array of links in place; used to fix the lock ordering
static void _sort_links(nvlink_link **, NvU32, NvBool (*)(void *, void *));
// Ordering predicate on (DBDF, link#) handed to _sort_links
static NvBool _compare(void *, void *);
/*
 * Allocate the top-level lock and store it in the library context.
 *
 * return NVL_SUCCESS if the lock was allocated (or locking is disabled),
 * NVL_ERR_GENERIC if a top-level lock already exists or the allocation failed
 */
NvlStatus
nvlink_lib_top_lock_alloc(void)
{
    if (LOCKING_DISABLED)
    {
        return NVL_SUCCESS;
    }

    // Refuse to allocate a second top-level lock
    if (NULL != nvlinkLibCtx.topLevelLock)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Top-level lock already allocated\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    // The slot is known to be NULL here, so a failed allocation leaves it NULL
    nvlinkLibCtx.topLevelLock = nvlink_allocLock();
    if (nvlinkLibCtx.topLevelLock == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to allocate top-level lock\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    return NVL_SUCCESS;
}
/*
 * Free the top-level lock and clear it from the library context.
 *
 * return NVL_SUCCESS if the lock was freed (or locking is disabled),
 * NVL_ERR_GENERIC if there is no top-level lock to free
 */
NvlStatus
nvlink_lib_top_lock_free(void)
{
    if (LOCKING_DISABLED)
    {
        return NVL_SUCCESS;
    }

    if (nvlinkLibCtx.topLevelLock != NULL)
    {
        // Destroy the lock and forget about it
        nvlink_freeLock(nvlinkLibCtx.topLevelLock);
        nvlinkLibCtx.topLevelLock = NULL;

        return NVL_SUCCESS;
    }

    // Never allocated, or freed already
    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
        "%s: Top-level lock not allocated/already freed\n",
        __FUNCTION__));

    return NVL_ERR_GENERIC;
}
/*
 * Allocate the per-link lock of the given link.
 *
 * link: link that receives the newly allocated lock
 *
 * return NVL_SUCCESS if the lock was allocated (or locking is disabled),
 * NVL_ERR_GENERIC if the link already has a lock or the allocation failed
 */
NvlStatus
nvlink_lib_link_lock_alloc
(
    nvlink_link *link
)
{
    if (LOCKING_DISABLED)
    {
        return NVL_SUCCESS;
    }

    // A link carries at most one lock
    if (NULL != link->linkLock)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Link lock already allocated on this link\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    // The slot is known to be NULL here, so a failed allocation leaves it NULL
    link->linkLock = nvlink_allocLock();
    if (link->linkLock == NULL)
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Failed to allocate link lock\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    return NVL_SUCCESS;
}
/*
 * Free the per-link lock of the given link.
 *
 * link: link whose lock is destroyed
 *
 * return NVL_SUCCESS if the lock was freed (or locking is disabled),
 * NVL_ERR_GENERIC if the link has no lock to free
 */
NvlStatus
nvlink_lib_link_lock_free
(
    nvlink_link *link
)
{
    if (LOCKING_DISABLED)
    {
        return NVL_SUCCESS;
    }

    if (link->linkLock != NULL)
    {
        // Destroy the lock and detach it from the link
        nvlink_freeLock(link->linkLock);
        link->linkLock = NULL;

        return NVL_SUCCESS;
    }

    // Never allocated, or freed already
    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
        "%s: Link lock not allocated/already freed\n",
        __FUNCTION__));

    return NVL_ERR_GENERIC;
}
/*
 * Acquire the top-level lock.
 *
 * return NVL_SUCCESS if the lock was acquired (or locking is disabled),
 * NVL_ERR_GENERIC if the top-level lock has not been allocated
 */
NvlStatus
nvlink_lib_top_lock_acquire(void)
{
    if (LOCKING_DISABLED)
    {
        return NVL_SUCCESS;
    }

    if (nvlinkLibCtx.topLevelLock != NULL)
    {
        //
        // ToDo: Check if the lock was acquired successfully
        // Currently the nvlink_acquireLock function doesn't report failures
        //
        nvlink_acquireLock(nvlinkLibCtx.topLevelLock);

        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
            "%s: Acquired top-level lock\n",
            __FUNCTION__));

        return NVL_SUCCESS;
    }

    // There is no lock to take
    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
        "%s: Top-level lock is not allocated\n",
        __FUNCTION__));

    return NVL_ERR_GENERIC;
}
/*
 * Release the top-level lock.
 *
 * return NVL_SUCCESS if the lock was released (or locking is disabled),
 * NVL_ERR_GENERIC if the top-level lock has not been allocated
 */
NvlStatus
nvlink_lib_top_lock_release(void)
{
    if (LOCKING_DISABLED)
    {
        return NVL_SUCCESS;
    }

    if (nvlinkLibCtx.topLevelLock != NULL)
    {
        //
        // ToDo: Check if the lock was released successfully
        // Currently the nvlink_releaseLock function doesn't report failures
        //
        nvlink_releaseLock(nvlinkLibCtx.topLevelLock);

        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
            "%s: Released top-level lock\n",
            __FUNCTION__));

        return NVL_SUCCESS;
    }

    // There is no lock to release
    NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
        "%s: Top-level lock is not allocated\n",
        __FUNCTION__));

    return NVL_ERR_GENERIC;
}
/*
 * Sort the array of links in order of (DBDF, link#) -
 * lowest to highest and acquire link locks.
 *
 * The array is sorted in place so that the locks are always taken in the
 * same order. A link that shows up on consecutive positions after sorting
 * (e.g. a loopback link) is locked only once.
 *
 * links:    array of links whose locks are acquired (reordered by this call)
 * numLinks: number of entries in links; must be greater than 0
 *
 * return NVL_SUCCESS if the link locks were acquired (or locking is
 * disabled), NVL_ERR_GENERIC if links is NULL or numLinks is not positive.
 * nvlink_acquireLock does not report failures, so no other error is returned.
 */
NvlStatus
nvlink_lib_link_locks_acquire
(
    nvlink_link **links,
    int           numLinks
)
{
    int          i;
    nvlink_link *link_prev = NULL;

    if (LOCKING_DISABLED)
    {
        return NVL_SUCCESS;
    }

    //
    // Check that there is something to lock before attempting to acquire.
    // A negative count is rejected too: it would otherwise be converted to
    // a huge unsigned value when handed to _sort_links.
    //
    if ((NULL == links) || (numLinks <= 0))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Could not acquire the link locks. Link array is empty !\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    // Sort the link array in increasing order of (DBDF, link#)
    _sort_links(links, (NvU32)numLinks, _compare);

    for (i = 0; i < numLinks; i++)
    {
        //
        // Don't acquire locks on loop back links twice since the current link is
        // the same as the previous one
        //
        if (links[i] != link_prev)
        {
            nvlink_acquireLock(links[i]->linkLock);

            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
                "%s: Acquire link lock for dom:%d bus:%d dev:%d fun:%d link:%d\n",
                __FUNCTION__,
                links[i]->dev->pciInfo.domain, links[i]->dev->pciInfo.bus,
                links[i]->dev->pciInfo.device, links[i]->dev->pciInfo.function,
                links[i]->linkNumber));
        }

        link_prev = links[i];
    }

    //
    // ToDo: Check if the lock was acquired successfully
    // Currently the nvlink_acquireLock function doesn't report failures
    //
    return NVL_SUCCESS;
}
/*
 * Loop over all the links and call nvlink_releaseLock(links[i]->linkLock).
 *
 * The array is sorted in place in increasing order of (DBDF, link#) first,
 * so that a link present on consecutive positions (e.g. a loopback link) is
 * unlocked only once.
 *
 * links:    array of links whose locks are released (reordered by this call)
 * numLinks: number of entries in links; must be greater than 0
 *
 * return NVL_SUCCESS if the link locks were released (or locking is
 * disabled), NVL_ERR_GENERIC if links is NULL or numLinks is not positive.
 * nvlink_releaseLock does not report failures, so no other error is returned.
 */
NvlStatus
nvlink_lib_link_locks_release
(
    nvlink_link **links,
    int           numLinks
)
{
    int          i;
    nvlink_link *link_prev = NULL;

    if (LOCKING_DISABLED)
    {
        return NVL_SUCCESS;
    }

    //
    // Check if array of links is already empty before attempting to release.
    // A negative count is rejected too: it would otherwise be converted to
    // a huge unsigned value when handed to _sort_links.
    //
    if ((NULL == links) || (numLinks <= 0))
    {
        NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
            "%s: Could not release the link locks. Link array is empty !\n",
            __FUNCTION__));

        return NVL_ERR_GENERIC;
    }

    // Sort the link array in increasing order of (DBDF, link#)
    _sort_links(links, (NvU32)numLinks, _compare);

    for (i = 0; i < numLinks; i++)
    {
        //
        // Don't release locks on loop back links twice since the current link is
        // the same as the previous one
        //
        if (links[i] != link_prev)
        {
            nvlink_releaseLock(links[i]->linkLock);

            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
                "%s: Release link lock for dom:%d bus:%d dev:%d fun:%d link:%d\n",
                __FUNCTION__,
                links[i]->dev->pciInfo.domain, links[i]->dev->pciInfo.bus,
                links[i]->dev->pciInfo.device, links[i]->dev->pciInfo.function,
                links[i]->linkNumber));
        }

        link_prev = links[i];
    }

    //
    // ToDo: Check if the lock was released successfully
    // Currently the nvlink_releaseLock function doesn't report failures
    //
    return NVL_SUCCESS;
}
/*
 * Sorts the links in place into the order defined by the compare callback
 * (increasing order of DBDF, link# when called with _compare).
 *
 * links:    array of links to sort
 * numLinks: number of entries in links
 * compare:  predicate returning NV_TRUE when its first argument must be
 *           placed before its second argument
 */
static void
_sort_links
(
    nvlink_link **links,
    NvU32         numLinks,
    NvBool      (*compare)(void *, void *)
)
{
    nvlink_link *temp = NULL;
    NvU32        i, j;

    for (i = 0; i < numLinks; i++)
    {
        for (j = i + 1; j < numLinks; j++)
        {
            //
            // Honor the caller-supplied predicate rather than a fixed one:
            // move the smaller element to position i.
            //
            if (compare(links[j], links[i]))
            {
                temp     = links[i];
                links[i] = links[j];
                links[j] = temp;
            }
        }
    }
}
/*
 * Compare function for _sort_links - compares DBDF, link#
 *
 * The PCI domain, bus, device and function of the owning devices are compared
 * in that order; the first field that differs decides. When all of them are
 * equal the link numbers decide.
 *
 * link1, link2: the two nvlink_link objects to compare
 *
 * return NV_TRUE if link1 orders strictly before link2, else NV_FALSE
 */
static NvBool
_compare
(
    void *link1,
    void *link2
)
{
    const nvlink_link *lhs = (const nvlink_link *) link1;
    const nvlink_link *rhs = (const nvlink_link *) link2;

    // Domain is the most significant key
    if (lhs->dev->pciInfo.domain != rhs->dev->pciInfo.domain)
    {
        return (lhs->dev->pciInfo.domain < rhs->dev->pciInfo.domain) ? NV_TRUE : NV_FALSE;
    }

    // Same domain: the bus decides
    if (lhs->dev->pciInfo.bus != rhs->dev->pciInfo.bus)
    {
        return (lhs->dev->pciInfo.bus < rhs->dev->pciInfo.bus) ? NV_TRUE : NV_FALSE;
    }

    // Same bus: the device decides
    if (lhs->dev->pciInfo.device != rhs->dev->pciInfo.device)
    {
        return (lhs->dev->pciInfo.device < rhs->dev->pciInfo.device) ? NV_TRUE : NV_FALSE;
    }

    // Same device: the function decides
    if (lhs->dev->pciInfo.function != rhs->dev->pciInfo.function)
    {
        return (lhs->dev->pciInfo.function < rhs->dev->pciInfo.function) ? NV_TRUE : NV_FALSE;
    }

    // DBDF is identical for both links: fall back to the link number
    return (lhs->linkNumber < rhs->linkNumber) ? NV_TRUE : NV_FALSE;
}