mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-02-27 02:13:59 +00:00
515.43.04
This commit is contained in:
566
src/common/nvlink/interface/nvlink.h
Normal file
566
src/common/nvlink/interface/nvlink.h
Normal file
@@ -0,0 +1,566 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//
|
||||
// nvlink.h
|
||||
//
|
||||
|
||||
#ifndef _NVLINK_H_
|
||||
#define _NVLINK_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <nv-kernel-interface-api.h>
|
||||
#include "nvlink_common.h"
|
||||
#include "nvlink_lib_ctrl.h"
|
||||
#include "nv_list.h"
|
||||
#include "nvlink_errors.h"
|
||||
#include "nvCpuUuid.h"
|
||||
|
||||
// Debug Prints
|
||||
//
// Debug print support.
// Printing is compiled in only when DEVELOP, DEBUG or NV_MODS is defined.
//
#if defined(DEVELOP) || defined(DEBUG) || defined(NV_MODS)
|
||||
// Defined (as 1) only in print-enabled builds
#define NVLINK_PRINT_ENABLED 1
|
||||
// The single macro argument is placed verbatim after nvlink_print, so callers
// must supply the complete parenthesized argument list: NVLINK_PRINT((...)).
// nvlink_print itself is not declared in this header.
#define NVLINK_PRINT(format_and_stuff) nvlink_print format_and_stuff
|
||||
|
||||
// Call-site tag: expands to three comma-separated arguments
// (file name, line number, function name)
#define DBG_MODULE_NVLINK_CORE __FILE__, __LINE__, __FUNCTION__
|
||||
// The per-module tags below are all aliases of the core tag
#define DBG_MODULE_IBMNPU DBG_MODULE_NVLINK_CORE
|
||||
#define DBG_MODULE_TEGRASHIM DBG_MODULE_NVLINK_CORE
|
||||
#define DBG_MODULE_EBRIDGE DBG_MODULE_NVLINK_CORE
|
||||
#define DBG_MODULE_NVSWITCH DBG_MODULE_NVLINK_CORE
|
||||
#else
|
||||
// Prints disabled: the argument is discarded (never evaluated) and the macro
// becomes a no-op expression. NVLINK_PRINT_ENABLED and the DBG_MODULE_* tags
// are not defined in this branch.
#define NVLINK_PRINT(format_and_stuff) ((void)(0))
|
||||
#endif
|
||||
|
||||
// Devices that support NVLINK
|
||||
#define NVLINK_DEVICE_TYPE_EBRIDGE 0x0
|
||||
#define NVLINK_DEVICE_TYPE_IBMNPU 0x1
|
||||
#define NVLINK_DEVICE_TYPE_GPU 0x2
|
||||
#define NVLINK_DEVICE_TYPE_NVSWITCH 0x3
|
||||
#define NVLINK_DEVICE_TYPE_TEGRASHIM 0x4
|
||||
|
||||
// NVLink versions
|
||||
#define NVLINK_DEVICE_VERSION_10 0x00000001
|
||||
#define NVLINK_DEVICE_VERSION_20 0x00000002
|
||||
#define NVLINK_DEVICE_VERSION_22 0x00000004
|
||||
#define NVLINK_DEVICE_VERSION_30 0x00000005
|
||||
#define NVLINK_DEVICE_VERSION_31 0x00000006
|
||||
#define NVLINK_DEVICE_VERSION_40 0x00000007
|
||||
|
||||
// Link Transition Timeouts in milliseconds
|
||||
#define NVLINK_TRANSITION_OFF_TIMEOUT 1
|
||||
#define NVLINK_TRANSITION_SAFE_TIMEOUT 300
|
||||
#define NVLINK_TRANSITION_HS_TIMEOUT 8000
|
||||
#define NVLINK_TRANSITION_POST_HS_TIMEOUT 70
|
||||
|
||||
// Link training seed values
|
||||
// Maximum number of link training seed values
#define NVLINK_MAX_SEED_NUM 6
// Number of elements in a seed buffer: NVLINK_MAX_SEED_NUM plus one extra slot.
// The expansion is parenthesized so that it behaves as a single operand inside
// larger expressions (e.g. NVLINK_MAX_SEED_BUFFER_SIZE * sizeof(NvU32)).
#define NVLINK_MAX_SEED_BUFFER_SIZE (NVLINK_MAX_SEED_NUM + 1)
|
||||
|
||||
#define NVLINK_MAX_SYSTEM_LINK_NUM 624
|
||||
|
||||
// Forwards
|
||||
struct nvlink_device;
|
||||
struct nvlink_device_handle;
|
||||
struct nvlink_link;
|
||||
struct nvlink_link_handlers;
|
||||
|
||||
// nvlink device state
|
||||
//
// Per-device record kept by the NVLink core library.
// Passed by pointer to nvlink_lib_register_device()/nvlink_lib_unregister_device()
// and referenced from each nvlink_link through its dev member.
//
struct nvlink_device
|
||||
{
|
||||
// List linkage for this device
// (presumably the member walked by FOR_EACH_DEVICE_REGISTERED -- confirm)
NVListRec node;
|
||||
|
||||
// List of links associated with this device
// (iterated by FOR_EACH_LINK_REGISTERED / FOR_EACH_LINK_REGISTERED_SAFE)
|
||||
NVListRec link_list;
|
||||
|
||||
// Uniquely identifies a device in the core
|
||||
NvU64 deviceId;
|
||||
|
||||
// Client supplied names and ids
// NOTE(review): these are raw pointers; ownership/lifetime is not visible here
|
||||
char *driverName;
|
||||
char *deviceName;
|
||||
NvU8 *uuid;
|
||||
|
||||
// PCI Information
|
||||
struct nvlink_pci_info pciInfo;
|
||||
|
||||
// Device type and status
// (type is presumably one of NVLINK_DEVICE_TYPE_* -- confirm)
|
||||
NvU64 type;
|
||||
NvBool initialized;
|
||||
|
||||
// fabric node id
|
||||
NvU16 nodeId;
|
||||
|
||||
// Client private information (opaque pointer)
|
||||
void *pDevInfo;
|
||||
};
|
||||
|
||||
// nvlink link change type
|
||||
//
// Kind of link change carried in struct nvlink_link_change (change_type member).
// Enumerators take the implicit values 0 and 1.
//
enum nvlink_link_change_type
|
||||
{
|
||||
// Presumably: retrain starting from the OFF state -- confirm
nvlink_retrain_from_off,
|
||||
// Presumably: retrain starting from the SAFE state -- confirm
nvlink_retrain_from_safe,
|
||||
|
||||
};
|
||||
|
||||
// nvlink link_change parameters
|
||||
//
// Parameters of a link change request.
// An instance is embedded in every nvlink_link (link_change member) and a
// pointer to one is the argument of the queue_link_change link handler.
//
struct nvlink_link_change
|
||||
{
|
||||
// The two link endpoints involved in the change
struct nvlink_link *master;
|
||||
struct nvlink_link *slave;
|
||||
|
||||
// Which kind of change is requested
enum nvlink_link_change_type change_type;
|
||||
};
|
||||
|
||||
// nvlink link state
|
||||
//
// Per-link record kept by the NVLink core library.
// Registered against a device with nvlink_lib_register_link() and passed to
// every callback in struct nvlink_link_handlers.
//
struct nvlink_link
|
||||
{
|
||||
// List linkage for this link
// (presumably the entry chained on nvlink_device::link_list -- confirm)
NVListRec node;
|
||||
|
||||
// Device the link is associated with
|
||||
struct nvlink_device *dev;
|
||||
|
||||
// Lock for per link structure (opaque pointer)
|
||||
void *linkLock;
|
||||
|
||||
// Uniquely identifies a link in the core
|
||||
NvU64 linkId;
|
||||
|
||||
// If this link is the master of its connection
|
||||
NvBool master;
|
||||
|
||||
// Client supplied link name and number
|
||||
char *linkName;
|
||||
NvU32 linkNumber;
|
||||
|
||||
// NOTE(review): undocumented; presumably the discovery token exchanged via
// the write_discovery_token/read_discovery_token handlers -- confirm
NvU64 token;
|
||||
|
||||
// Link state (presumably one of NVLINK_LINKSTATE_* -- confirm)
|
||||
NvU32 state;
|
||||
NvBool inSWCFG;
|
||||
|
||||
// Sublink states (presumably NVLINK_SUBLINK_STATE_TX_* / _RX_* -- confirm)
|
||||
NvU32 tx_sublink_state;
|
||||
NvU32 rx_sublink_state;
|
||||
|
||||
// Has receiver detect passed
|
||||
NvBool bRxDetected;
|
||||
|
||||
// Link failed when sending InitPll to minion
|
||||
NvBool bTxCommonModeFail;
|
||||
|
||||
// Link failed when transitioning to SWCFG
|
||||
NvBool bSafeTransitionFail;
|
||||
|
||||
// Link failed when sending INITPHASE5 to minion
|
||||
NvBool bInitphase5Fails;
|
||||
|
||||
// IP version (presumably one of NVLINK_DEVICE_VERSION_* -- confirm)
|
||||
NvU32 version;
|
||||
|
||||
// Has state been saved
|
||||
NvBool bStateSaved;
|
||||
|
||||
// Number of retries to put link to safe
|
||||
NvU32 safe_retries;
|
||||
|
||||
// Set if LINK is ac coupled
|
||||
NvBool ac_coupled;
|
||||
|
||||
// Number of retries to discover the other end of the link
|
||||
NvU32 packet_injection_retries;
|
||||
|
||||
// Local Sid of the link.
|
||||
NvU64 localSid;
|
||||
|
||||
// Remote Sid of the link.
|
||||
NvU64 remoteSid;
|
||||
|
||||
// Remote LinkId to which the current link is connected.
|
||||
NvU32 remoteLinkId;
|
||||
|
||||
// NOTE(review): undocumented; presumably the NVLINK_DEVICE_TYPE_* of the
// remote end -- confirm
NvU32 remoteDeviceType;
|
||||
|
||||
// Has INITNEGOTIATE received CONFIG_GOOD (NVL3.0+)
|
||||
NvBool bInitnegotiateConfigGood;
|
||||
|
||||
// Power state transition status
|
||||
enum
|
||||
{
|
||||
nvlink_power_state_in_L0,
|
||||
nvlink_power_state_entering_L2,
|
||||
nvlink_power_state_in_L2,
|
||||
nvlink_power_state_exiting_L2
|
||||
} powerStateTransitionStatus;
|
||||
|
||||
// Link handlers (callback table; read-only through this pointer)
|
||||
const struct nvlink_link_handlers *link_handlers;
|
||||
|
||||
// Client private information (opaque pointer)
|
||||
void *link_info;
|
||||
|
||||
// Outstanding link change request information
|
||||
struct nvlink_link_change link_change;
|
||||
|
||||
// Seed data for this link; holds NVLINK_MAX_SEED_BUFFER_SIZE entries
|
||||
NvU32 seedData[NVLINK_MAX_SEED_BUFFER_SIZE];
|
||||
};
|
||||
|
||||
// nvlink link handler ops
|
||||
//
// Table of per-link callbacks, reached through nvlink_link::link_handlers.
// Every callback except queue_link_change takes the link it operates on;
// queue_link_change takes a link change request instead.
// unlock, training_complete and get_uphy_load return nothing; all other
// callbacks report an NvlStatus.
// The set_* callbacks take a flags argument
// (presumably NVLINK_STATE_CHANGE_ASYNC / NVLINK_STATE_CHANGE_SYNC -- confirm);
// the get_* callbacks return their results through pointer out-parameters.
//
struct nvlink_link_handlers
|
||||
{
|
||||
NV_API_CALL NvlStatus (*add) (struct nvlink_link *link);
|
||||
NV_API_CALL NvlStatus (*remove) (struct nvlink_link *link);
|
||||
NV_API_CALL NvlStatus (*lock) (struct nvlink_link *link);
|
||||
NV_API_CALL void (*unlock) (struct nvlink_link *link);
|
||||
NV_API_CALL NvlStatus (*queue_link_change) (struct nvlink_link_change *link_change);
|
||||
NV_API_CALL NvlStatus (*set_dl_link_mode) (struct nvlink_link *link, NvU64 mode, NvU32 flags);
|
||||
NV_API_CALL NvlStatus (*get_dl_link_mode) (struct nvlink_link *link, NvU64 *mode);
|
||||
NV_API_CALL NvlStatus (*set_tl_link_mode) (struct nvlink_link *link, NvU64 mode, NvU32 flags);
|
||||
NV_API_CALL NvlStatus (*get_tl_link_mode) (struct nvlink_link *link, NvU64 *mode);
|
||||
NV_API_CALL NvlStatus (*set_tx_mode) (struct nvlink_link *link, NvU64 mode, NvU32 flags);
|
||||
NV_API_CALL NvlStatus (*get_tx_mode) (struct nvlink_link *link, NvU64 *mode, NvU32 *subMode);
|
||||
NV_API_CALL NvlStatus (*set_rx_mode) (struct nvlink_link *link, NvU64 mode, NvU32 flags);
|
||||
NV_API_CALL NvlStatus (*get_rx_mode) (struct nvlink_link *link, NvU64 *mode, NvU32 *subMode);
|
||||
NV_API_CALL NvlStatus (*set_rx_detect) (struct nvlink_link *link, NvU32 flags);
|
||||
NV_API_CALL NvlStatus (*get_rx_detect) (struct nvlink_link *link);
|
||||
NV_API_CALL NvlStatus (*write_discovery_token) (struct nvlink_link *link, NvU64 token);
|
||||
NV_API_CALL NvlStatus (*read_discovery_token) (struct nvlink_link *link, NvU64 *token);
|
||||
NV_API_CALL void (*training_complete) (struct nvlink_link *link);
|
||||
NV_API_CALL void (*get_uphy_load) (struct nvlink_link *link, NvBool* bUnlocked);
|
||||
};
|
||||
|
||||
//
|
||||
// Represents an intranode connection in a single/multi-node system.
|
||||
// Both endpoints of the connection are visible from the same node.
|
||||
//
|
||||
struct nvlink_intranode_conn
|
||||
{
|
||||
NVListRec node;
|
||||
struct nvlink_link *end0;
|
||||
struct nvlink_link *end1;
|
||||
};
|
||||
|
||||
//
|
||||
// Represents internode connections in a multi-node system.
|
||||
// One of the endpoints of the connection must be a local link.
|
||||
//
|
||||
struct nvlink_internode_conn
|
||||
{
|
||||
NVListRec node;
|
||||
struct nvlink_link *local_end;
|
||||
nvlink_remote_endpoint_info remote_end;
|
||||
};
|
||||
|
||||
|
||||
// Typedefs
|
||||
typedef struct nvlink_device nvlink_device;
|
||||
typedef struct nvlink_device_handle nvlink_device_handle;
|
||||
typedef struct nvlink_link nvlink_link;
|
||||
typedef struct nvlink_link_change nvlink_link_change;
|
||||
typedef struct nvlink_device_handlers nvlink_device_handlers;
|
||||
typedef struct nvlink_link_handlers nvlink_link_handlers;
|
||||
typedef struct nvlink_intranode_conn nvlink_intranode_conn;
|
||||
typedef struct nvlink_internode_conn nvlink_internode_conn;
|
||||
typedef enum nvlink_link_change_type nvlink_link_change_type;
|
||||
typedef struct nvlink_inband_data nvlink_inband_data;
|
||||
|
||||
|
||||
#define NVLINK_MAX_NUM_SAFE_RETRIES 7
|
||||
#define NVLINK_MAX_NUM_PACKET_INJECTION_RETRIES 4
|
||||
|
||||
|
||||
// NVLINK LINK states
|
||||
#define NVLINK_LINKSTATE_OFF 0x00 // OFF
|
||||
#define NVLINK_LINKSTATE_HS 0x01 // High Speed
|
||||
#define NVLINK_LINKSTATE_SAFE 0x02 // Safe/Discovery State
|
||||
#define NVLINK_LINKSTATE_FAULT 0x03 // Faulty
|
||||
#define NVLINK_LINKSTATE_RECOVERY 0x04 // Recovery
|
||||
#define NVLINK_LINKSTATE_FAIL 0x05 // Unconnected/Fail
|
||||
#define NVLINK_LINKSTATE_DETECT 0x06 // Detect mode
|
||||
#define NVLINK_LINKSTATE_RESET 0x07 // Reset
|
||||
#define NVLINK_LINKSTATE_ENABLE_PM 0x08 // Enable Link Power Management
|
||||
#define NVLINK_LINKSTATE_DISABLE_PM 0x09 // Disable Link Power Management
|
||||
#define NVLINK_LINKSTATE_SLEEP 0x0A // Sleep (L2)
|
||||
#define NVLINK_LINKSTATE_SAVE_STATE 0x0B // Save state while entering L2
|
||||
#define NVLINK_LINKSTATE_RESTORE_STATE 0x0C // Restore state while exiting L2
|
||||
#define NVLINK_LINKSTATE_PRE_HS 0x0E // Settings before moving to High Speed
|
||||
#define NVLINK_LINKSTATE_DISABLE_ERR_DETECT 0x0F // Disable Error detection (interrupt)
|
||||
#define NVLINK_LINKSTATE_LANE_DISABLE 0x10 // Disable Lanes
|
||||
#define NVLINK_LINKSTATE_LANE_SHUTDOWN 0x11 // Shutdown Lanes in PHY
|
||||
#define NVLINK_LINKSTATE_TRAFFIC_SETUP 0x12 // Setup traffic flow after ACTIVE
|
||||
#define NVLINK_LINKSTATE_INITPHASE1 0x13 // INITPHASE1
|
||||
#define NVLINK_LINKSTATE_INITNEGOTIATE 0x14 // Initialize the negotiation (Ampere And Later)
|
||||
#define NVLINK_LINKSTATE_POST_INITNEGOTIATE 0x15 // Sends DL stat
|
||||
#define NVLINK_LINKSTATE_INITOPTIMIZE 0x16 // INITOPTIMIZE
|
||||
#define NVLINK_LINKSTATE_POST_INITOPTIMIZE 0x17 // POST INITOPTIMIZE DL stat check
|
||||
#define NVLINK_LINKSTATE_DISABLE_HEARTBEAT 0x18 // Disables the heartbeat errors
|
||||
#define NVLINK_LINKSTATE_CONTAIN 0x19 // TL is in contain mode
|
||||
#define NVLINK_LINKSTATE_INITTL 0x1A // INITTL
|
||||
#define NVLINK_LINKSTATE_INVALID 0xFF // Invalid state
|
||||
|
||||
// NVLINK TX SUBLINK states
|
||||
#define NVLINK_SUBLINK_STATE_TX_HS 0x0 // TX High Speed
|
||||
#define NVLINK_SUBLINK_STATE_TX_SINGLE_LANE 0x4 // TX Single Lane (1/8th or 1/4th) Mode (Deprecated)
|
||||
#define NVLINK_SUBLINK_STATE_TX_TRAIN 0x5 // TX training
|
||||
#define NVLINK_SUBLINK_STATE_TX_SAFE 0x6 // TX Safe Mode
|
||||
#define NVLINK_SUBLINK_STATE_TX_OFF 0x7 // TX OFF
|
||||
#define NVLINK_SUBLINK_STATE_TX_COMMON_MODE 0x8 // TX common mode enable
|
||||
#define NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE 0x9 // TX common mode disable
|
||||
#define NVLINK_SUBLINK_STATE_TX_DATA_READY 0xA // Do Data Ready and Data Enable
|
||||
#define NVLINK_SUBLINK_STATE_TX_EQ 0xB // TX equalization
|
||||
#define NVLINK_SUBLINK_STATE_TX_PRBS_EN 0xC // TX IOBIST PRBS generator enable
|
||||
#define NVLINK_SUBLINK_STATE_TX_POST_HS 0xD // TX Post High Speed settings
|
||||
|
||||
// NVLINK RX SUBLINK states
|
||||
#define NVLINK_SUBLINK_STATE_RX_HS 0x0 // RX High Speed
|
||||
#define NVLINK_SUBLINK_STATE_RX_SINGLE_LANE 0x4 // RX Single Lane (1/8th or 1/4th) Mode (Deprecated)
|
||||
#define NVLINK_SUBLINK_STATE_RX_TRAIN 0x5 // RX training
|
||||
#define NVLINK_SUBLINK_STATE_RX_SAFE 0x6 // RX Safe Mode
|
||||
#define NVLINK_SUBLINK_STATE_RX_OFF 0x7 // RX OFF
|
||||
#define NVLINK_SUBLINK_STATE_RX_RXCAL 0x8 // RX in calibration
|
||||
#define NVLINK_SUBLINK_STATE_RX_INIT_TERM 0x9 // Enable RX termination
|
||||
|
||||
// NVLINK TX SUBLINK sub-states
|
||||
#define NVLINK_SUBLINK_SUBSTATE_TX_STABLE 0x0 // TX Stable
|
||||
|
||||
// NVLINK RX SUBLINK sub-states
|
||||
#define NVLINK_SUBLINK_SUBSTATE_RX_STABLE 0x0 // RX Stable
|
||||
|
||||
// State change flags
|
||||
#define NVLINK_STATE_CHANGE_ASYNC 0x0 // Don't wait for the state change to complete
|
||||
#define NVLINK_STATE_CHANGE_SYNC 0x1 // Wait for the state change to complete
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/***************************** NVLink library management functions ******************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/*
|
||||
* Check if the nvlink core library is initialized
|
||||
*/
|
||||
NvBool nvlink_lib_is_initialized(void);
|
||||
|
||||
/*
|
||||
* Check if there are no devices registered
|
||||
*/
|
||||
NvBool nvlink_lib_is_device_list_empty(void);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/************************** NVLink library driver-side interface ********************************/
|
||||
/***************** Manages device and link registration and un-registration *********************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/*
|
||||
* Associates device in the NVLink Core
|
||||
* During the call, the calling driver must support callbacks into the driver from Core
|
||||
*/
|
||||
NvlStatus nvlink_lib_register_device(nvlink_device *dev);
|
||||
|
||||
/*
|
||||
* Unassociates device in the NVLink Core
|
||||
* Includes removing any links related to the device if still registered
|
||||
* During the call, the calling driver must support callbacks into the driver from Core
|
||||
*/
|
||||
NvlStatus nvlink_lib_unregister_device(nvlink_device *dev);
|
||||
|
||||
|
||||
/*
|
||||
* Associates link with a device in the NVLink Core
|
||||
* During the call, the calling driver must support callbacks into the driver from Core
|
||||
*/
|
||||
NvlStatus nvlink_lib_register_link(nvlink_device *dev, nvlink_link *link);
|
||||
|
||||
/*
|
||||
* Unassociates link from a device in the NVLink Core
|
||||
* During the call, the calling driver must support callbacks into the driver from Core
|
||||
*/
|
||||
NvlStatus nvlink_lib_unregister_link(nvlink_link *link);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/******************************* NVLink link management functions *******************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/*
|
||||
* Check if the device has no links registered
|
||||
*/
|
||||
NvBool nvlink_lib_is_link_list_empty(nvlink_device *dev);
|
||||
|
||||
/*
|
||||
* Get the link associated with the given device's link number
|
||||
*/
|
||||
NvlStatus nvlink_lib_get_link(nvlink_device *device,
|
||||
NvU32 link_id,
|
||||
nvlink_link **link);
|
||||
|
||||
/*
|
||||
* Set the link endpoint as the link master
|
||||
*/
|
||||
NvlStatus nvlink_lib_set_link_master(nvlink_link *link);
|
||||
|
||||
/*
|
||||
* Get the link master associated with this endpoint
|
||||
*/
|
||||
NvlStatus nvlink_lib_get_link_master(nvlink_link *link, nvlink_link **master);
|
||||
|
||||
/************************************************************************************************/
|
||||
/*************************** NVLink topology discovery functions ********************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/*
|
||||
* Get the connected remote endpoint information
|
||||
* For a given link, return the other endpoint details it is connected
|
||||
* to. If there is no connection associated with the given link, then
|
||||
* conn_info.bConnected member will be NV_FALSE.
|
||||
*
|
||||
* Note: This routine will not initiate any link initialization or topology
|
||||
* discovery.
|
||||
*/
|
||||
NvlStatus nvlink_lib_get_remote_conn_info(nvlink_link *link, nvlink_conn_info *conn_info);
|
||||
|
||||
/*
|
||||
* Get the connected remote endpoint information
|
||||
* For a given end of a link, returns the device and link information
|
||||
* for the remote end along with a boolean variable that specifies if
|
||||
* the topology detection was complete
|
||||
*/
|
||||
NvlStatus nvlink_lib_discover_and_get_remote_conn_info(nvlink_link *end,
|
||||
nvlink_conn_info *conn_info,
|
||||
NvU32 flags);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/****************************** NVLink initialization functions *********************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/*
|
||||
* Re-init a given link from OFF to SWCFG
|
||||
*/
|
||||
NvlStatus nvlink_lib_reinit_link_from_off_to_swcfg(nvlink_link *link,
|
||||
NvU32 flags);
|
||||
|
||||
/************************************************************************************************/
|
||||
/********************************** NVLink training functions ***********************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/*
|
||||
* Train a given set of links from SWCFG to ACTIVE state
|
||||
* a. For low training latency - caller passes all links as an array
|
||||
* b. For high training latency - caller passes links one by one
|
||||
*/
|
||||
NvlStatus nvlink_lib_train_links_from_swcfg_to_active(nvlink_link **links,
|
||||
NvU32 linkCount,
|
||||
NvU32 flags);
|
||||
|
||||
/*
|
||||
* Train a given set of links of a device from L2 to ACTIVE state
|
||||
*/
|
||||
NvlStatus nvlink_lib_train_links_from_L2_to_active(nvlink_device *dev,
|
||||
NvU32 linkMask,
|
||||
NvU32 flags);
|
||||
|
||||
/*
|
||||
* Retrain a given link from SWCFG to ACTIVE
|
||||
*/
|
||||
NvlStatus nvlink_lib_retrain_link_from_swcfg_to_active(nvlink_link *link,
|
||||
NvU32 flags);
|
||||
|
||||
/*
|
||||
* Save the seed Data passed in from an endpoint driver
|
||||
*/
|
||||
NvlStatus nvlink_lib_save_training_seeds(nvlink_link * link,
|
||||
NvU32 * seedData);
|
||||
/*
 * NOTE(review): this entry point has no description in the header.
 * Judging by its name and the seedDataCopy parameter, it presumably copies
 * the seed data stored for the given link into the caller's buffer, which
 * would then need room for NVLINK_MAX_SEED_BUFFER_SIZE NvU32 entries.
 * Confirm against the implementation.
 */
NvlStatus nvlink_lib_copy_training_seeds(nvlink_link * link,
|
||||
NvU32 * seedDataCopy);
|
||||
|
||||
/*
|
||||
* Send the endpoint driver back the seeds we have stored
|
||||
*/
|
||||
void nvlink_lib_restore_training_seeds(nvlink_link * link,
|
||||
NvU32 * seedData);
|
||||
|
||||
/*
|
||||
* Check that the requested links have trained to active
|
||||
*/
|
||||
NvlStatus nvlink_lib_check_training_complete(nvlink_link **links,
|
||||
NvU32 linkCount);
|
||||
|
||||
/************************************************************************************************/
|
||||
/********************************** NVLink shutdown functions ***********************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/*
|
||||
* [CLEAN SHUTDOWN]
|
||||
* Shutdown given links of a device from active to L2 state
|
||||
*/
|
||||
NvlStatus nvlink_lib_powerdown_links_from_active_to_L2(nvlink_device *dev,
|
||||
NvU32 linkMask,
|
||||
NvU32 flags);
|
||||
|
||||
/*
|
||||
* [PSEUDO-CLEAN SHUTDOWN]
|
||||
* Shutdown the given array of links from ACTIVE to OFF state
|
||||
*/
|
||||
NvlStatus nvlink_lib_powerdown_links_from_active_to_off(nvlink_link **links,
|
||||
NvU32 numLinks,
|
||||
NvU32 flags);
|
||||
|
||||
/*
|
||||
* Power down the given array of links from ACTIVE to SWCFG state
|
||||
*/
|
||||
NvlStatus nvlink_lib_powerdown_links_from_active_to_swcfg(nvlink_link **links,
|
||||
NvU32 numLinks,
|
||||
NvU32 flags);
|
||||
|
||||
/*
|
||||
* Reset the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_lib_reset_links(nvlink_link **links,
|
||||
NvU32 numLinks,
|
||||
NvU32 flags);
|
||||
|
||||
|
||||
/*
|
||||
* Nvlink core library structure iterators
|
||||
*/
|
||||
|
||||
//
// Iterators over the core library lists. Each one forwards to the
// nvListForEachEntry / nvListForEachEntry_safe helpers and is used as a loop
// header. The `node` argument names the list-linkage member; the `head` and
// `dev` arguments are parenthesized so that any expression (for example
// `*pHead` or `devs + i`) can be passed, not just a plain identifier.
//

// Visit every device chained on head.<node>
#define FOR_EACH_DEVICE_REGISTERED(dev, head, node) \
        nvListForEachEntry(dev, &(head).node, node)

// Visit every link chained on dev->link_list
#define FOR_EACH_LINK_REGISTERED(link, dev, node) \
        nvListForEachEntry(link, &(dev)->link_list, node)

// Same as FOR_EACH_LINK_REGISTERED, using the _safe list helper with the
// extra `next` cursor
#define FOR_EACH_LINK_REGISTERED_SAFE(link, next, dev, node) \
        nvListForEachEntry_safe(link, next, &(dev)->link_list, node)

// Visit every connection chained on head.<node>
#define FOR_EACH_CONNECTION(conn, head, node) \
        nvListForEachEntry(conn, &(head).node, node)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _NVLINK_H_
|
||||
173
src/common/nvlink/interface/nvlink_common.h
Normal file
173
src/common/nvlink/interface/nvlink_common.h
Normal file
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NVLINK_COMMON_H_
|
||||
#define _NVLINK_COMMON_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvCpuUuid.h"
|
||||
#include "nvlink_errors.h"
|
||||
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL ((void *)0)
|
||||
#endif
|
||||
|
||||
// nvlink pci bar information
|
||||
struct nvlink_pci_bar_info
|
||||
{
|
||||
NvU64 busAddress;
|
||||
NvU64 baseAddr;
|
||||
NvU64 barSize;
|
||||
NvU32 offset;
|
||||
void *pBar;
|
||||
};
|
||||
|
||||
#define MAX_NVLINK_BARS 2
|
||||
|
||||
// nvlink pci information
|
||||
struct nvlink_pci_info
|
||||
{
|
||||
NvU32 domain;
|
||||
NvU8 bus;
|
||||
NvU8 device;
|
||||
NvU8 function;
|
||||
NvU32 pciDeviceId;
|
||||
NvU32 irq;
|
||||
NvBool intHooked;
|
||||
struct nvlink_pci_bar_info bars[MAX_NVLINK_BARS];
|
||||
};
|
||||
|
||||
// nvlink detailed device information
|
||||
struct nvlink_detailed_device_info
|
||||
{
|
||||
char *deviceName;
|
||||
NvU64 deviceType;
|
||||
NvU8 *devUuid;
|
||||
NvBool bInitialized;
|
||||
void *dev_info; // Endpoint driver device info opaque
|
||||
// to core lib. Passed from end point
|
||||
// driver to core
|
||||
|
||||
struct nvlink_pci_info *pciInfo;
|
||||
};
|
||||
|
||||
// nvlink device registration parameters
|
||||
struct nvlink_device_register_params
|
||||
{
|
||||
//
|
||||
// Core lib device info opaque to endpoint driver
|
||||
// Passed from core lib to endpoint driver
|
||||
//
|
||||
void **deviceHandle;
|
||||
char *driverName;
|
||||
|
||||
struct nvlink_detailed_device_info *device_params;
|
||||
};
|
||||
|
||||
// nvlink detailed link information
|
||||
struct nvlink_detailed_link_info
|
||||
{
|
||||
void *deviceHandle; // Core library device handle passed
|
||||
// to endpoint driver during device
|
||||
// registration
|
||||
|
||||
void *link_info; // End point driver link info opaque
|
||||
// to core lib. Passed from end point
|
||||
// driver to core
|
||||
|
||||
char *linkName;
|
||||
NvU32 linkNumber;
|
||||
NvU32 version;
|
||||
NvBool bAcCoupled;
|
||||
const void *link_handlers;
|
||||
};
|
||||
|
||||
// nvlink link registration parameters
|
||||
struct nvlink_link_register_params
|
||||
{
|
||||
//
|
||||
// Core lib link info opaque to endpoint driver
|
||||
// Passed from core lib to endpoint driver
|
||||
//
|
||||
void **linkHandle;
|
||||
|
||||
struct nvlink_detailed_link_info *link_params;
|
||||
};
|
||||
|
||||
// nvlink client device handle
|
||||
struct nvlink_device_handle
|
||||
{
|
||||
NvU32 linkMask;
|
||||
struct nvlink_pci_info pciInfo;
|
||||
};
|
||||
|
||||
//
// printf-style helpers for printing a PCI address as domain:bus:device.function
// in hexadecimal. NVLINK_PCI_DEV_FMT is the format fragment and
// NVLINK_PCI_DEV_FMT_ARGS(info) expands to the four matching arguments, in the
// same order, read through the pointer `info` (anything with domain, bus,
// device and function members, e.g. struct nvlink_pci_info *).
// `info` is evaluated four times, so it should be free of side effects.
//
#define NVLINK_PCI_DEV_FMT "%04x:%02x:%02x.%x"
|
||||
#define NVLINK_PCI_DEV_FMT_ARGS(info) (info)->domain, \
|
||||
(info)->bus, \
|
||||
(info)->device, \
|
||||
(info)->function
|
||||
|
||||
// nvlink connection information
|
||||
//
// Description of the remote endpoint of a link, as filled in through the
// conn_info argument of nvlink_lib_get_remote_conn_info() and
// nvlink_lib_discover_and_get_remote_conn_info() (declared in nvlink.h).
//
struct nvlink_conn_info
|
||||
{
|
||||
// PCI location and device id
// (note: bus/device/function are NvU16 here, NvU8 in struct nvlink_pci_info)
NvU32 domain;
|
||||
NvU16 bus;
|
||||
NvU16 device;
|
||||
NvU16 function;
|
||||
NvU32 pciDeviceId;
|
||||
// UUID bytes, NV_UUID_LEN long
NvU8 devUuid[NV_UUID_LEN];
|
||||
// Presumably one of the NVLINK_DEVICE_TYPE_* values from nvlink.h -- confirm
NvU64 deviceType;
|
||||
NvU32 linkNumber;
|
||||
// NV_FALSE when no connection is associated with the queried link
NvBool bConnected;
|
||||
NvU64 chipSid;
|
||||
};
|
||||
|
||||
// nvlink ioctrl params
|
||||
//
// Argument bundle for nvlink_lib_ioctl_ctrl() (declared in nvlink_export.h).
//
struct nvlink_ioctrl_params
|
||||
{
|
||||
// Opaque OS-specific pointer
void *osPrivate;
|
||||
// Presumably the control command selector -- confirm
NvU32 cmd;
|
||||
// Presumably the command's parameter buffer and its size in bytes -- confirm
void *buf;
|
||||
NvU32 size;
|
||||
};
|
||||
|
||||
// Typedefs
|
||||
typedef struct nvlink_pci_bar_info nvlink_pci_bar_info;
|
||||
typedef struct nvlink_pci_info nvlink_pci_info;
|
||||
typedef struct nvlink_detailed_device_info nvlink_detailed_device_info;
|
||||
typedef struct nvlink_detailed_link_info nvlink_detailed_link_info;
|
||||
typedef struct nvlink_device_register_params nvlink_device_register_params;
|
||||
typedef struct nvlink_link_register_params nvlink_link_register_params;
|
||||
typedef struct nvlink_conn_info nvlink_conn_info;
|
||||
typedef struct nvlink_ioctrl_params nvlink_ioctrl_params;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_NVLINK_COMMON_H_
|
||||
47
src/common/nvlink/interface/nvlink_errors.h
Normal file
47
src/common/nvlink/interface/nvlink_errors.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015-2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NVLINK_ERRORS_H_
|
||||
#define _NVLINK_ERRORS_H_
|
||||
|
||||
// Status code type returned by the NVLink core library interfaces
typedef int NvlStatus;

//
// NVLink status codes.
// Every expansion is wrapped in an outer pair of parentheses so the cast and
// its operand act as one primary expression wherever the macro is used
// (for example `sizeof NVL_SUCCESS`, which does not parse otherwise).
//
#define NVL_SUCCESS                        ((NvlStatus) 0)
#define NVL_BAD_ARGS                       ((NvlStatus) 1)
#define NVL_NO_MEM                         ((NvlStatus) 2)
#define NVL_NOT_FOUND                      ((NvlStatus) 3)
#define NVL_INITIALIZATION_PARTIAL_FAILURE ((NvlStatus) 4)
#define NVL_INITIALIZATION_TOTAL_FAILURE   ((NvlStatus) 5)
#define NVL_PCI_ERROR                      ((NvlStatus) 6)
#define NVL_ERR_GENERIC                    ((NvlStatus) 7)
#define NVL_ERR_INVALID_STATE              ((NvlStatus) 8)
#define NVL_UNBOUND_DEVICE                 ((NvlStatus) 9)
#define NVL_MORE_PROCESSING_REQUIRED       ((NvlStatus)10)
#define NVL_IO_ERROR                       ((NvlStatus)11)
#define NVL_ERR_STATE_IN_USE               ((NvlStatus)12)
#define NVL_ERR_NOT_SUPPORTED              ((NvlStatus)13)
#define NVL_ERR_NOT_IMPLEMENTED            ((NvlStatus)14)
#define NVL_ERR_INSUFFICIENT_PERMISSIONS   ((NvlStatus)15)
#define NVL_ERR_OPERATING_SYSTEM           ((NvlStatus)16)
|
||||
|
||||
#endif // _NVLINK_ERRORS_H_
|
||||
53
src/common/nvlink/interface/nvlink_export.h
Normal file
53
src/common/nvlink/interface/nvlink_export.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NVLINK_EXPORT_H_
|
||||
#define _NVLINK_EXPORT_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nvlink_common.h"
|
||||
|
||||
/*
|
||||
* Initializes core lib and does all that is needed
|
||||
* to access NVLINK functionality on the current platform.
|
||||
*/
|
||||
NvlStatus nvlink_lib_initialize(void);
|
||||
|
||||
/*
|
||||
* Frees any related resources and then unloads core lib.
|
||||
*/
|
||||
NvlStatus nvlink_lib_unload(void);
|
||||
|
||||
/*
|
||||
* Entry point for nvlink ioctl calls.
|
||||
*/
|
||||
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_NVLINK_EXPORT_H_
|
||||
1157
src/common/nvlink/interface/nvlink_lib_ctrl.h
Normal file
1157
src/common/nvlink/interface/nvlink_lib_ctrl.h
Normal file
File diff suppressed because it is too large
Load Diff
90
src/common/nvlink/interface/nvlink_lock.h
Normal file
90
src/common/nvlink/interface/nvlink_lock.h
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _NVLINK_LOCK_H_
|
||||
#define _NVLINK_LOCK_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nvlink_common.h"
|
||||
|
||||
/*
|
||||
* Allocate top level lock. Return NVL_SUCCESS if
|
||||
* the lock was allocated else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus nvlink_lib_top_lock_alloc(void);
|
||||
|
||||
/*
|
||||
* Free top level lock. Return NVL_SUCCESS if
|
||||
* the lock was freed else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus nvlink_lib_top_lock_free(void);
|
||||
|
||||
/*
|
||||
* Allocate per-link lock. Return NVL_SUCCESS if
|
||||
* the lock was allocated else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus nvlink_lib_link_lock_alloc(nvlink_link *link);
|
||||
|
||||
/*
|
||||
* Free per-link lock. Return NVL_SUCCESS if
|
||||
* the lock was freed else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus nvlink_lib_link_lock_free(nvlink_link *link);
|
||||
|
||||
/*
|
||||
* Acquire top level lock. Return NVL_SUCCESS if
|
||||
* the lock was acquired else return NVL_ERR_STATE_IN_USE.
|
||||
*/
|
||||
NvlStatus nvlink_lib_top_lock_acquire(void);
|
||||
|
||||
/*
|
||||
* Release top level lock. Return NVL_SUCCESS if
|
||||
* the lock was released else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus nvlink_lib_top_lock_release(void);
|
||||
|
||||
/*
|
||||
* Sort the array of links in order of (DBDF, link#) -
|
||||
* lowest to highest and acquire link locks.
|
||||
* Return NVL_SUCCESS if all the link locks were acquired.
|
||||
* Else if any link lock failed to be acquired, release
|
||||
* all acquired link locks and return NVL_ERR_STATE_IN_USE.
|
||||
*/
|
||||
NvlStatus nvlink_lib_link_locks_acquire(nvlink_link **links, int numLinks);
|
||||
|
||||
/*
|
||||
* Loop over all the links and call nvlink_releaseLock(links[i]->linkLock).
|
||||
* Return NVL_SUCCESS if all the link locks were released.
|
||||
* Else if any link lock failed to be released return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus nvlink_lib_link_locks_release(nvlink_link **links, int numLinks);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _NVLINK_LOCK_H_
|
||||
86
src/common/nvlink/interface/nvlink_os.h
Normal file
86
src/common/nvlink/interface/nvlink_os.h
Normal file
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NVLINK_OS_H_
|
||||
#define _NVLINK_OS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nvlink_common.h"
|
||||
|
||||
// Release memory through nvlink_free(). The argument is parenthesized so the
// (void *) cast applies to the whole expression passed in, not just to its
// first operand.
#define NVLINK_FREE(x) nvlink_free((void *)(x))
|
||||
|
||||
// Memory management functions
|
||||
void * nvlink_malloc(NvLength);
|
||||
void nvlink_free(void *);
|
||||
void * nvlink_memset(void *, int, NvLength);
|
||||
void * nvlink_memcpy(void *, const void *, NvLength);
|
||||
int nvlink_memcmp(const void *, const void *, NvLength);
|
||||
NvU32 nvlink_memRd32(const volatile void *);
|
||||
void nvlink_memWr32(volatile void *, NvU32);
|
||||
NvU64 nvlink_memRd64(const volatile void *);
|
||||
void nvlink_memWr64(volatile void *, NvU64);
|
||||
|
||||
// String management functions
|
||||
char * nvlink_strcpy(char *, const char *);
|
||||
NvLength nvlink_strlen(const char *);
|
||||
int nvlink_strcmp(const char *, const char *);
|
||||
int nvlink_snprintf(char *, NvLength, const char *, ...);
|
||||
|
||||
// Locking support functions
|
||||
void * nvlink_allocLock(void);
|
||||
void nvlink_acquireLock(void *);
|
||||
NvBool nvlink_isLockOwner(void *);
|
||||
void nvlink_releaseLock(void *);
|
||||
void nvlink_freeLock(void *);
|
||||
|
||||
// Miscellaneous functions
|
||||
void nvlink_assert(int expression);
|
||||
void nvlink_sleep(unsigned int ms);
|
||||
void nvlink_print(const char *, int, const char *, int, const char *, ...);
|
||||
int nvlink_is_admin(void);
|
||||
|
||||
// Capability functions
|
||||
NvlStatus nvlink_acquire_fabric_mgmt_cap(void *osPrivate, NvU64 capDescriptor);
|
||||
int nvlink_is_fabric_manager(void *osPrivate);
|
||||
|
||||
#define NVLINK_DBG_LEVEL_INFO 0x0
|
||||
#define NVLINK_DBG_LEVEL_SETUP 0x1
|
||||
#define NVLINK_DBG_LEVEL_USERERRORS 0x2
|
||||
#define NVLINK_DBG_LEVEL_WARNINGS 0x3
|
||||
#define NVLINK_DBG_LEVEL_ERRORS 0x4
|
||||
|
||||
#define NVLINK_DBG_WHERE __FILE__, __LINE__, __FUNCTION__
|
||||
#define NVLINK_DBG_INFO NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_INFO
|
||||
#define NVLINK_DBG_SETUP NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_SETUP
|
||||
#define NVLINK_DBG_USERERRORS NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_USERERRORS
|
||||
#define NVLINK_DBG_WARNINGS NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_WARNINGS
|
||||
#define NVLINK_DBG_ERRORS NVLINK_DBG_WHERE, NVLINK_DBG_LEVEL_ERRORS
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_NVLINK_OS_H_
|
||||
527
src/common/nvlink/kernel/nvlink/core/nvlink_conn_mgmt.c
Normal file
527
src/common/nvlink/kernel/nvlink/core/nvlink_conn_mgmt.c
Normal file
@@ -0,0 +1,527 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
|
||||
/**
|
||||
* For a given link, return the associated intranode connection
|
||||
*
|
||||
* @param[in] endpoint NVLink Link pointer
|
||||
* @param[out] conn Connection associated with the link
|
||||
*/
|
||||
void
|
||||
nvlink_core_get_intranode_conn
|
||||
(
|
||||
nvlink_link *endpoint,
|
||||
nvlink_intranode_conn **conn
|
||||
)
|
||||
{
|
||||
nvlink_intranode_conn *tmpConn = NULL;
|
||||
|
||||
FOR_EACH_CONNECTION(tmpConn, nvlinkLibCtx.nv_intraconn_head, node)
|
||||
{
|
||||
if (tmpConn->end0 == endpoint || tmpConn->end1 == endpoint)
|
||||
{
|
||||
*conn = tmpConn;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For a given local link, return the associated internode connection
|
||||
*
|
||||
* @param[in] localLink NVLink Link pointer
|
||||
* @param[out] conn Connection associated with the link
|
||||
*/
|
||||
void
|
||||
nvlink_core_get_internode_conn
|
||||
(
|
||||
nvlink_link *localLink,
|
||||
nvlink_internode_conn **conn
|
||||
)
|
||||
{
|
||||
nvlink_internode_conn *tmpConn = NULL;
|
||||
|
||||
FOR_EACH_CONNECTION(tmpConn, nvlinkLibCtx.nv_interconn_head, node)
|
||||
{
|
||||
if (tmpConn->local_end == localLink)
|
||||
{
|
||||
*conn = tmpConn;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new intranode connection to the list of connections
|
||||
*
|
||||
* @param[in] end0 NVLink Link pointer for end0
|
||||
* @param[in] end1 NVLink Link pointer for end1
|
||||
*
|
||||
* return NVL_SUCCESS if the conn was added successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_add_intranode_conn
|
||||
(
|
||||
nvlink_link *end0,
|
||||
nvlink_link *end1
|
||||
)
|
||||
{
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
|
||||
// don't do anything if we have an intranode connecction
|
||||
nvlink_core_get_intranode_conn(end0, &conn);
|
||||
|
||||
if (conn != NULL)
|
||||
{
|
||||
// Verify that the other end of the connection is indeed end1
|
||||
conn->end0 == end0 ?
|
||||
nvlink_assert(conn->end1 == end1) :
|
||||
nvlink_assert(conn->end0 == end1);
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"Adding new NVLink intranode connection between %s:%s and %s:%s\n",
|
||||
end0->dev->deviceName, end0->linkName,
|
||||
end1->dev->deviceName, end1->linkName));
|
||||
|
||||
// create a new intranode connection object
|
||||
conn = (nvlink_intranode_conn*)nvlink_malloc(sizeof(nvlink_intranode_conn));
|
||||
if (conn == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"Adding NVLink intranode connection failed "
|
||||
"due to memory allocation error\n"));
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
nvlink_memset(conn, 0, sizeof(nvlink_intranode_conn));
|
||||
|
||||
// Initialize the node for the connection
|
||||
nvListInit(&conn->node);
|
||||
|
||||
// Initialize the connection endpoints
|
||||
conn->end0 = end0;
|
||||
conn->end1 = end1;
|
||||
|
||||
// Add the connection to the list of connections
|
||||
nvListAppend(&conn->node, &nvlinkLibCtx.nv_intraconn_head.node);
|
||||
|
||||
//
|
||||
// Update the count of connected endpoints
|
||||
// Loopback link, increment by 1
|
||||
// Non loopback link, increment by 2
|
||||
//
|
||||
nvlinkLibCtx.connectedEndpoints = ( end0 == end1 ?
|
||||
nvlinkLibCtx.connectedEndpoints + 1:
|
||||
nvlinkLibCtx.connectedEndpoints + 2 );
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new internode connection to the list of internode connections
|
||||
*
|
||||
* Note: As of now, no stats/count for internode connections.
|
||||
*
|
||||
* @param[in] localLink NVLink Link pointer for one end
|
||||
* @param[in] remoteEndPoint Remote endpoint
|
||||
*
|
||||
* return NVL_SUCCESS if the conn was added succesfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_add_internode_conn
|
||||
(
|
||||
nvlink_link *localLink,
|
||||
nvlink_remote_endpoint_info *remoteEndPoint
|
||||
)
|
||||
{
|
||||
nvlink_internode_conn *conn = NULL;
|
||||
|
||||
// Don't do anything if we have an internode connecction for local link
|
||||
nvlink_core_get_internode_conn(localLink, &conn);
|
||||
if (conn != NULL)
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
// create a new connection
|
||||
conn = (nvlink_internode_conn *)nvlink_malloc(sizeof(nvlink_internode_conn));
|
||||
if (conn == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"Adding nvlink internode connection failed"
|
||||
" due to memory allocation error\n"));
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
nvlink_memset(conn, 0, sizeof(nvlink_internode_conn));
|
||||
|
||||
// initialize the node for the connection list
|
||||
nvListInit(&conn->node);
|
||||
|
||||
// copy/assign the connection endpoints information
|
||||
conn->local_end = localLink;
|
||||
nvlink_memcpy(&conn->remote_end,
|
||||
remoteEndPoint,
|
||||
sizeof(nvlink_remote_endpoint_info));
|
||||
|
||||
// add the connection to the list of internode connections
|
||||
nvListAppend(&conn->node, &nvlinkLibCtx.nv_interconn_head.node);
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove the connection from the list of intranode connections
|
||||
*
|
||||
* @param[in] conn NVLink connection pointer
|
||||
*/
|
||||
void
|
||||
nvlink_core_remove_intranode_conn
|
||||
(
|
||||
nvlink_intranode_conn *conn
|
||||
)
|
||||
{
|
||||
// Remove the connection from the list of connections
|
||||
nvListDel(&conn->node);
|
||||
|
||||
//
|
||||
// Update the count of connected endpoints
|
||||
// Loopback link, decrement by 1
|
||||
// Non loopback link, decrement by 2
|
||||
//
|
||||
nvlinkLibCtx.connectedEndpoints = ( conn->end0 == conn->end1 ?
|
||||
nvlinkLibCtx.connectedEndpoints - 1:
|
||||
nvlinkLibCtx.connectedEndpoints - 2 );
|
||||
|
||||
//
|
||||
// Update the count of notConnected endpoints
|
||||
// Loopback link, do nothing
|
||||
// Non-loopback link, increment by 1
|
||||
//
|
||||
nvlinkLibCtx.notConnectedEndpoints = ( conn->end0 != conn->end1 ?
|
||||
nvlinkLibCtx.notConnectedEndpoints + 1:
|
||||
nvlinkLibCtx.notConnectedEndpoints );
|
||||
|
||||
nvlink_free((void *)conn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove the connection from the list of internode connections
|
||||
*
|
||||
* @param[in] localLink NVLink link pointer
|
||||
*/
|
||||
void
|
||||
nvlink_core_remove_internode_conn
|
||||
(
|
||||
nvlink_link *localLink
|
||||
)
|
||||
{
|
||||
nvlink_internode_conn *conn = NULL;
|
||||
|
||||
nvlink_core_get_internode_conn(localLink, &conn);
|
||||
|
||||
if (conn != NULL)
|
||||
{
|
||||
nvListDel(&conn->node);
|
||||
nvlink_free((void *)conn);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the given intranode connection is in the specified mode
|
||||
*
|
||||
* @param[in] conn NVLink Connection pointer
|
||||
* @param[in] linkMode Link mode
|
||||
*
|
||||
* return NVL_SUCCESS if the conn is in the given state
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_check_intranode_conn_state
|
||||
(
|
||||
nvlink_intranode_conn *conn,
|
||||
NvU64 linkMode
|
||||
)
|
||||
{
|
||||
switch (linkMode)
|
||||
{
|
||||
case NVLINK_LINKSTATE_OFF:
|
||||
{
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_OFF)) &&
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_OFF)))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Link already in OFF state. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if only one end of connection is OFF
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_OFF)) ||
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_OFF)))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link is in bad state. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
case NVLINK_LINKSTATE_RESET:
|
||||
{
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_RESET)) &&
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_RESET)))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Link already in RESET state. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if only one end of connection is RESET
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_RESET)) ||
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_RESET)))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link is in bad state. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
case NVLINK_LINKSTATE_SAFE:
|
||||
{
|
||||
// Check if both ends and their sublinks are already in SAFE mode
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_SAFE)) &&
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_SAFE)))
|
||||
{
|
||||
if ((nvlink_core_check_tx_sublink_state(conn->end0,
|
||||
NVLINK_SUBLINK_STATE_TX_OFF)) &&
|
||||
(nvlink_core_check_tx_sublink_state(conn->end1,
|
||||
NVLINK_SUBLINK_STATE_TX_OFF)) &&
|
||||
(nvlink_core_check_rx_sublink_state(conn->end0,
|
||||
NVLINK_SUBLINK_STATE_RX_OFF)) &&
|
||||
(nvlink_core_check_rx_sublink_state(conn->end1,
|
||||
NVLINK_SUBLINK_STATE_RX_OFF)))
|
||||
{
|
||||
//
|
||||
// If links are in safe, check if sublinks are in off
|
||||
// if so, we had performed pseudo-clean shutdown
|
||||
//
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Link is not in SAFE mode. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
else if (!((nvlink_core_check_tx_sublink_state(conn->end0,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE)) &&
|
||||
(nvlink_core_check_tx_sublink_state(conn->end1,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE)) &&
|
||||
(nvlink_core_check_rx_sublink_state(conn->end0,
|
||||
NVLINK_SUBLINK_STATE_RX_SAFE)) &&
|
||||
(nvlink_core_check_rx_sublink_state(conn->end1,
|
||||
NVLINK_SUBLINK_STATE_RX_SAFE))))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Sublinks are in bad state. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if only one end of connection is in SAFE mode
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_SAFE)) ||
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_SAFE)))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link is in bad state. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Link is not in SAFE mode. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
case NVLINK_LINKSTATE_HS:
|
||||
{
|
||||
// Check if both ends and their sublinks are already in HS mode
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_HS)) &&
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_HS)))
|
||||
{
|
||||
if (!((nvlink_core_check_tx_sublink_state(conn->end0,
|
||||
NVLINK_SUBLINK_STATE_TX_HS)) &&
|
||||
(nvlink_core_check_tx_sublink_state(conn->end1,
|
||||
NVLINK_SUBLINK_STATE_TX_HS)) &&
|
||||
(nvlink_core_check_rx_sublink_state(conn->end0,
|
||||
NVLINK_SUBLINK_STATE_RX_HS)) &&
|
||||
(nvlink_core_check_rx_sublink_state(conn->end1,
|
||||
NVLINK_SUBLINK_STATE_RX_HS))))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Sublinks are in bad state. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_HS)) ||
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_HS)))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link is in bad state. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Link is not in HIGH SPEED mode. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
case NVLINK_LINKSTATE_SLEEP:
|
||||
{
|
||||
// Check if both ends of connection are already in SLEEP mode
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_SLEEP)) &&
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_SLEEP)))
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if only one end of connection is in SLEEP mode
|
||||
if ((nvlink_core_check_link_state(conn->end0, NVLINK_LINKSTATE_SLEEP)) ||
|
||||
(nvlink_core_check_link_state(conn->end1, NVLINK_LINKSTATE_SLEEP)))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link is in bad state. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Link is not in SLEEP mode. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conn);
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy the intranode connection's remote endpoint information into
|
||||
* the nvlink_conn_info structure passed in
|
||||
*
|
||||
* @param[in] remote_end NVLink Link pointer
|
||||
* @param[in] conn_info Details of remote endpoint
|
||||
*/
|
||||
void
|
||||
nvlink_core_copy_intranode_conn_info
|
||||
(
|
||||
nvlink_link *remote_end,
|
||||
nvlink_conn_info *conn_info
|
||||
)
|
||||
{
|
||||
// copy the remote device pci information
|
||||
conn_info->domain = remote_end->dev->pciInfo.domain;
|
||||
conn_info->bus = remote_end->dev->pciInfo.bus;
|
||||
conn_info->device = remote_end->dev->pciInfo.device;
|
||||
conn_info->function = remote_end->dev->pciInfo.function;
|
||||
conn_info->pciDeviceId = remote_end->dev->pciInfo.pciDeviceId;
|
||||
conn_info->chipSid = remote_end->localSid;
|
||||
|
||||
// copy the device type
|
||||
conn_info->deviceType = remote_end->dev->type;
|
||||
|
||||
// copy the remote device uuid
|
||||
if (remote_end->dev->uuid != NULL)
|
||||
{
|
||||
nvlink_memcpy(conn_info->devUuid, remote_end->dev->uuid, NV_UUID_LEN);
|
||||
}
|
||||
|
||||
// copy the remote link number
|
||||
conn_info->linkNumber = remote_end->linkNumber;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy the internode connection's remote endpoint information into
|
||||
* the nvlink_conn_info structure passed in
|
||||
*
|
||||
* @param[in] remote_end NVLink Link pointer
|
||||
* @param[in] conn_info Details of remote endpoint
|
||||
*/
|
||||
void
|
||||
nvlink_core_copy_internode_conn_info
|
||||
(
|
||||
nvlink_remote_endpoint_info *remote_end,
|
||||
nvlink_conn_info *conn_info
|
||||
)
|
||||
{
|
||||
// copy the remote device pci information
|
||||
conn_info->domain = remote_end->pciInfo.domain;
|
||||
conn_info->bus = remote_end->pciInfo.bus;
|
||||
conn_info->device = remote_end->pciInfo.device;
|
||||
conn_info->function = remote_end->pciInfo.function;
|
||||
conn_info->pciDeviceId = 0;
|
||||
|
||||
// copy the device type
|
||||
conn_info->deviceType = remote_end->devType;
|
||||
|
||||
// copy the remote device uuid
|
||||
nvlink_memcpy(conn_info->devUuid, remote_end->devUuid, NV_UUID_LEN);
|
||||
|
||||
// copy the remote link number
|
||||
conn_info->linkNumber = remote_end->linkIndex;
|
||||
}
|
||||
383
src/common/nvlink/kernel/nvlink/core/nvlink_discovery.c
Normal file
383
src/common/nvlink/kernel/nvlink/core/nvlink_discovery.c
Normal file
@@ -0,0 +1,383 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
|
||||
static NvBool _nvlink_core_all_links_initialized(void);
|
||||
static void _nvlink_core_discover_topology(void);
|
||||
|
||||
/**
|
||||
* Get the remote end of the link
|
||||
*
|
||||
* For a given end of a link, returns the other end its connected to.
|
||||
*
|
||||
* Note: This function shouldn't be called when external fabric management is
|
||||
* enabled in the endpoint drivers. Unfortunately, there is no graceful
|
||||
* way to know that in the NVLink driver beforehand (during module load).
|
||||
*
|
||||
* @param[in] end NVLink Link pointer
|
||||
* @param[out] remote_end Remote endpoint of the connection
|
||||
* @param[in] flags Flags
|
||||
*/
|
||||
void
|
||||
nvlink_core_discover_and_get_remote_end
|
||||
(
|
||||
nvlink_link *end,
|
||||
nvlink_link **remote_end,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
nvlink_device *dev = NULL;
|
||||
nvlink_link *link = NULL;
|
||||
NvU32 linkCount = 0;
|
||||
nvlink_link **pLinks = (nvlink_link **)nvlink_malloc(
|
||||
sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
|
||||
if (pLinks == NULL)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvlinkLibCtx.bNewEndpoints)
|
||||
{
|
||||
if (!_nvlink_core_all_links_initialized())
|
||||
{
|
||||
// Initialize the links to SWCFG mode
|
||||
FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
FOR_EACH_LINK_REGISTERED(link, dev, node)
|
||||
{
|
||||
if (linkCount >= NVLINK_MAX_SYSTEM_LINK_NUM)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: linkCount >= NVLINK_MAX_SYSTEM_LINK_NUM",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_assert(0);
|
||||
nvlink_free((void *)pLinks);
|
||||
return;
|
||||
}
|
||||
|
||||
pLinks[linkCount++] = link;
|
||||
}
|
||||
}
|
||||
{
|
||||
nvlink_core_init_links_from_off_to_swcfg(pLinks, linkCount, flags);
|
||||
}
|
||||
}
|
||||
|
||||
// Re-discover the nvlink topology
|
||||
_nvlink_core_discover_topology();
|
||||
}
|
||||
|
||||
// Get the connection for the endpoint
|
||||
nvlink_core_get_intranode_conn(end, &conn);
|
||||
|
||||
if (conn != NULL)
|
||||
{
|
||||
*remote_end = (conn->end0 == end ? conn->end1 : conn->end0);
|
||||
}
|
||||
|
||||
if (pLinks != NULL)
|
||||
{
|
||||
nvlink_free((void *) pLinks);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Discovery process to determine topology
|
||||
*
|
||||
* Involves sending and reading back AN0 packets/SID values
|
||||
*/
|
||||
static void
|
||||
_nvlink_core_discover_topology(void)
|
||||
{
|
||||
nvlink_device *dev0 = NULL;
|
||||
nvlink_device *dev1 = NULL;
|
||||
nvlink_link *end0 = NULL;
|
||||
nvlink_link *end1 = NULL;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
NvU64 linkMode = NVLINK_LINKSTATE_OFF;
|
||||
NvBool isTokenFound = NV_FALSE;
|
||||
NvU64 token = 0;
|
||||
|
||||
nvlinkLibCtx.notConnectedEndpoints = 0;
|
||||
|
||||
FOR_EACH_DEVICE_REGISTERED(dev0, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
FOR_EACH_LINK_REGISTERED(end0, dev0, node)
|
||||
{
|
||||
//
|
||||
// If receiver detect failed for the link or if clocks could not be set
|
||||
// up for the link, then move to next link
|
||||
//
|
||||
if (!end0->bRxDetected || end0->bTxCommonModeFail)
|
||||
continue;
|
||||
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(end0, &conn);
|
||||
if (conn != NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (end0->packet_injection_retries > NVLINK_MAX_NUM_PACKET_INJECTION_RETRIES)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Packet injection retries reached for %s:%s.\n",
|
||||
__FUNCTION__, end0->dev->deviceName, end0->linkName));
|
||||
nvlinkLibCtx.notConnectedEndpoints++;
|
||||
continue;
|
||||
}
|
||||
|
||||
end0->link_handlers->get_dl_link_mode(end0, &linkMode);
|
||||
|
||||
// Packet injection can only happen on links that are in SAFE or ACTIVE
|
||||
if (!((linkMode == NVLINK_LINKSTATE_SAFE) || (linkMode == NVLINK_LINKSTATE_HS)))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Packet injection only works for links in SAFE or HS %s:%s.\n",
|
||||
__FUNCTION__, end0->dev->deviceName, end0->linkName));
|
||||
nvlinkLibCtx.notConnectedEndpoints++;
|
||||
continue;
|
||||
}
|
||||
|
||||
//
|
||||
// Send the AN0 packet
|
||||
// For Nvlink3.0, token mechanism is handled by Minion.
|
||||
// SW gets Sids values and so write_disocvery_token is Stubbed for Nvlink 3.0
|
||||
// We use the return value of write_discovery_token to shift between
|
||||
// Nvlink2.0 and NvLink3.0
|
||||
//
|
||||
if ((end0->version < NVLINK_DEVICE_VERSION_30) ||
|
||||
((end0->localSid == 0) || (end0->remoteSid == 0)))
|
||||
{
|
||||
end0->link_handlers->write_discovery_token(end0, end0->token);
|
||||
}
|
||||
end0->packet_injection_retries++;
|
||||
isTokenFound = NV_FALSE;
|
||||
|
||||
FOR_EACH_DEVICE_REGISTERED(dev1, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
FOR_EACH_LINK_REGISTERED(end1, dev1, node)
|
||||
{
|
||||
//
|
||||
// If receiver detect failed for the link or if clocks could not be
|
||||
// set up for the link, then move to next link
|
||||
//
|
||||
if (!end1->bRxDetected || end1->bTxCommonModeFail)
|
||||
continue;
|
||||
|
||||
token = 0;
|
||||
|
||||
if ((end0->version >= NVLINK_DEVICE_VERSION_30) &&
|
||||
(end0->localSid != 0) && (end0->remoteSid != 0))
|
||||
{
|
||||
if ((end0->remoteSid == end1->localSid) &&
|
||||
(end0->remoteLinkId == end1->linkNumber))
|
||||
{
|
||||
// Make sure the below token check passes.
|
||||
token = end0->token;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Read the RX sublink for the AN0 packet
|
||||
end1->link_handlers->read_discovery_token(end1, (NvU64 *) &token);
|
||||
}
|
||||
|
||||
// If token matches, establish the connection
|
||||
if (token == end0->token)
|
||||
{
|
||||
isTokenFound = NV_TRUE;
|
||||
|
||||
//
|
||||
// If R4 tokens were used for NVLink3.0+, then mark initnegotiate
|
||||
// passed, since ALT training won't get kicked off without it.
|
||||
//
|
||||
if ((end0->version >= NVLINK_DEVICE_VERSION_30) &&
|
||||
((end0->localSid == 0) || (end0->remoteSid == 0)))
|
||||
{
|
||||
end0->bInitnegotiateConfigGood = NV_TRUE;
|
||||
end1->bInitnegotiateConfigGood = NV_TRUE;
|
||||
}
|
||||
|
||||
// Add to the connections list
|
||||
nvlink_core_add_intranode_conn(end0, end1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (isTokenFound) break;
|
||||
}
|
||||
|
||||
if (nvlinkLibCtx.connectedEndpoints ==
|
||||
(nvlinkLibCtx.registeredEndpoints - nvlinkLibCtx.notConnectedEndpoints))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (nvlinkLibCtx.connectedEndpoints ==
|
||||
(nvlinkLibCtx.registeredEndpoints - nvlinkLibCtx.notConnectedEndpoints))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Are all links trained or is there a need to re-attempt training ?
|
||||
*
|
||||
* Returns true if all links trained and no need to re-attempt training
|
||||
* Returns false otherwise
|
||||
*/
|
||||
static NvBool
|
||||
_nvlink_core_all_links_initialized(void)
|
||||
{
|
||||
nvlink_device *dev = NULL;
|
||||
nvlink_link *link = NULL;
|
||||
NvU64 linkMode, txMode, rxMode;
|
||||
NvU32 txSubMode, rxSubMode;
|
||||
|
||||
if (nvlinkLibCtx.registeredEndpoints == 0)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: No links registered with nvlink core! Sleeping.\n",
|
||||
__FUNCTION__));
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
nvlinkLibCtx.endpointsInFail = 0;
|
||||
nvlinkLibCtx.endpointsInSafe = 0;
|
||||
nvlinkLibCtx.endpointsInActive = 0;
|
||||
|
||||
//
|
||||
// Get the current state of all endpoints. This determines
|
||||
// if some of the endpoints are still not trained to SAFE
|
||||
//
|
||||
FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
FOR_EACH_LINK_REGISTERED(link, dev, node)
|
||||
{
|
||||
if (nvlinkLibCtx.bNewEndpoints)
|
||||
{
|
||||
link->safe_retries = 0;
|
||||
link->packet_injection_retries = 0;
|
||||
}
|
||||
|
||||
if (link->state == NVLINK_LINKSTATE_FAIL)
|
||||
{
|
||||
if (nvlinkLibCtx.bNewEndpoints)
|
||||
{
|
||||
//
|
||||
// New endpoints were detected. There may be a chance that
|
||||
// endpoints that failed previously may transition to safe
|
||||
//
|
||||
link->state = NVLINK_LINKSTATE_OFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
nvlinkLibCtx.endpointsInFail++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
linkMode = NVLINK_LINKSTATE_OFF;
|
||||
|
||||
if (link->link_handlers->get_dl_link_mode(link, &linkMode))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get link mode for %s:%s",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (linkMode == NVLINK_LINKSTATE_SAFE)
|
||||
{
|
||||
|
||||
//
|
||||
// Link is only truly in SAFE mode if link state and sublink state
|
||||
// is in SAFE/SWCFG.
|
||||
// After pseudo-clean shutdown, sublinks are in OFF, so they
|
||||
// need to be retrained to SAFE
|
||||
//
|
||||
if (link->link_handlers->get_tx_mode(link, &txMode, &txSubMode))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get tx sublink mode for %s:%s",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
}
|
||||
if (link->link_handlers->get_rx_mode(link, &rxMode, &rxSubMode))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get rx sublink mode for %s:%s",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
}
|
||||
|
||||
if ((txMode == NVLINK_SUBLINK_STATE_TX_OFF) ||
|
||||
(rxMode == NVLINK_SUBLINK_STATE_RX_OFF))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
link->bRxDetected = NV_TRUE;
|
||||
nvlinkLibCtx.endpointsInSafe++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (linkMode == NVLINK_LINKSTATE_HS)
|
||||
{
|
||||
link->bRxDetected = NV_TRUE;
|
||||
nvlinkLibCtx.endpointsInActive++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// New endpoints have been considered
|
||||
nvlinkLibCtx.bNewEndpoints = NV_FALSE;
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Registered Links = %d, nvlinkLibCtx.endpointsInSafe = %d, "
|
||||
" nvlinkLibCtx.endpointsInFail = %d, nvlinkLibCtx.endpointsInActive = %d\n",
|
||||
__FUNCTION__,
|
||||
nvlinkLibCtx.registeredEndpoints, nvlinkLibCtx.endpointsInSafe,
|
||||
nvlinkLibCtx.endpointsInFail, nvlinkLibCtx.endpointsInActive));
|
||||
|
||||
// Determine if all links are currently trained
|
||||
if ((nvlinkLibCtx.registeredEndpoints - nvlinkLibCtx.endpointsInFail -
|
||||
nvlinkLibCtx.endpointsInSafe - nvlinkLibCtx.endpointsInActive) == 0)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: All connected links are in trained\n",
|
||||
__FUNCTION__));
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
return NV_FALSE;
|
||||
}
|
||||
1402
src/common/nvlink/kernel/nvlink/core/nvlink_initialize.c
Normal file
1402
src/common/nvlink/kernel/nvlink/core/nvlink_initialize.c
Normal file
File diff suppressed because it is too large
Load Diff
692
src/common/nvlink/kernel/nvlink/core/nvlink_ioctl.c
Normal file
692
src/common/nvlink/kernel/nvlink/core/nvlink_ioctl.c
Normal file
@@ -0,0 +1,692 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvVer.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "nvlink_lib_ctrl.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
|
||||
static nvlink_device_type
|
||||
_nvlink_core_map_device_type
|
||||
(
|
||||
NvU64 type
|
||||
)
|
||||
{
|
||||
nvlink_device_type devType;
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case NVLINK_DEVICE_TYPE_IBMNPU:
|
||||
devType = nvlink_device_type_ibmnpu;
|
||||
break;
|
||||
case NVLINK_DEVICE_TYPE_GPU:
|
||||
devType = nvlink_device_type_gpu;
|
||||
break;
|
||||
case NVLINK_DEVICE_TYPE_NVSWITCH:
|
||||
devType = nvlink_device_type_nvswitch;
|
||||
break;
|
||||
default:
|
||||
devType = nvlink_device_type_unknown;
|
||||
break;
|
||||
}
|
||||
|
||||
return devType;
|
||||
}
|
||||
|
||||
static nvlink_link_mode
|
||||
_nvlink_core_map_link_state
|
||||
(
|
||||
NvU64 dlState,
|
||||
NvU64 tlState
|
||||
)
|
||||
{
|
||||
nvlink_link_mode mode;
|
||||
|
||||
//
|
||||
// If TL has entered contain, return contain.
|
||||
// Otherwise, return DL state
|
||||
//
|
||||
if (tlState == NVLINK_LINKSTATE_CONTAIN)
|
||||
{
|
||||
mode = nvlink_link_mode_contain;
|
||||
return mode;
|
||||
}
|
||||
|
||||
switch (dlState)
|
||||
{
|
||||
case NVLINK_LINKSTATE_OFF:
|
||||
mode = nvlink_link_mode_off;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_HS:
|
||||
mode = nvlink_link_mode_active;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_SAFE:
|
||||
mode = nvlink_link_mode_swcfg;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_FAULT:
|
||||
mode = nvlink_link_mode_fault;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_RECOVERY:
|
||||
mode = nvlink_link_mode_recovery;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_FAIL:
|
||||
mode = nvlink_link_mode_fail;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_DETECT:
|
||||
mode = nvlink_link_mode_detect;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_RESET:
|
||||
mode = nvlink_link_mode_reset;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_ENABLE_PM:
|
||||
mode = nvlink_link_mode_enable_pm;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_DISABLE_PM:
|
||||
mode = nvlink_link_mode_disable_pm;
|
||||
break;
|
||||
case NVLINK_LINKSTATE_TRAFFIC_SETUP:
|
||||
mode = nvlink_link_mode_traffic_setup;
|
||||
break;
|
||||
default:
|
||||
mode = nvlink_link_mode_unknown;
|
||||
break;
|
||||
}
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
static nvlink_tx_sublink_mode
|
||||
_nvlink_core_map_tx_sublink_state
|
||||
(
|
||||
NvU64 state
|
||||
)
|
||||
{
|
||||
nvlink_tx_sublink_mode mode;
|
||||
|
||||
switch (state)
|
||||
{
|
||||
case NVLINK_SUBLINK_STATE_TX_HS:
|
||||
mode = nvlink_tx_sublink_mode_hs;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_SINGLE_LANE:
|
||||
mode = nvlink_tx_sublink_mode_single_lane;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_TRAIN:
|
||||
mode = nvlink_tx_sublink_mode_train;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_SAFE:
|
||||
mode = nvlink_tx_sublink_mode_safe;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_OFF:
|
||||
mode = nvlink_tx_sublink_mode_off;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_COMMON_MODE:
|
||||
mode = nvlink_tx_sublink_mode_common_mode;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE:
|
||||
mode = nvlink_tx_sublink_mode_common_mode_disable;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_DATA_READY:
|
||||
mode = nvlink_tx_sublink_mode_data_ready;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_EQ:
|
||||
mode = nvlink_tx_sublink_mode_tx_eq;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_PRBS_EN:
|
||||
mode = nvlink_tx_sublink_mode_pbrs_en;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_TX_POST_HS:
|
||||
mode = nvlink_tx_sublink_mode_post_hs;
|
||||
break;
|
||||
default:
|
||||
mode = nvlink_tx_sublink_mode_unknown;
|
||||
break;
|
||||
}
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
static nvlink_rx_sublink_mode
|
||||
_nvlink_core_map_rx_sublink_state
|
||||
(
|
||||
NvU64 state
|
||||
)
|
||||
{
|
||||
nvlink_rx_sublink_mode mode;
|
||||
|
||||
switch (state)
|
||||
{
|
||||
case NVLINK_SUBLINK_STATE_RX_HS:
|
||||
mode = nvlink_rx_sublink_mode_hs;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_RX_SINGLE_LANE:
|
||||
mode = nvlink_rx_sublink_mode_single_lane;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_RX_TRAIN:
|
||||
mode = nvlink_rx_sublink_mode_train;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_RX_SAFE:
|
||||
mode = nvlink_rx_sublink_mode_safe;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_RX_OFF:
|
||||
mode = nvlink_rx_sublink_mode_off;
|
||||
break;
|
||||
case NVLINK_SUBLINK_STATE_RX_RXCAL:
|
||||
mode = nvlink_rx_sublink_mode_rxcal;
|
||||
break;
|
||||
default:
|
||||
mode = nvlink_rx_sublink_mode_unknown;
|
||||
break;
|
||||
}
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the link is already initialized
|
||||
*
|
||||
* Note: A link is initialized if it is in SWCFG or ACTIVE state
|
||||
*
|
||||
* @param[in] linkMode Link state
|
||||
*
|
||||
* NvBool
|
||||
*/
|
||||
static NvBool
|
||||
_nvlink_core_is_link_initialized
|
||||
(
|
||||
NvU64 linkMode
|
||||
)
|
||||
{
|
||||
if ((linkMode == NVLINK_LINKSTATE_SAFE) ||
|
||||
(linkMode == NVLINK_LINKSTATE_HS))
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the mask of enabled links for the device
|
||||
*
|
||||
* @param[in] dev nvlink_device pointer
|
||||
*
|
||||
* NvU64
|
||||
*/
|
||||
static NvU64
|
||||
_nvlink_core_get_enabled_link_mask
|
||||
(
|
||||
nvlink_device *dev
|
||||
)
|
||||
{
|
||||
NvU64 linkMask = 0x0;
|
||||
nvlink_link *link = NULL;
|
||||
|
||||
nvListForEachEntry(link, &dev->link_list, node)
|
||||
{
|
||||
linkMask |= NVBIT64(link->linkNumber);
|
||||
}
|
||||
|
||||
return linkMask;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the device type is supported
|
||||
*
|
||||
* @param[in] devType Device type
|
||||
*
|
||||
* NvBool
|
||||
*/
|
||||
NvBool
|
||||
nvlink_core_is_supported_device_type
|
||||
(
|
||||
NvU32 devType
|
||||
)
|
||||
{
|
||||
if ((devType == nvlink_device_type_ibmnpu) ||
|
||||
(devType == nvlink_device_type_gpu) ||
|
||||
(devType == nvlink_device_type_nvswitch))
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the link and sublink states for the endpoint
|
||||
*
|
||||
* @param[in] link nvlink_device *
|
||||
* @param[out] linkState nvlink_link_state *
|
||||
*/
|
||||
void
|
||||
nvlink_core_get_endpoint_state
|
||||
(
|
||||
nvlink_link *link,
|
||||
nvlink_link_state *linkState
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
NvU32 txSubLinkSubstate = NVLINK_SUBLINK_SUBSTATE_TX_STABLE;
|
||||
NvU32 rxSubLinkSubState = NVLINK_SUBLINK_SUBSTATE_RX_STABLE;
|
||||
NvU64 state = NVLINK_LINKSTATE_INVALID;
|
||||
NvU64 dlState = NVLINK_LINKSTATE_INVALID;
|
||||
NvU64 tlState = NVLINK_LINKSTATE_INVALID;
|
||||
|
||||
//
|
||||
// This is a best case effort to return the current state of the link
|
||||
// to user as part of the ioctl call. Typically, this call should succeed
|
||||
// unless the corresponding HAL/Callbacks are not registered, which can
|
||||
// happen during early development cycle. Adding an assert to catch that
|
||||
// in debug builds.
|
||||
//
|
||||
|
||||
status = link->link_handlers->get_dl_link_mode(link, &dlState);
|
||||
nvlink_assert(status == NVL_SUCCESS);
|
||||
|
||||
link->link_handlers->get_tl_link_mode(link, &tlState);
|
||||
|
||||
linkState->linkMode = _nvlink_core_map_link_state(dlState, tlState);
|
||||
|
||||
status = link->link_handlers->get_tx_mode(link,
|
||||
&state,
|
||||
&txSubLinkSubstate);
|
||||
nvlink_assert(status == NVL_SUCCESS);
|
||||
|
||||
linkState->txSubLinkMode = _nvlink_core_map_tx_sublink_state(state);
|
||||
|
||||
status = link->link_handlers->get_rx_mode(link,
|
||||
&state,
|
||||
&rxSubLinkSubState);
|
||||
nvlink_assert(status == NVL_SUCCESS);
|
||||
|
||||
linkState->rxSubLinkMode = _nvlink_core_map_rx_sublink_state(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the nvlink_device * from the PCI DBDF
|
||||
*
|
||||
* @param[in] devInfo PCI Information
|
||||
* @param[out] dev nvlink_device *
|
||||
*/
|
||||
void
|
||||
nvlink_core_get_device_by_devinfo
|
||||
(
|
||||
nvlink_device_info *devInfo,
|
||||
nvlink_device **dev
|
||||
)
|
||||
{
|
||||
nvlink_device *tmpDev = NULL;
|
||||
|
||||
FOR_EACH_DEVICE_REGISTERED(tmpDev, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
if ( (tmpDev->nodeId == devInfo->nodeId) &&
|
||||
(tmpDev->pciInfo.domain == devInfo->pciInfo.domain) &&
|
||||
(tmpDev->pciInfo.bus == devInfo->pciInfo.bus) &&
|
||||
(tmpDev->pciInfo.device == devInfo->pciInfo.device) &&
|
||||
(tmpDev->pciInfo.function == devInfo->pciInfo.function))
|
||||
{
|
||||
*dev = tmpDev;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// not found any matching device
|
||||
*dev = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the nvlink_link * from the PCI DBDF and link#
|
||||
*
|
||||
* @param[in] endPoint PCI Information and link#
|
||||
* @param[out] link nvlink_link *
|
||||
*/
|
||||
void
|
||||
nvlink_core_get_link_by_endpoint
|
||||
(
|
||||
nvlink_endpoint *endPoint,
|
||||
nvlink_link **link
|
||||
)
|
||||
{
|
||||
nvlink_device *tmpDev = NULL;
|
||||
nvlink_link *tmpLink = NULL;
|
||||
|
||||
FOR_EACH_DEVICE_REGISTERED(tmpDev, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
if ((tmpDev->nodeId == endPoint->nodeId) &&
|
||||
(tmpDev->pciInfo.domain == endPoint->pciInfo.domain) &&
|
||||
(tmpDev->pciInfo.bus == endPoint->pciInfo.bus) &&
|
||||
(tmpDev->pciInfo.device == endPoint->pciInfo.device) &&
|
||||
(tmpDev->pciInfo.function == endPoint->pciInfo.function))
|
||||
{
|
||||
FOR_EACH_LINK_REGISTERED(tmpLink, tmpDev, node)
|
||||
{
|
||||
if (tmpLink->linkNumber == endPoint->linkIndex)
|
||||
{
|
||||
*link = tmpLink;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// not found any matching link
|
||||
*link = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given the nvlink_link ptr, copy the endpoint details for the link
|
||||
*
|
||||
* @param[in] connLink nvlink_link *
|
||||
* @param[out] endPointInfo Endpoint details for the link
|
||||
*/
|
||||
void
|
||||
nvlink_core_copy_endpoint_info
|
||||
(
|
||||
nvlink_link *connLink,
|
||||
nvlink_endpoint *endPointInfo
|
||||
)
|
||||
{
|
||||
nvlink_device *dev = connLink->dev;
|
||||
|
||||
endPointInfo->pciInfo.domain = dev->pciInfo.domain;
|
||||
endPointInfo->pciInfo.bus = dev->pciInfo.bus;
|
||||
endPointInfo->pciInfo.device = dev->pciInfo.device;
|
||||
endPointInfo->pciInfo.function = dev->pciInfo.function;
|
||||
endPointInfo->nodeId = dev->nodeId;
|
||||
endPointInfo->linkIndex = connLink->linkNumber;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given the nvlink_device ptr, copy the device details
|
||||
*
|
||||
* @param[in] tmpDev nvlink_device *
|
||||
* @param[out] devInfo Device details
|
||||
*/
|
||||
void
|
||||
nvlink_core_copy_device_info
|
||||
(
|
||||
nvlink_device *tmpDev,
|
||||
nvlink_detailed_dev_info *devInfo
|
||||
)
|
||||
{
|
||||
devInfo->pciInfo.domain = tmpDev->pciInfo.domain;
|
||||
devInfo->pciInfo.bus = tmpDev->pciInfo.bus;
|
||||
devInfo->pciInfo.device = tmpDev->pciInfo.device;
|
||||
devInfo->pciInfo.function = tmpDev->pciInfo.function;
|
||||
devInfo->numLinks = nvListCount(&tmpDev->link_list);
|
||||
devInfo->devType = _nvlink_core_map_device_type(tmpDev->type);
|
||||
devInfo->enabledLinkMask = _nvlink_core_get_enabled_link_mask(tmpDev);
|
||||
// copy device uuid information if available
|
||||
if (tmpDev->uuid != NULL)
|
||||
{
|
||||
nvlink_memcpy(devInfo->devUuid, tmpDev->uuid, NVLINK_UUID_LEN);
|
||||
}
|
||||
|
||||
// copy device name information if available
|
||||
if (tmpDev->deviceName != NULL)
|
||||
{
|
||||
int nameLen = nvlink_strlen(tmpDev->deviceName);
|
||||
int copyLen = 0;
|
||||
copyLen = (nameLen > NVLINK_DEVICE_NAME_LEN_MAX) ? NVLINK_DEVICE_NAME_LEN_MAX : nameLen;
|
||||
nvlink_memcpy(devInfo->deviceName, tmpDev->deviceName, copyLen);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Transition to SWCFG on the given array of links
|
||||
*
|
||||
* @param[in] links Array of nvlink_link pointers
|
||||
* @param[in] numLinks Number of links in the array
|
||||
*
|
||||
* return NvlStatus
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_link_init_async
|
||||
(
|
||||
nvlink_link **links,
|
||||
NvU32 numLinks
|
||||
)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
// Sanity check the links array for non-zero links
|
||||
nvlink_assert((links != NULL) && (numLinks > 0));
|
||||
|
||||
for (i = 0; i < numLinks; i++)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
NvU64 linkMode = NVLINK_LINKSTATE_OFF;
|
||||
|
||||
if (!links[i]->bRxDetected || links[i]->bTxCommonModeFail)
|
||||
{
|
||||
// link did not pass RXDET or failed in common mode, don't do anything
|
||||
continue;
|
||||
}
|
||||
|
||||
status = links[i]->link_handlers->get_dl_link_mode(links[i], &linkMode);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get link mode for %s:%s\n",
|
||||
__FUNCTION__, links[i]->dev->deviceName, links[i]->linkName));
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO : Handle fault checking
|
||||
if (_nvlink_core_is_link_initialized(linkMode))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: %s:%s is already trained to SAFE or HIGH SPEED\n",
|
||||
__FUNCTION__, links[i]->dev->deviceName, links[i]->linkName));
|
||||
|
||||
// link already in higher state. don't do anything
|
||||
continue;
|
||||
}
|
||||
|
||||
// Put the link in SAFE state
|
||||
links[i]->link_handlers->set_dl_link_mode(links[i],
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
NVLINK_STATE_CHANGE_SYNC);
|
||||
}
|
||||
|
||||
//
|
||||
// We could have links which are faulty and cannot be initialized. But proceeding
|
||||
// the initialization sequence allows us to use other non-faulty links. Therefore
|
||||
// return success always.
|
||||
//
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a discovery token for the given link
|
||||
*
|
||||
* @param[in] link nvlink_link pointer
|
||||
*
|
||||
* return NvU64
|
||||
*/
|
||||
NvU64
|
||||
nvlink_core_get_link_discovery_token
|
||||
(
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
NvU64 token = 0;
|
||||
|
||||
//
|
||||
// generate a unique token value for discovering connections.
|
||||
// link->token is the memory address of the allocated link object,
|
||||
// which is unique within a node. Adding fabric node id
|
||||
// to make it unique across different nodes.
|
||||
//
|
||||
|
||||
token = link->token & ~((NvU64)NVLINK_FABRIC_NODE_ID_MASK << NVLINK_FABRIC_NODE_ID_POS);
|
||||
token = token | ((NvU64)link->dev->nodeId << NVLINK_FABRIC_NODE_ID_POS);
|
||||
return token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the dicovery token for the given link
|
||||
*
|
||||
* @param[in] link nvlink_link pointer
|
||||
* @param[in] token Discovery token to write
|
||||
*
|
||||
* return NvlStatus
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_write_link_discovery_token
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU64 token
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
NvU64 linkMode = NVLINK_LINKSTATE_OFF;
|
||||
|
||||
// Packet injection can only happen if link is in SWCFG/ACTIVE
|
||||
status = link->link_handlers->get_dl_link_mode(link, &linkMode);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get link mode for %s:%s\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
if (!_nvlink_core_is_link_initialized(linkMode))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Packet injection only works for links in SAFE or HS %s:%s.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
// Send the token (AN0 packet)
|
||||
link->link_handlers->write_discovery_token(link, token);
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the dicovery token for the given link
|
||||
*
|
||||
* @param[in] link nvlink_link pointer
|
||||
*
|
||||
* return NvU64
|
||||
*/
|
||||
NvU64
|
||||
nvlink_core_read_link_discovery_token
|
||||
(
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
NvU64 token = 0;
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
NvU64 linkMode = NVLINK_LINKSTATE_OFF;
|
||||
|
||||
status = link->link_handlers->get_dl_link_mode(link, &linkMode);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get link mode for %s:%s\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!_nvlink_core_is_link_initialized(linkMode))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
// query discovery token from the link
|
||||
link->link_handlers->read_discovery_token(link, (NvU64 *) &token);
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect the connection by correlating the tokens
|
||||
*
|
||||
* @param[in] srcLink nvlink_link pointer
|
||||
* @param[in] writeToken write token
|
||||
* @param[in] skipReadToken token vs sid for connection detection
|
||||
*
|
||||
*/
|
||||
void
|
||||
nvlink_core_correlate_conn_by_token
|
||||
(
|
||||
nvlink_link *srcLink,
|
||||
NvU64 writeToken,
|
||||
NvBool skipReadToken
|
||||
)
|
||||
{
|
||||
nvlink_device *dev = NULL;
|
||||
nvlink_link *dstLink = NULL;
|
||||
NvU64 readToken = 0;
|
||||
|
||||
FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
FOR_EACH_LINK_REGISTERED(dstLink, dev, node)
|
||||
{
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
|
||||
nvlink_core_get_intranode_conn(dstLink, &conn);
|
||||
if (conn != NULL)
|
||||
{
|
||||
// skip token read if we already have a connection for the dstLink
|
||||
continue;
|
||||
}
|
||||
|
||||
if (skipReadToken)
|
||||
{
|
||||
if ((srcLink->remoteSid == dstLink->localSid) &&
|
||||
(srcLink->remoteLinkId == dstLink->linkNumber))
|
||||
{
|
||||
readToken = writeToken;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Read the RX sublink for the AN0 packet
|
||||
readToken = nvlink_core_read_link_discovery_token(dstLink);
|
||||
}
|
||||
|
||||
// If token matches, establish the connection
|
||||
if (writeToken == readToken)
|
||||
{
|
||||
// Add to the connections list
|
||||
nvlink_core_add_intranode_conn(srcLink, dstLink);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
446
src/common/nvlink/kernel/nvlink/core/nvlink_link_mgmt.c
Normal file
446
src/common/nvlink/kernel/nvlink/core/nvlink_link_mgmt.c
Normal file
@@ -0,0 +1,446 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
|
||||
/**
|
||||
* For the given link, check whether the link state is at the requested state.
|
||||
*
|
||||
* @param[in] link NVLink link pointer
|
||||
* @param[in] linkState Target Link State
|
||||
*
|
||||
* return NV_TRUE if the link is in the given state
|
||||
*/
|
||||
NvBool
|
||||
nvlink_core_check_link_state
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU64 linkState
|
||||
)
|
||||
{
|
||||
NvU64 crntDlLinkMode = NVLINK_LINKSTATE_OFF;
|
||||
NvU64 crntTlLinkMode = NVLINK_LINKSTATE_OFF;
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
|
||||
switch (linkState)
|
||||
{
|
||||
case NVLINK_LINKSTATE_OFF:
|
||||
case NVLINK_LINKSTATE_RESET:
|
||||
case NVLINK_LINKSTATE_SAFE:
|
||||
case NVLINK_LINKSTATE_HS:
|
||||
{
|
||||
status = link->link_handlers->get_dl_link_mode(link, &crntDlLinkMode);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get DL link mode for %s:%s\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
if (crntDlLinkMode == linkState)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case NVLINK_LINKSTATE_SLEEP:
|
||||
{
|
||||
status = link->link_handlers->get_tl_link_mode(link, &crntTlLinkMode);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get TL link mode for %s:%s\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
if (crntTlLinkMode == linkState)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// return false for default case or the states are not matching
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* For the given link, check whether the tx sublink state is at the
|
||||
* requested state.
|
||||
*
|
||||
* @param[in] link NVLink link pointer
|
||||
* @param[in] txSublinkState Target Tx Sublink State
|
||||
*
|
||||
* return NV_TRUE if the tx sublink is in the given state
|
||||
*/
|
||||
NvBool
|
||||
nvlink_core_check_tx_sublink_state
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU64 txSublinkState
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
|
||||
NvU64 crntTxSublinkMode = NVLINK_SUBLINK_STATE_TX_OFF;
|
||||
NvU32 crntTxSublinkSubMode = NVLINK_SUBLINK_SUBSTATE_TX_STABLE;
|
||||
|
||||
status = link->link_handlers->get_tx_mode(link,
|
||||
&crntTxSublinkMode,
|
||||
&crntTxSublinkSubMode);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get TX sublink mode for %s:%s\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
switch (txSublinkState)
|
||||
{
|
||||
case NVLINK_SUBLINK_STATE_TX_OFF:
|
||||
{
|
||||
if (crntTxSublinkMode == NVLINK_SUBLINK_STATE_TX_OFF)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case NVLINK_SUBLINK_STATE_TX_SAFE:
|
||||
{
|
||||
if (crntTxSublinkMode == NVLINK_SUBLINK_STATE_TX_SAFE)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case NVLINK_SUBLINK_STATE_TX_HS:
|
||||
{
|
||||
if ((crntTxSublinkMode == NVLINK_SUBLINK_STATE_TX_HS) ||
|
||||
(crntTxSublinkMode == NVLINK_SUBLINK_STATE_TX_SINGLE_LANE))
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// return false for default case or the states are not matching
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* For the given link, check whether the rx sublink state is at the
|
||||
* requested state.
|
||||
*
|
||||
* @param[in] link NVLink link pointer
|
||||
* @param[in] rxSublinkState Target Rx Sublink State
|
||||
*
|
||||
* return NV_TRUE if the rx sublink is in the given state
|
||||
*/
|
||||
NvBool
|
||||
nvlink_core_check_rx_sublink_state
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU64 rxSublinkState
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
|
||||
NvU64 crntRxSublinkMode = NVLINK_SUBLINK_STATE_RX_OFF;
|
||||
NvU32 crntRxSublinkSubMode = NVLINK_SUBLINK_SUBSTATE_RX_STABLE;
|
||||
|
||||
status = link->link_handlers->get_rx_mode(link,
|
||||
&crntRxSublinkMode,
|
||||
&crntRxSublinkSubMode);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to get TX sublink mode for %s:%s\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
switch (rxSublinkState)
|
||||
{
|
||||
case NVLINK_SUBLINK_STATE_RX_OFF:
|
||||
{
|
||||
if (crntRxSublinkMode == NVLINK_SUBLINK_STATE_RX_OFF)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case NVLINK_SUBLINK_STATE_RX_SAFE:
|
||||
{
|
||||
if (crntRxSublinkMode == NVLINK_SUBLINK_STATE_RX_SAFE)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case NVLINK_SUBLINK_STATE_RX_HS:
|
||||
{
|
||||
if ((crntRxSublinkMode == NVLINK_SUBLINK_STATE_RX_HS) ||
|
||||
(crntRxSublinkMode == NVLINK_SUBLINK_STATE_RX_SINGLE_LANE))
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// return false for default case or the states are not matching
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll the link to reach the specified state upto the given timeout.
|
||||
* Link state transition is considered failed once timeout occurs.
|
||||
*
|
||||
* @param[in] link NVLink link pointer
|
||||
* @param[in] linkState Target Link state
|
||||
* @param[in] timeout Timeout
|
||||
*
|
||||
* return NVL_SUCCESS if the link transitioned to the target state
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_poll_link_state
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU64 linkState,
|
||||
NvU32 timeout
|
||||
)
|
||||
{
|
||||
NvU64 currentLinkState = ~0;
|
||||
|
||||
link->link_handlers->get_dl_link_mode(link, ¤tLinkState);
|
||||
|
||||
while (currentLinkState != linkState)
|
||||
{
|
||||
nvlink_sleep(1);
|
||||
|
||||
timeout--;
|
||||
|
||||
if (!timeout)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Timeout occured while polling on link.\n",
|
||||
__FUNCTION__));
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link info: device: %s link: %s link state "
|
||||
"expected: 0x%08llx actual: 0x%08llx.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName,
|
||||
linkState, currentLinkState));
|
||||
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
link->link_handlers->get_dl_link_mode(link, ¤tLinkState);
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll for a given timeout period for the sublink to reach a given
|
||||
* state. Sublink state transition is considered as failed once the
|
||||
* timeout occurs
|
||||
*
|
||||
* @param[in] localTxSubLink Local NVLink pointer
|
||||
* @param[in] localTxSubLinkState Local Tx Sublink State
|
||||
* @param[in] localTxSubLinkSubState Local Tx Sublink Substate
|
||||
* @param[in] remoteRxSubLink Remote NVLink pointer
|
||||
* @param[in] remoteRxSubLinkState Remote Rx Sublink State
|
||||
* @param[in] remoteRxSubLinkSubState Remote Rx Sublink Substate
|
||||
* @param[in] timeout Timeout
|
||||
*
|
||||
* return NVL_SUCCESS is the sublink transitioned to the given state
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_poll_sublink_state
|
||||
(
|
||||
nvlink_link *localTxSubLink,
|
||||
NvU64 localTxSubLinkState,
|
||||
NvU32 localTxSubLinkSubState,
|
||||
nvlink_link *remoteRxSubLink,
|
||||
NvU64 remoteRxSubLinkState,
|
||||
NvU32 remoteRxSubLinkSubState,
|
||||
NvU32 timeout
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
|
||||
// check for tx sublink if a valid link is specified
|
||||
if (localTxSubLink)
|
||||
{
|
||||
status = nvlink_core_poll_tx_sublink_state(localTxSubLink,
|
||||
localTxSubLinkState,
|
||||
localTxSubLinkSubState,
|
||||
timeout);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
// polling on tx sublink failed. skip any rx polling
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// check for rx sublink if a valid link is specified and no
|
||||
// timeout on tx sublink (if it was specified)
|
||||
//
|
||||
if (remoteRxSubLink)
|
||||
{
|
||||
status = nvlink_core_poll_rx_sublink_state(remoteRxSubLink,
|
||||
remoteRxSubLinkState,
|
||||
remoteRxSubLinkSubState,
|
||||
timeout);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll for the tx sublink to reach the specified state upto the given
|
||||
* timeout. Sublink state transition is considered failed once timeout
|
||||
* occurs.
|
||||
*
|
||||
* @param[in] link NVLink pointer
|
||||
* @param[in] txSublinkState Tx Sublink State
|
||||
* @param[in] txSublinkSubState Tx Sublink Substate
|
||||
* @param[in] timeout Timeout
|
||||
*
|
||||
* return NVL_SUCCESS if the tx sublink transitioned to the target state
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_poll_tx_sublink_state
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU64 txSublinkState,
|
||||
NvU32 txSublinkSubState,
|
||||
NvU32 timeout
|
||||
)
|
||||
{
|
||||
NvU64 currentTxSublinkState = ~0;
|
||||
NvU32 currentTxSublinkSubState = ~0;
|
||||
|
||||
link->link_handlers->get_tx_mode(link,
|
||||
¤tTxSublinkState,
|
||||
¤tTxSublinkSubState);
|
||||
|
||||
while (!((currentTxSublinkState == txSublinkState) &&
|
||||
(currentTxSublinkSubState == txSublinkSubState)))
|
||||
{
|
||||
nvlink_sleep(1);
|
||||
|
||||
timeout--;
|
||||
|
||||
if (!timeout)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Timeout occured while polling on link.\n",
|
||||
__FUNCTION__));
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link info: device: %s link: %s txsublink state"
|
||||
" expected: 0x%08llx actual: 0x%08llx.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName,
|
||||
txSublinkState, currentTxSublinkState));
|
||||
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
link->link_handlers->get_tx_mode(link,
|
||||
¤tTxSublinkState,
|
||||
¤tTxSublinkSubState);
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll for the rx sublink to reach the specified state upto the given
|
||||
* timeout. Sublink state transition is considered failed once timeout
|
||||
* occurs.
|
||||
*
|
||||
* @param[in] link NVLink pointer
|
||||
* @param[in] rxSublinkState Rx Sublink State
|
||||
* @param[in] rxSublinkSubState Rx Sublink Substate
|
||||
* @param[in] timeout Timeout
|
||||
*
|
||||
* return NVL_SUCCESS if the rx sublink transitioned to the target state
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_poll_rx_sublink_state
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU64 rxSublinkState,
|
||||
NvU32 rxSublinkSubState,
|
||||
NvU32 timeout
|
||||
)
|
||||
{
|
||||
NvU64 currentRxSublinkState = ~0;
|
||||
NvU32 currentRxSublinkSubState = ~0;
|
||||
|
||||
link->link_handlers->get_rx_mode(link,
|
||||
¤tRxSublinkState,
|
||||
¤tRxSublinkSubState);
|
||||
|
||||
while (!((currentRxSublinkState == rxSublinkState) &&
|
||||
(currentRxSublinkSubState == rxSublinkSubState)))
|
||||
{
|
||||
nvlink_sleep(1);
|
||||
|
||||
timeout--;
|
||||
|
||||
if (!timeout)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Timeout occured while polling on link.\n",
|
||||
__FUNCTION__));
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link info: device: %s link: %s rxsublink state "
|
||||
"expected: 0x%08llx actual: 0x%08llx.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName,
|
||||
rxSublinkState, currentRxSublinkState));
|
||||
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
link->link_handlers->get_rx_mode(link,
|
||||
¤tRxSublinkState,
|
||||
¤tRxSublinkSubState);
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
114
src/common/nvlink/kernel/nvlink/core/nvlink_logger.c
Normal file
114
src/common/nvlink/kernel/nvlink/core/nvlink_logger.c
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
|
||||
static void _nvlink_core_print_link(nvlink_link *);
|
||||
|
||||
/**
|
||||
* Print link state of a single nvlink_link
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
*/
|
||||
void
|
||||
nvlink_core_print_link_state
|
||||
(
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
NvU64 linkMode = 0;
|
||||
NvU64 txSublinkMode = 0;
|
||||
NvU64 rxSublinkMode = 0;
|
||||
NvU32 txSublinkSubMode = 0;
|
||||
NvU32 rxSublinkSubMode = 0;
|
||||
|
||||
link->link_handlers->get_dl_link_mode(link, &linkMode);
|
||||
link->link_handlers->get_tx_mode(link, &txSublinkMode, &txSublinkSubMode);
|
||||
link->link_handlers->get_rx_mode(link, &rxSublinkMode, &rxSublinkSubMode);
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: %s:%s linkMode: %lld txSublinkMode: %lld rxSublinkMode: %lld\n",
|
||||
__FUNCTION__,
|
||||
link->dev->deviceName, link->linkName,
|
||||
linkMode, txSublinkMode, rxSublinkMode));
|
||||
}
|
||||
|
||||
/**
|
||||
* Print information of a single nvlink intranode connection
|
||||
*
|
||||
* @param[in] conn NVLink connection pointer
|
||||
*/
|
||||
void
|
||||
nvlink_core_print_intranode_conn
|
||||
(
|
||||
nvlink_intranode_conn *conn
|
||||
)
|
||||
{
|
||||
if (NULL == conn) return;
|
||||
|
||||
_nvlink_core_print_link(conn->end0);
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, " <---> "));
|
||||
_nvlink_core_print_link(conn->end1);
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "\n"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Print link type and device information of a single nvlink_link
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
*/
|
||||
static void
|
||||
_nvlink_core_print_link
|
||||
(
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
switch (link->dev->type)
|
||||
{
|
||||
case NVLINK_DEVICE_TYPE_GPU:
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "NVGPU"));
|
||||
break;
|
||||
case NVLINK_DEVICE_TYPE_IBMNPU:
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "IBMNPU"));
|
||||
break;
|
||||
case NVLINK_DEVICE_TYPE_NVSWITCH:
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "NVSWITCH"));
|
||||
break;
|
||||
case NVLINK_DEVICE_TYPE_EBRIDGE:
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, "EBRIDGE"));
|
||||
break;
|
||||
}
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"(%x): %04x:%02x:%02x.%x %s",
|
||||
link->dev->pciInfo.device,
|
||||
link->dev->pciInfo.domain,
|
||||
link->dev->pciInfo.bus,
|
||||
link->dev->pciInfo.device,
|
||||
link->dev->pciInfo.function,
|
||||
link->linkName));
|
||||
}
|
||||
845
src/common/nvlink/kernel/nvlink/core/nvlink_shutdown.c
Normal file
845
src/common/nvlink/kernel/nvlink/core/nvlink_shutdown.c
Normal file
@@ -0,0 +1,845 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
|
||||
static void _nvlink_core_clear_link_state(nvlink_link *);
|
||||
|
||||
/**
|
||||
* [CLEAN SHUTDOWN]
|
||||
*
|
||||
* Shutdown given intranode connections from active to L2 state
|
||||
*
|
||||
* @param[in] conns Array of connections to transition to L2
|
||||
* @param[in] connCount Number of connections in the array
|
||||
* @param[in] flags Flags to track if training is sync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the connections transitioned to L2 successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_powerdown_intranode_conns_from_active_to_L2
|
||||
(
|
||||
nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
NvU64 linkMode = NVLINK_LINKSTATE_OFF;
|
||||
NvU32 version;
|
||||
NvU32 i;
|
||||
|
||||
if ((conns == NULL) || (connCount == 0))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No connections to exit L2\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Set the version. Currently, only one version is supported on a chip
|
||||
version = conns[0]->end0->version;
|
||||
|
||||
/**************** Start the L2 entry sequence for the connections ***************/
|
||||
|
||||
// NVLink 3.0 and beyond, link needs to be ACTIVE before it can be transitioned to L2
|
||||
if ((version >= NVLINK_DEVICE_VERSION_30) && (connCount > 0))
|
||||
{
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
status = nvlink_core_check_intranode_conn_state(conns[i], NVLINK_LINKSTATE_HS);
|
||||
if ((status == NVL_SUCCESS) || (status == NVL_ERR_INVALID_STATE))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// We can train connections to HS only when they are already in SAFE
|
||||
status = nvlink_core_check_intranode_conn_state(conns[i], NVLINK_LINKSTATE_SAFE);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT(&conns[i], 1, flags))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Failed to train connection to ACTIVE.\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// STEP 0: Disable HeartBeat on the endpoints of all connections
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_DISABLE_HEARTBEAT,
|
||||
flags);
|
||||
|
||||
// Only send if not in loopback
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_DISABLE_HEARTBEAT,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
|
||||
// STEP 1: Disable PM on the endpoints of all connections
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_DISABLE_PM,
|
||||
flags);
|
||||
|
||||
// Only send if not in loopback
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_DISABLE_PM,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
|
||||
// Get link state on all endpoints. This ensures that NVLINK_LINKSTATE_DISABLE_PM completes
|
||||
if (flags == NVLINK_STATE_CHANGE_ASYNC)
|
||||
{
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode);
|
||||
if ((status != NVL_SUCCESS) ||
|
||||
(linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link %s:%s is not in good state after sending DISABLE PM\n",
|
||||
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
|
||||
}
|
||||
|
||||
status = conns[i]->end1->link_handlers->get_dl_link_mode(conns[i]->end1, &linkMode);
|
||||
if ((status != NVL_SUCCESS) ||
|
||||
(linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link %s:%s is not in good state after sending DISABLE PM\n",
|
||||
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for each connection, if both the ends and their sublinks are in HS mode
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
status = nvlink_core_check_intranode_conn_state(conns[i], NVLINK_LINKSTATE_HS);
|
||||
if (status == NVL_ERR_INVALID_STATE)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link %s:%s - Link %s:%s is not in good state\n",
|
||||
__FUNCTION__,
|
||||
conns[i]->end0->dev->deviceName, conns[i]->end0->linkName,
|
||||
conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
else if (status == NVL_SUCCESS)
|
||||
{
|
||||
// STEP 2: Change link state from ACTIVE to SWCFG on all endpoints
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
flags);
|
||||
// Only send if not in loopback
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// All the endpoints should now either be in SWCFG or transitioning to SWCFG. Poll for all
|
||||
// endpoints to reach SWCFG. If any endpoint does not transition to SWCFG, return error
|
||||
//
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
// Wait for the end0 to go to SWCFG
|
||||
status = nvlink_core_poll_link_state(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set endpoint %s:%s in SWCFG\n",
|
||||
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
|
||||
}
|
||||
|
||||
// Wait for the end1 to go to SWCFG
|
||||
status = nvlink_core_poll_link_state(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set endpoint %s:%s in SWCFG\n",
|
||||
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
}
|
||||
|
||||
// STEP 3: Change sub-link state to SAFE on all endpoints
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
flags);
|
||||
|
||||
// Only send if not in loopback
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
|
||||
// Poll for all endpoints sub-link state to reach SAFE
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
// Wait for sublinks to go to SAFE
|
||||
status = nvlink_core_poll_sublink_state(conns[i]->end0,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
|
||||
conns[i]->end1,
|
||||
NVLINK_SUBLINK_STATE_RX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set sublinks to SAFE\n",
|
||||
__FUNCTION__));
|
||||
}
|
||||
|
||||
status = nvlink_core_poll_sublink_state(conns[i]->end1,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
|
||||
conns[i]->end0,
|
||||
NVLINK_SUBLINK_STATE_RX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set sublinks to SAFE\n",
|
||||
__FUNCTION__));
|
||||
}
|
||||
}
|
||||
|
||||
// STEP 4: Save link state on all the endpoints
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
if (!conns[i]->end0->bStateSaved)
|
||||
{
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_SAVE_STATE,
|
||||
flags);
|
||||
}
|
||||
|
||||
if (!conns[i]->end1->bStateSaved)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_SAVE_STATE,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
|
||||
// Get link state on all endpoints. This ensures that NVLINK_LINKSTATE_SAVE_STATE completes
|
||||
if (flags == NVLINK_STATE_CHANGE_ASYNC)
|
||||
{
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode);
|
||||
if ((status != NVL_SUCCESS) ||
|
||||
(linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link %s:%s is not in good state after sending SAVESTATE command\n",
|
||||
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
|
||||
}
|
||||
|
||||
status = conns[i]->end1->link_handlers->get_dl_link_mode(conns[i]->end1, &linkMode);
|
||||
if ((status != NVL_SUCCESS) ||
|
||||
(linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link %s:%s is not in good state after sending SAVESTATE command\n",
|
||||
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// STEP 5: Trigger the sleep request on all the endpoints
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
//
|
||||
// Send SLEEP request on one end of connection if not in loopback.
|
||||
// Don' poll, since transition will happen when both ends get the request
|
||||
//
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end0->link_handlers->set_tl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_SLEEP,
|
||||
NVLINK_STATE_CHANGE_ASYNC);
|
||||
}
|
||||
|
||||
// Send SLEEP request on both ends and poll for completion
|
||||
conns[i]->end1->link_handlers->set_tl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_SLEEP,
|
||||
NVLINK_STATE_CHANGE_SYNC);
|
||||
conns[i]->end0->link_handlers->set_tl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_SLEEP,
|
||||
NVLINK_STATE_CHANGE_SYNC);
|
||||
}
|
||||
|
||||
// Finally check the connection states
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
status = nvlink_core_check_intranode_conn_state(conns[i], NVLINK_LINKSTATE_SLEEP);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link %s:%s - Link %s:%s is not in good state after sending SLEEP request\n",
|
||||
__FUNCTION__,
|
||||
conns[i]->end0->dev->deviceName, conns[i]->end0->linkName,
|
||||
conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
|
||||
// Update the link and sublink states in the core library
|
||||
conns[i]->end0->state = NVLINK_LINKSTATE_SLEEP;
|
||||
conns[i]->end1->state = NVLINK_LINKSTATE_SLEEP;
|
||||
conns[i]->end0->tx_sublink_state = NVLINK_SUBLINK_STATE_TX_OFF;
|
||||
conns[i]->end1->tx_sublink_state = NVLINK_SUBLINK_STATE_TX_OFF;
|
||||
conns[i]->end0->rx_sublink_state = NVLINK_SUBLINK_STATE_RX_OFF;
|
||||
conns[i]->end1->rx_sublink_state = NVLINK_SUBLINK_STATE_RX_OFF;
|
||||
|
||||
// Update power state transition status for the connection
|
||||
conns[i]->end0->powerStateTransitionStatus = nvlink_power_state_in_L2;
|
||||
conns[i]->end1->powerStateTransitionStatus = nvlink_power_state_in_L2;
|
||||
}
|
||||
|
||||
/***************** End of L2 entry sequence for the connections ****************/
|
||||
|
||||
//
|
||||
// Note that status is squashed, since the expectation is that we soldier on if any link fails
|
||||
// during the transition to L2 state
|
||||
//
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* [PSEUDO-CLEAN SHUTDOWN]
|
||||
*
|
||||
* Shutdown the given array of intranode connections from ACTIVE to OFF state
|
||||
*
|
||||
* @param[in] conns Array of connections to shutdown
|
||||
* @param[in] connCount Number of connections in the array
|
||||
* @param[in] flags Flags to track if shutdown is sync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the connections shutdown successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_powerdown_intranode_conns_from_active_to_off
|
||||
(
|
||||
nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
NvU32 i;
|
||||
|
||||
if ((conns == NULL) || (connCount == 0))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No connections to shutdown\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
// Disable Power Management before moving link out of Active
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_DISABLE_PM,
|
||||
flags);
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_DISABLE_PM,
|
||||
flags);
|
||||
}
|
||||
|
||||
// Move both ends to SWCFG
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
flags);
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
|
||||
// Poll for links to reach SWCFG & initiate sublinks to SAFE state
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
// Wait for the end0 to go to SWCFG
|
||||
status = nvlink_core_poll_link_state(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set endpoint %s:%s in SWCFG",
|
||||
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
|
||||
|
||||
// to track Failure
|
||||
conns[i]->end0->inSWCFG = NV_FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
conns[i]->end0->inSWCFG = NV_TRUE;
|
||||
}
|
||||
|
||||
// Wait for the end1 to go to SWCFG
|
||||
status = nvlink_core_poll_link_state(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set endpoint %s:%s in SWCFG\n",
|
||||
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
|
||||
// to track Failure
|
||||
conns[i]->end1->inSWCFG = NV_FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
conns[i]->end1->inSWCFG = NV_TRUE;
|
||||
}
|
||||
|
||||
// Change each sublink state to SAFE
|
||||
if(conns[i]->end0->inSWCFG == NV_TRUE)
|
||||
{
|
||||
conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
flags);
|
||||
}
|
||||
|
||||
if (conns[i]->end0 != conns[i]->end1 && conns[i]->end1->inSWCFG == NV_TRUE)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
|
||||
// Poll for sublinks to reach SAFE state
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
// Wait for sublinks to go to SAFE
|
||||
if(conns[i]->end0->inSWCFG == NV_TRUE)
|
||||
{
|
||||
status = nvlink_core_poll_sublink_state(conns[i]->end0,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
|
||||
conns[i]->end1,
|
||||
NVLINK_SUBLINK_STATE_RX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
}
|
||||
if (status != NVL_SUCCESS || conns[i]->end0->inSWCFG == NV_FALSE)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set sublinks to SAFE",
|
||||
__FUNCTION__));
|
||||
}
|
||||
|
||||
if(conns[i]->end1->inSWCFG == NV_TRUE)
|
||||
{
|
||||
status = nvlink_core_poll_sublink_state(conns[i]->end1,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
|
||||
conns[i]->end0,
|
||||
NVLINK_SUBLINK_STATE_RX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
}
|
||||
if (status != NVL_SUCCESS || conns[i]->end1->inSWCFG == NV_FALSE)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set sublinks to SAFE",
|
||||
__FUNCTION__));
|
||||
}
|
||||
|
||||
//
|
||||
// Disable error detect on both sides of the link
|
||||
//
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_DISABLE_ERR_DETECT,
|
||||
flags);
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_DISABLE_ERR_DETECT,
|
||||
flags);
|
||||
}
|
||||
|
||||
//
|
||||
// Disable Lanes on both sides of the link
|
||||
//
|
||||
status = conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_LANE_DISABLE,
|
||||
flags);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to disable lanes for link %s:%s\n",
|
||||
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
|
||||
}
|
||||
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
status = conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_LANE_DISABLE,
|
||||
flags);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to disable lanes for link %s:%s\n",
|
||||
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Shutdown Lanes on both sides of the link
|
||||
//
|
||||
status = conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_LANE_SHUTDOWN,
|
||||
flags);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to shutdown lanes for link %s:%s\n",
|
||||
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
|
||||
}
|
||||
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
status = conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_LANE_SHUTDOWN,
|
||||
flags);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to shutdown lanes for link %s:%s\n",
|
||||
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
}
|
||||
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, NVLINK_LINKSTATE_OFF, flags);
|
||||
|
||||
// Link becomes inaccessible after it's turned off. Check if this is a loopback connection
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, NVLINK_LINKSTATE_OFF, flags);
|
||||
}
|
||||
|
||||
_nvlink_core_clear_link_state(conns[i]->end0);
|
||||
_nvlink_core_clear_link_state(conns[i]->end1);
|
||||
}
|
||||
|
||||
//
|
||||
// Squash status. If any side of link does not respond the link is
|
||||
// shutdown unilaterally
|
||||
//
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Power down the given array of intranode connections from ACTIVE to SWCFG state
|
||||
*
|
||||
* @param[in] conns Array of connections to shutdown
|
||||
* @param[in] connCount Number of connections in the array
|
||||
* @param[in] flags Flags to track if shutdown is sync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the connections shutdown successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_powerdown_intranode_conns_from_active_to_swcfg
|
||||
(
|
||||
nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
NvU32 i;
|
||||
|
||||
if ((conns == NULL) || (connCount == 0))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No connections to shutdown\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
// Disable Power Management before moving link out of Active
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_DISABLE_PM,
|
||||
flags);
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_DISABLE_PM,
|
||||
flags);
|
||||
}
|
||||
|
||||
// Move both ends to SWCFG
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
flags);
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Poll _SAFE state for connections and set corresponding sublinks to _SAFE
|
||||
//
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
// Wait for the end0 to go to SWCFG
|
||||
status = nvlink_core_poll_link_state(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set endpoint %s:%s in SWCFG",
|
||||
__FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Change each sublink state to SAFE
|
||||
conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
flags);
|
||||
}
|
||||
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
// Wait for the end1 to go to SWCFG
|
||||
status = nvlink_core_poll_link_state(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_SAFE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set endpoint %s:%s in SWCFG",
|
||||
__FUNCTION__, conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Change each sublink state to SAFE
|
||||
conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for sublinks to go to SAFE
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
status = nvlink_core_poll_sublink_state(conns[i]->end0,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
|
||||
conns[i]->end1,
|
||||
NVLINK_SUBLINK_STATE_RX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set sublinks to SAFE (TX:RX)",
|
||||
__FUNCTION__));
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: sublinks (%s:%s) (%s:%s)",
|
||||
__FUNCTION__,
|
||||
conns[i]->end0->dev->deviceName, conns[i]->end0->linkName,
|
||||
conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
|
||||
status = nvlink_core_poll_sublink_state(conns[i]->end1,
|
||||
NVLINK_SUBLINK_STATE_TX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_TX_STABLE,
|
||||
conns[i]->end0,
|
||||
NVLINK_SUBLINK_STATE_RX_SAFE,
|
||||
NVLINK_SUBLINK_SUBSTATE_RX_STABLE,
|
||||
NVLINK_TRANSITION_SAFE_TIMEOUT);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Unable to set sublinks to SAFE (RX:TX)",
|
||||
__FUNCTION__));
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: sublinks (%s:%s) (%s:%s)",
|
||||
__FUNCTION__,
|
||||
conns[i]->end0->dev->deviceName, conns[i]->end0->linkName,
|
||||
conns[i]->end1->dev->deviceName, conns[i]->end1->linkName));
|
||||
}
|
||||
}
|
||||
|
||||
// Update tracking info
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Connection is in SAFE mode. ",
|
||||
__FUNCTION__));
|
||||
nvlink_core_print_intranode_conn(conns[i]);
|
||||
}
|
||||
|
||||
//
|
||||
// Squash status. If any side of link doesn not respond the link is
|
||||
// shutdown unilaterally
|
||||
//
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset the given array of intranode connections
|
||||
*
|
||||
* @param[in] conns Array of connections to reset
|
||||
* @param[in] connCount Number of connections in the array
|
||||
* @param[in] flags Flags
|
||||
*
|
||||
* return NVL_SUCCESS if the connections reset successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_core_reset_intranode_conns
|
||||
(
|
||||
nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
if ((conns == NULL) || (connCount == 0))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No connections to shutdown\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
//
|
||||
// Reset both ends of this connection.
|
||||
// This path should enable/init those link endpoints as well.
|
||||
//
|
||||
// NVLink3.0 + uses the TL link reset
|
||||
//
|
||||
if (conns[i]->end0->version >= NVLINK_DEVICE_VERSION_30)
|
||||
{
|
||||
conns[i]->end0->link_handlers->set_tl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_RESET,
|
||||
flags);
|
||||
if (conns[i]->end0 != conns[i]->end1)
|
||||
{
|
||||
conns[i]->end1->link_handlers->set_tl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_RESET,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0,
|
||||
NVLINK_LINKSTATE_RESET,
|
||||
flags);
|
||||
conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1,
|
||||
NVLINK_LINKSTATE_RESET,
|
||||
flags);
|
||||
}
|
||||
|
||||
_nvlink_core_clear_link_state(conns[i]->end0);
|
||||
_nvlink_core_clear_link_state(conns[i]->end1);
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears Core Library State
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
*/
|
||||
static void
|
||||
_nvlink_core_clear_link_state
|
||||
(
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
// Receiver Detect needs to happen again
|
||||
link->bRxDetected = NV_FALSE;
|
||||
|
||||
// INITNEGOTIATE needs to happen again
|
||||
link->bInitnegotiateConfigGood = NV_FALSE;
|
||||
|
||||
// TxCommonMode needs to happen again
|
||||
link->bTxCommonModeFail = NV_FALSE;
|
||||
|
||||
// SAFE transition needs to happen again
|
||||
link->bSafeTransitionFail = NV_FALSE;
|
||||
|
||||
// Reset the SW state tracking the link and sublink states
|
||||
link->state = NVLINK_LINKSTATE_OFF;
|
||||
link->tx_sublink_state = NVLINK_SUBLINK_STATE_TX_OFF;
|
||||
link->rx_sublink_state = NVLINK_SUBLINK_STATE_RX_OFF;
|
||||
}
|
||||
1078
src/common/nvlink/kernel/nvlink/core/nvlink_training.c
Normal file
1078
src/common/nvlink/kernel/nvlink/core/nvlink_training.c
Normal file
File diff suppressed because it is too large
Load Diff
3557
src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c
Normal file
3557
src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,281 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
#include "nvlink_lock.h"
|
||||
|
||||
/**
|
||||
* Get the connected remote endpoint information
|
||||
*
|
||||
* For a given link, return the remote endpoint details it is connected to.
|
||||
* If there is no connection associated with the specified link, then, the
|
||||
* conn_info.connected member will be NV_FALSE.
|
||||
*
|
||||
* Note: This routine will not initiate any link initialization or topology
|
||||
* discovery.
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
* @param[out] conn_info Details of remote endpoint
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_get_remote_conn_info
|
||||
(
|
||||
nvlink_link *link,
|
||||
nvlink_conn_info *conn_info
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_link *remoteEnd = NULL;
|
||||
nvlink_intranode_conn *intraConn = NULL;
|
||||
nvlink_internode_conn *interConn = NULL;
|
||||
NvU32 numLinks = 0;
|
||||
|
||||
nvlink_link **links = (nvlink_link **)nvlink_malloc(
|
||||
sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
|
||||
if (links == NULL)
|
||||
{
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
// Initialize connected state to false
|
||||
conn_info->bConnected = NV_FALSE;
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_free((void *)links);
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the
|
||||
// connnection list
|
||||
//
|
||||
|
||||
// Find the associated intranode connection with this link
|
||||
nvlink_core_get_intranode_conn(link, &intraConn);
|
||||
if (intraConn != NULL)
|
||||
{
|
||||
// Get the required remote endpoint of the connection
|
||||
remoteEnd = (intraConn->end0 == link ?
|
||||
intraConn->end1 : intraConn->end0);
|
||||
|
||||
// Mark the connected state
|
||||
conn_info->bConnected = NV_TRUE;
|
||||
|
||||
if ((numLinks+1) >= NVLINK_MAX_SYSTEM_LINK_NUM)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: numLinks >= NVLINK_MAX_SYSTEM_LINK_NUM",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_assert(0);
|
||||
|
||||
// Release the top-level lock and free links
|
||||
nvlink_lib_top_lock_release();
|
||||
nvlink_free((void *)links);
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
links[numLinks] = link;
|
||||
numLinks++;
|
||||
|
||||
links[numLinks] = remoteEnd;
|
||||
numLinks++;
|
||||
}
|
||||
|
||||
//
|
||||
// On multi-node systems, check the internode connection
|
||||
// list as well to return the connection information
|
||||
//
|
||||
nvlink_core_get_internode_conn(link, &interConn);
|
||||
if (interConn != NULL)
|
||||
{
|
||||
// Mark the connected state
|
||||
conn_info->bConnected = NV_TRUE;
|
||||
|
||||
links[numLinks] = link;
|
||||
numLinks++;
|
||||
}
|
||||
|
||||
// Acquire per-link lock
|
||||
status = nvlink_lib_link_locks_acquire(links, numLinks);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_lib_top_lock_release();
|
||||
nvlink_free((void *)links);
|
||||
return status;
|
||||
}
|
||||
|
||||
if (intraConn != NULL)
|
||||
{
|
||||
nvlink_core_copy_intranode_conn_info(remoteEnd, conn_info);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (interConn != NULL)
|
||||
{
|
||||
nvlink_core_copy_internode_conn_info(&interConn->remote_end,
|
||||
conn_info);
|
||||
}
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(links, numLinks);
|
||||
|
||||
// Release top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
if (links != NULL)
|
||||
{
|
||||
nvlink_free((void *)links);
|
||||
}
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the connected remote endpoint information
|
||||
*
|
||||
* For a given link, return the remote endpoint it is connected to.
|
||||
*
|
||||
* Note: This routine triggers topology discovery on the set of
|
||||
* links registered in the core library
|
||||
*
|
||||
* @param[in] end NVLink Link pointer
|
||||
* @param[out] conn_info Details of remote endpoint
|
||||
* @param[in] flags Flags
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_discover_and_get_remote_conn_info
|
||||
(
|
||||
nvlink_link *end,
|
||||
nvlink_conn_info *conn_info,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_link *link = NULL;
|
||||
nvlink_link *remote_end = NULL;
|
||||
nvlink_device *dev = NULL;
|
||||
NvU32 numLinks = 0;
|
||||
|
||||
nvlink_link **links = (nvlink_link **)nvlink_malloc(
|
||||
sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
|
||||
if (links == NULL)
|
||||
{
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_free((void *)links);
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the device
|
||||
// and link lists
|
||||
//
|
||||
|
||||
FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
FOR_EACH_LINK_REGISTERED(link, dev, node)
|
||||
{
|
||||
if (numLinks >= NVLINK_MAX_SYSTEM_LINK_NUM)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: numLinks >= NVLINK_MAX_SYSTEM_LINK_NUM",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_assert(0);
|
||||
|
||||
// Release the top-level lock and free links
|
||||
nvlink_lib_top_lock_release();
|
||||
nvlink_free((void *)links);
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
links[numLinks] = link;
|
||||
numLinks++;
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire the per-link locks
|
||||
status = nvlink_lib_link_locks_acquire(links, numLinks);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
nvlink_free((void *)links);
|
||||
return status;
|
||||
}
|
||||
|
||||
// Initialize connected state to false
|
||||
conn_info->bConnected = NV_FALSE;
|
||||
|
||||
// Get the remote_end of the link
|
||||
nvlink_core_discover_and_get_remote_end(end, &remote_end, flags);
|
||||
|
||||
if (remote_end)
|
||||
{
|
||||
// mark the connected state
|
||||
conn_info->bConnected = NV_TRUE;
|
||||
nvlink_core_copy_intranode_conn_info(remote_end, conn_info);
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(links, numLinks);
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
if (links != NULL)
|
||||
{
|
||||
nvlink_free((void *)links);
|
||||
}
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
#include "nvlink_lock.h"
|
||||
|
||||
/**
|
||||
* Re-Initialize a given link from OFF to SWCFG
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
* @param[in] flags Flag to track if the initialization is aync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the initialization was successful
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_reinit_link_from_off_to_swcfg
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
nvlink_link *links[2] = {0};
|
||||
|
||||
if (!link)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad link pointer specified.\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the
|
||||
// connection list
|
||||
//
|
||||
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
if (!conn)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No connection was found for this link.\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
links[0] = conn->end0;
|
||||
links[1] = conn->end1;
|
||||
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(links, 2);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// The connection list traversal is also complete now
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
{
|
||||
nvlink_core_init_links_from_off_to_swcfg(links, 2, flags);
|
||||
}
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(links, 2);
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,323 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
#include "nvlink_lock.h"
|
||||
|
||||
/**
|
||||
* TODO: Rework this function to acquire locks and update callers
|
||||
*
|
||||
* Check if the device has no links registered
|
||||
*
|
||||
* @param[in] dev NVLink Device pointer
|
||||
*
|
||||
* return NV_TRUE if the device has no links registered
|
||||
*/
|
||||
NvBool
|
||||
nvlink_lib_is_link_list_empty
|
||||
(
|
||||
nvlink_device *dev
|
||||
)
|
||||
{
|
||||
NvBool isEmpty = NV_TRUE;
|
||||
|
||||
isEmpty = nvListIsEmpty(&dev->link_list);
|
||||
|
||||
return isEmpty;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the link associated with the given link id.
|
||||
*
|
||||
* @param[in] device NVLink Device Pointer
|
||||
* @param[in] link_id Link Id of the given link
|
||||
* @param[out] link NVLink Link pointer
|
||||
*
|
||||
* return NVL_SUCCESS on success
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_get_link
|
||||
(
|
||||
nvlink_device *device,
|
||||
NvU32 link_id,
|
||||
nvlink_link **link
|
||||
)
|
||||
{
|
||||
nvlink_link *cur = NULL;
|
||||
NvlStatus status = -NVL_NOT_FOUND;
|
||||
|
||||
if (device == NULL || link == NULL)
|
||||
{
|
||||
return -NVL_BAD_ARGS;
|
||||
}
|
||||
|
||||
*link = NULL;
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the
|
||||
// link list for the device
|
||||
//
|
||||
|
||||
// Reset status to -NVL_NOT_FOUND
|
||||
status = -NVL_NOT_FOUND;
|
||||
|
||||
FOR_EACH_LINK_REGISTERED(cur, device, node)
|
||||
{
|
||||
if (cur->linkNumber == link_id)
|
||||
{
|
||||
*link = cur;
|
||||
status = NVL_SUCCESS;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Release the top level-lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the given link as the link master.
|
||||
* This requires that the remote end of the link is known, and that it
|
||||
* hasn't set itself to be the master.
|
||||
*
|
||||
* Note: This function is used by RM to set master attribute to a link
|
||||
* in order to handle GPU lock inversion problem while servicing
|
||||
* link interrupts(re-training). With external fabric management
|
||||
* enabled, we don't have the issue. Also we don't have to worry
|
||||
* about the inter-node connections which are managed by FM.
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
*
|
||||
* return NVL_SUCCESS if the master was set
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_set_link_master
|
||||
(
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
nvlink_link *remote_end = NULL;
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
nvlink_link *links[2] = {0};
|
||||
NvU32 numLinks = 0;
|
||||
|
||||
if (link == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad link pointer specified.\n",
|
||||
__FUNCTION__));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the
|
||||
// connection list
|
||||
//
|
||||
|
||||
links[numLinks] = link;
|
||||
numLinks++;
|
||||
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
if (conn != NULL)
|
||||
{
|
||||
remote_end = (conn->end0 == link ? conn->end1 : conn->end0);
|
||||
links[numLinks] = remote_end;
|
||||
numLinks++;
|
||||
}
|
||||
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(links, numLinks);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// The connection list traversal is also complete now
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
// Early return if we've already done this
|
||||
if (link->master)
|
||||
{
|
||||
status = NVL_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Make sure the remote end exists and hasn't claimed the master yet
|
||||
if (remote_end == NULL || remote_end->master)
|
||||
{
|
||||
status = NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
else
|
||||
{
|
||||
link->master = NV_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(links, numLinks);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the link master associated with the given link.
|
||||
* This may be the given link, or it may be the remote end. In the case
|
||||
* when no master is assigned or the remote end is not known, this will
|
||||
* return an error.
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
* @param[out] master Master endpoint for the link
|
||||
*
|
||||
* return NVL_SUCCESS if the master was found
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_get_link_master
|
||||
(
|
||||
nvlink_link *link,
|
||||
nvlink_link **master
|
||||
)
|
||||
{
|
||||
nvlink_link *remote_end = NULL;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_link *links[2] = {0};
|
||||
NvU32 numLinks = 0;
|
||||
|
||||
if (link == NULL || master == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad link pointer specified.\n",
|
||||
__FUNCTION__));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the
|
||||
// connection list
|
||||
//
|
||||
|
||||
links[numLinks] = link;
|
||||
numLinks++;
|
||||
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
if (conn != NULL)
|
||||
{
|
||||
remote_end = (conn->end0 == link ? conn->end1 : conn->end0);
|
||||
links[numLinks] = remote_end;
|
||||
numLinks++;
|
||||
}
|
||||
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(links, numLinks);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// The connection list traversal is also complete now
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
if (link->master)
|
||||
{
|
||||
*master = link;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Make sure the remote end exists and hasn't claimed the master yet
|
||||
if (remote_end == NULL)
|
||||
{
|
||||
status = NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
*master = remote_end;
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(links, numLinks);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,508 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
|
||||
#include "nvlink_lock.h"
|
||||
|
||||
static NvBool _nvlink_lib_is_device_registered(nvlink_device *);
|
||||
static NvBool _nvlink_lib_is_link_registered(nvlink_device *, nvlink_link *);
|
||||
|
||||
/**
|
||||
* Associates device with the NVLink Core Library
|
||||
*
|
||||
* @param[in] dev NVLink Device pointer
|
||||
*
|
||||
* return NVL_SUCCESS if the device is registered successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_register_device
|
||||
(
|
||||
nvlink_device *dev
|
||||
)
|
||||
{
|
||||
NvlStatus lock_status = NVL_SUCCESS;
|
||||
NvlStatus result = NVL_SUCCESS;
|
||||
|
||||
if (dev == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad device pointer\n",
|
||||
__FUNCTION__));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Acquire top-level lock
|
||||
lock_status = nvlink_lib_top_lock_acquire();
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Top-level lock is now acquired
|
||||
|
||||
// Assign the deviceId for the device
|
||||
dev->deviceId = (NvU64)(NvUPtr)dev;
|
||||
|
||||
// Assign fabric node id to the device object
|
||||
dev->nodeId = nvlinkLibCtx.nodeId;
|
||||
|
||||
// Register the device if not yet registered
|
||||
if (!_nvlink_lib_is_device_registered(dev))
|
||||
{
|
||||
// Initialize the node and link list for the device
|
||||
nvListInit(&dev->link_list);
|
||||
nvListInit(&dev->node);
|
||||
|
||||
// Add the device to the list of devices
|
||||
nvListAppend(&dev->node, &nvlinkLibCtx.nv_devicelist_head.node);
|
||||
|
||||
result = NVL_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: %s is already registered in nvlink core\n",
|
||||
__FUNCTION__, dev->deviceName));
|
||||
|
||||
result = NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Release top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unassociates device from the NVLink Core
|
||||
* Includes removing any links related to the device if still registered
|
||||
*
|
||||
* @param[in] dev NVLink Device pointer
|
||||
*
|
||||
* return NVL_SUCCESS if the device is un-registered successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_unregister_device
|
||||
(
|
||||
nvlink_device *dev
|
||||
)
|
||||
{
|
||||
NvBool bConnected = NV_FALSE;
|
||||
nvlink_intranode_conn *intra_conn = NULL;
|
||||
nvlink_internode_conn *inter_conn = NULL;
|
||||
NvlStatus lock_status = NVL_SUCCESS;
|
||||
NvU32 numLinks = 0;
|
||||
nvlink_link *curLink = NULL;
|
||||
nvlink_link *nextLink = NULL;
|
||||
|
||||
if (dev == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad device pointer\n",
|
||||
__FUNCTION__));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Acquire top-level lock
|
||||
lock_status = nvlink_lib_top_lock_acquire();
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Top-level lock is now acquired
|
||||
|
||||
// Loop to unregister each link from the device
|
||||
FOR_EACH_LINK_REGISTERED_SAFE(curLink, nextLink, dev, node)
|
||||
{
|
||||
// Reset the variables specific to each link
|
||||
bConnected = NV_FALSE;
|
||||
intra_conn = NULL;
|
||||
inter_conn = NULL;
|
||||
numLinks = 0;
|
||||
|
||||
// We will use at most 2 links in this function - the link and it's partner
|
||||
nvlink_link *links[2] = {0};
|
||||
|
||||
links[numLinks] = curLink;
|
||||
numLinks++;
|
||||
|
||||
// Check if there's an intranode connection present
|
||||
nvlink_core_get_intranode_conn(curLink, &intra_conn);
|
||||
if (intra_conn != NULL)
|
||||
{
|
||||
// Mark the endpoint as connected
|
||||
bConnected = NV_TRUE;
|
||||
|
||||
if (intra_conn->end0 == curLink)
|
||||
{
|
||||
links[numLinks] = intra_conn->end1;
|
||||
}
|
||||
else
|
||||
{
|
||||
links[numLinks] = intra_conn->end0;
|
||||
}
|
||||
numLinks++;
|
||||
}
|
||||
|
||||
//
|
||||
// Check if there's an internode connection present
|
||||
// Only the local end required for internode connection
|
||||
// (which is registered above) so just detect this for now
|
||||
//
|
||||
nvlink_core_get_internode_conn(curLink, &inter_conn);
|
||||
|
||||
// Acquire per-link lock
|
||||
lock_status = nvlink_lib_link_locks_acquire(links, numLinks);
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
nvlink_lib_top_lock_release();
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
if (intra_conn != NULL)
|
||||
{
|
||||
// Remove the associated intranode connection with this link from the list
|
||||
nvlink_core_remove_intranode_conn(intra_conn);
|
||||
}
|
||||
|
||||
if (inter_conn != NULL)
|
||||
{
|
||||
// Remove the associated internode connection with this link from the list
|
||||
nvlink_core_remove_internode_conn(curLink);
|
||||
}
|
||||
|
||||
// Remove the link from the link list for the device
|
||||
nvListDel(&curLink->node);
|
||||
|
||||
// Release and free the link locks
|
||||
nvlink_lib_link_locks_release(links, numLinks);
|
||||
nvlink_lib_link_lock_free(curLink);
|
||||
|
||||
curLink->link_handlers->remove(curLink);
|
||||
|
||||
// If the endpoint was not connected
|
||||
nvlinkLibCtx.notConnectedEndpoints = ( bConnected ?
|
||||
nvlinkLibCtx.notConnectedEndpoints :
|
||||
nvlinkLibCtx.notConnectedEndpoints - 1 );
|
||||
|
||||
// Update count of registered endpoints
|
||||
nvlinkLibCtx.registeredEndpoints--;
|
||||
}
|
||||
|
||||
nvListDel(&dev->node);
|
||||
|
||||
// Release top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Associates link with a device in the NVLink Core library
|
||||
*
|
||||
* @param[in] dev NVLink Device pointer
|
||||
* @param[in] link NVLink Link pointer
|
||||
*
|
||||
* return NVL_SUCCESS if the link is registered successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_register_link
|
||||
(
|
||||
nvlink_device *dev,
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
NvlStatus lock_status = NVL_SUCCESS;
|
||||
NvlStatus result = NVL_SUCCESS;
|
||||
|
||||
if (dev == NULL || link == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad device or link pointer\n",
|
||||
__FUNCTION__));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Allocate per-link lock for the link to be registered
|
||||
lock_status = nvlink_lib_link_lock_alloc(link);
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to alloc per-link lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Acquire top-level lock
|
||||
lock_status = nvlink_lib_top_lock_acquire();
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
//
|
||||
// Since the per-link lock will be allocated when this function
|
||||
// is run again. Free the unused allocated lock.
|
||||
//
|
||||
nvlink_lib_link_lock_free(link);
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Top-level lock is now acquired
|
||||
|
||||
// Assign the linkId for the device
|
||||
link->linkId = (NvU64)(NvUPtr) link;
|
||||
|
||||
// Register the link if not yet registered
|
||||
if (!_nvlink_lib_is_link_registered(dev, link))
|
||||
{
|
||||
// Initialize the node for the link
|
||||
nvListInit(&link->node);
|
||||
|
||||
// Generate token for this link
|
||||
link->token = (NvU64)(NvUPtr) link;
|
||||
|
||||
// Add the link to the list of links for the device
|
||||
nvListAppend(&link->node, &dev->link_list);
|
||||
link->link_handlers->add(link);
|
||||
|
||||
// Initialize training parameters
|
||||
link->safe_retries = 0;
|
||||
link->packet_injection_retries = 0;
|
||||
|
||||
// Update count of registered endpoints
|
||||
nvlinkLibCtx.registeredEndpoints++;
|
||||
|
||||
// Indicate that a new endpoint is registered
|
||||
nvlinkLibCtx.bNewEndpoints = NV_TRUE;
|
||||
|
||||
result = NVL_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: %s: %s is already registered in nvlink core\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
|
||||
result = NVL_ERR_GENERIC;
|
||||
|
||||
// Free per-link lock since we don't have a new link
|
||||
nvlink_lib_link_lock_free(link);
|
||||
}
|
||||
|
||||
// Release top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unassociates link from a device in the NVLink Core library
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
*
|
||||
* return NVL_SUCCESS if the link is un-registered successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_unregister_link
|
||||
(
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
NvBool bConnected = NV_FALSE;
|
||||
nvlink_intranode_conn *intra_conn = NULL;
|
||||
nvlink_internode_conn *inter_conn = NULL;
|
||||
NvlStatus lock_status = NVL_SUCCESS;
|
||||
NvU32 numLinks = 0;
|
||||
|
||||
// We will use at most 2 links in this function - the link and it's partner
|
||||
nvlink_link *links[2] = {0};
|
||||
|
||||
if (link == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad link pointer\n",
|
||||
__FUNCTION__));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Acquire top-level lock
|
||||
lock_status = nvlink_lib_top_lock_acquire();
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Top-level lock is now acquired
|
||||
|
||||
links[numLinks] = link;
|
||||
numLinks++;
|
||||
|
||||
// Check if there's an intranode connection present
|
||||
nvlink_core_get_intranode_conn(link, &intra_conn);
|
||||
if (intra_conn != NULL)
|
||||
{
|
||||
// Mark the endpoint as connected
|
||||
bConnected = NV_TRUE;
|
||||
|
||||
if (intra_conn->end0 == link)
|
||||
{
|
||||
links[numLinks] = intra_conn->end1;
|
||||
}
|
||||
else
|
||||
{
|
||||
links[numLinks] = intra_conn->end0;
|
||||
}
|
||||
numLinks++;
|
||||
}
|
||||
|
||||
//
|
||||
// Check if there's an internode connection present
|
||||
// Only the local end required for internode connection
|
||||
// (which is registered above) so just detect this for now
|
||||
//
|
||||
nvlink_core_get_internode_conn(link, &inter_conn);
|
||||
|
||||
// Acquire per-link lock
|
||||
lock_status = nvlink_lib_link_locks_acquire(links, numLinks);
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
nvlink_lib_top_lock_release();
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
if (intra_conn != NULL)
|
||||
{
|
||||
// Remove the associated intranode connection with this link from the list
|
||||
nvlink_core_remove_intranode_conn(intra_conn);
|
||||
}
|
||||
|
||||
if (inter_conn != NULL)
|
||||
{
|
||||
// Remove the associated internode connection with this link from the list
|
||||
nvlink_core_remove_internode_conn(link);
|
||||
}
|
||||
|
||||
// Remove the link from the link list for the device
|
||||
nvListDel(&link->node);
|
||||
|
||||
// Release and free the locks
|
||||
nvlink_lib_link_locks_release(links, numLinks);
|
||||
nvlink_lib_link_lock_free(link);
|
||||
|
||||
link->link_handlers->remove(link);
|
||||
|
||||
// If the endpoint was not connected
|
||||
nvlinkLibCtx.notConnectedEndpoints = ( bConnected ?
|
||||
nvlinkLibCtx.notConnectedEndpoints :
|
||||
nvlinkLibCtx.notConnectedEndpoints - 1 );
|
||||
|
||||
// Update count of registered endpoints
|
||||
nvlinkLibCtx.registeredEndpoints--;
|
||||
|
||||
// Release top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the nvlink device is already registered in the core library
|
||||
*
|
||||
* @param[in] dev NVLink Device pointer
|
||||
*
|
||||
* return NV_TRUE if the device is already registered
|
||||
*/
|
||||
static NvBool
|
||||
_nvlink_lib_is_device_registered
|
||||
(
|
||||
nvlink_device *dev
|
||||
)
|
||||
{
|
||||
nvlink_device *tmpDev = NULL;
|
||||
|
||||
FOR_EACH_DEVICE_REGISTERED(tmpDev, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
if (dev->deviceId == tmpDev->deviceId)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the nvlink link is already registered in the core library
|
||||
*
|
||||
* @param[in] dev NVLink Device pointer
|
||||
* @param[in] link NVLink Link pointer
|
||||
*
|
||||
* return NV_TRUE if the link is already registered for the device
|
||||
*/
|
||||
static NvBool
|
||||
_nvlink_lib_is_link_registered
|
||||
(
|
||||
nvlink_device *dev,
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
nvlink_link *tmpLink = NULL;
|
||||
|
||||
FOR_EACH_LINK_REGISTERED(tmpLink, dev, node)
|
||||
{
|
||||
if (link->linkId == tmpLink->linkId)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
return NV_FALSE;
|
||||
}
|
||||
@@ -0,0 +1,777 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
#include "nvlink_lock.h"
|
||||
|
||||
/**
|
||||
* [CLEAN SHUTDOWN]
|
||||
*
|
||||
* Shutdown given links of a device from active to L2 state
|
||||
*
|
||||
* param[in] dev NVLink Device pointer
|
||||
* param[in] linkMask Mask of links to be shutdown
|
||||
* param[in] flags Flags to track if the transition is sync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the links transition to L2
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_powerdown_links_from_active_to_L2
|
||||
(
|
||||
nvlink_device *dev,
|
||||
NvU32 linkMask,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_link *link = NULL;
|
||||
nvlink_intranode_conn **conns = NULL;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
NvU32 numLinks = 0;
|
||||
NvU32 numConns = 0;
|
||||
NvU32 connCount = 0;
|
||||
NvU32 i;
|
||||
NvU32 lockLinkCount = 0;
|
||||
nvlink_link **lockLinks = NULL;
|
||||
|
||||
|
||||
if (dev == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad device pointer specified.\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
lockLinks = (nvlink_link **)nvlink_malloc(
|
||||
sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
|
||||
if (lockLinks == NULL)
|
||||
{
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
// Allocate space for the connection list
|
||||
conns = (nvlink_intranode_conn **)nvlink_malloc(
|
||||
sizeof(nvlink_intranode_conn *) * NVLINK_MAX_SYSTEM_LINK_NUM);
|
||||
|
||||
if (conns == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to allocate space for connections list\n",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_free((void *)lockLinks);
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Initialize the list of links
|
||||
nvlink_memset(conns, 0, sizeof(nvlink_intranode_conn *) * 32);
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
goto nvlink_lib_powerdown_links_from_active_to_L2_end;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the device
|
||||
// and link lists and connection lists
|
||||
//
|
||||
|
||||
// Get the array of link endpoints whose lock needs to be acquired
|
||||
FOR_EACH_LINK_REGISTERED(link, dev, node)
|
||||
{
|
||||
if (!(linkMask & (1 << link->linkNumber)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the connection associated with the link
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
|
||||
if (conn == NULL)
|
||||
{
|
||||
//
|
||||
// Could not find the connection for the link. Release the
|
||||
// top-level lock and return
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
status = NVL_ERR_GENERIC;
|
||||
goto nvlink_lib_powerdown_links_from_active_to_L2_end;
|
||||
}
|
||||
else if ((numLinks + 1) >= NVLINK_MAX_SYSTEM_LINK_NUM)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: numLinks >= NVLINK_MAX_SYSTEM_LINK_NUM",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_assert(0);
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
status = NVL_ERR_INVALID_STATE;
|
||||
goto nvlink_lib_powerdown_links_from_active_to_L2_end;
|
||||
}
|
||||
|
||||
lockLinks[lockLinkCount] = conn->end0;
|
||||
lockLinkCount++;
|
||||
|
||||
lockLinks[lockLinkCount] = conn->end1;
|
||||
lockLinkCount++;
|
||||
}
|
||||
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
status = NVL_ERR_GENERIC;
|
||||
goto nvlink_lib_powerdown_links_from_active_to_L2_end;
|
||||
}
|
||||
|
||||
// Filter the connections which are already in SLEEP
|
||||
FOR_EACH_LINK_REGISTERED(link, dev, node)
|
||||
{
|
||||
if (!(linkMask & (1 << link->linkNumber)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the link received a L2 exit request, but never exited L2
|
||||
if (link->powerStateTransitionStatus == nvlink_power_state_exiting_L2)
|
||||
{
|
||||
// Update the power state transition status
|
||||
link->powerStateTransitionStatus = nvlink_power_state_in_L2;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the connection associated with the link
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
|
||||
// Check the connection state to verify if the link is already in SLEEP
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_SLEEP);
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Link is already in sleep %s: %s.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Link is not in SLEEP. Update power state transition status for the link
|
||||
link->powerStateTransitionStatus = nvlink_power_state_entering_L2;
|
||||
}
|
||||
|
||||
FOR_EACH_LINK_REGISTERED(link, dev, node)
|
||||
{
|
||||
if (!(linkMask & (1 << link->linkNumber)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if the link desires to enter SLEEP
|
||||
if (link->powerStateTransitionStatus == nvlink_power_state_entering_L2)
|
||||
{
|
||||
// Get the connection associated with the link
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
|
||||
// The connection will enter SLEEP only when both its endpoints desire to enter SLEEP
|
||||
if ((conn->end0->powerStateTransitionStatus == nvlink_power_state_entering_L2) &&
|
||||
(conn->end1->powerStateTransitionStatus == nvlink_power_state_entering_L2))
|
||||
{
|
||||
// Increment the #connections considered for entering L2
|
||||
numConns++;
|
||||
|
||||
// Check if the the connection is already included in the list
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
if (conns[i] == conn)
|
||||
break;
|
||||
}
|
||||
|
||||
// If this is a new connection, add it to the list
|
||||
if (i == connCount)
|
||||
{
|
||||
conns[connCount] = conn;
|
||||
connCount++;
|
||||
}
|
||||
}
|
||||
numLinks++;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// The connection list traversal is also complete now
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
// Clear the status variable
|
||||
status = NVL_SUCCESS;
|
||||
|
||||
if (connCount > 0)
|
||||
{
|
||||
status = nvlink_core_powerdown_intranode_conns_from_active_to_L2(conns, connCount, flags);
|
||||
}
|
||||
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
//
|
||||
// If some links are waiting on the remote end to request sleep,
|
||||
// update status to NVL_MORE_PROCESSING_REQUIRED
|
||||
//
|
||||
status = (numLinks != numConns ? NVL_MORE_PROCESSING_REQUIRED : NVL_SUCCESS);
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(lockLinks, lockLinkCount);
|
||||
|
||||
nvlink_lib_powerdown_links_from_active_to_L2_end:
|
||||
|
||||
if (conns != NULL)
|
||||
{
|
||||
nvlink_free((void *)conns);
|
||||
}
|
||||
|
||||
if (lockLinks != NULL)
|
||||
{
|
||||
nvlink_free((void *)lockLinks);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* [PSEUDO-CLEAN SHUTDOWN]
|
||||
*
|
||||
* Shutdown the given array of links from ACTIVE to OFF state
|
||||
*
|
||||
* param[in] links Array of links to shutdown
|
||||
* param[in] numLinks Number of links to be shutdown
|
||||
* param[in] flags Flags to track if the transition is sync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the pseudo-clean shutdown is successful
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_powerdown_links_from_active_to_off
|
||||
(
|
||||
nvlink_link **links,
|
||||
NvU32 numLinks,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_intranode_conn **conns = NULL;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
NvU32 numConns = 0;
|
||||
NvU32 i;
|
||||
NvU32 lockLinkCount = 0;
|
||||
nvlink_link **lockLinks = NULL;
|
||||
|
||||
|
||||
if ((links == NULL) || (numLinks == 0))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No links to shutdown\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
lockLinks = (nvlink_link **)nvlink_malloc( sizeof(nvlink_link *) * (2 * numLinks));
|
||||
if (lockLinks == NULL)
|
||||
{
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
// Allocate space for the connection list
|
||||
conns = (nvlink_intranode_conn **)nvlink_malloc(
|
||||
sizeof(nvlink_intranode_conn *) * numLinks);
|
||||
|
||||
if (conns == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to allocate space for connections list\n",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_free((void *)lockLinks);
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
nvlink_memset(conns, 0, sizeof(nvlink_intranode_conn *) * numLinks);
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
goto nvlink_lib_powerdown_links_from_active_to_off_end;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the device
|
||||
// and link lists and connection lists
|
||||
//
|
||||
|
||||
//
|
||||
// Get the array of both local and remote endpoints whose lock needs
|
||||
// to be acquired
|
||||
//
|
||||
for (i = 0; i < numLinks; i++)
|
||||
{
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(links[i], &conn);
|
||||
|
||||
if (conn == NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Capture both the link and its end-point
|
||||
lockLinks[lockLinkCount] = conn->end0;
|
||||
lockLinkCount++;
|
||||
|
||||
lockLinks[lockLinkCount] = conn->end1;
|
||||
lockLinkCount++;
|
||||
}
|
||||
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
goto nvlink_lib_powerdown_links_from_active_to_off_end;
|
||||
}
|
||||
|
||||
// Sanity checking if the link is already in OFF/RESET state
|
||||
for (i = 0; i < numLinks; i++)
|
||||
{
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(links[i], &conn);
|
||||
|
||||
if (conn == NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if both ends of the connection are in L2
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_SLEEP);
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if both ends and their sublinks are in OFF mode
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_OFF);
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if both ends are in RESET
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_RESET);
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
conns[numConns] = conn;
|
||||
numConns++;
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// The connection list traversal is also complete now
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
// Reset status to NVL_SUCCESS
|
||||
status = NVL_SUCCESS;
|
||||
|
||||
if (numConns > 0)
|
||||
{
|
||||
status = nvlink_core_powerdown_intranode_conns_from_active_to_off(conns,
|
||||
numConns,
|
||||
flags);
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(lockLinks, lockLinkCount);
|
||||
|
||||
nvlink_lib_powerdown_links_from_active_to_off_end:
|
||||
|
||||
if (conns != NULL)
|
||||
{
|
||||
nvlink_free((void *)conns);
|
||||
}
|
||||
|
||||
if (lockLinks != NULL)
|
||||
{
|
||||
nvlink_free((void *)lockLinks);
|
||||
}
|
||||
//
|
||||
// Squash status. If any side of the link does not respond, the link is
|
||||
// shutdown unilaterally
|
||||
//
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Power down the given array of links from ACTIVE to SWCFG state
|
||||
*
|
||||
* param[in] links Array of links to shutdown
|
||||
* param[in] numLinks Number of links to be shutdown
|
||||
* param[in] flags Flags to track if the transition is sync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the transitions were successful
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_powerdown_links_from_active_to_swcfg
|
||||
(
|
||||
nvlink_link **links,
|
||||
NvU32 numLinks,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_intranode_conn **conns = NULL;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
NvU32 numConns = 0;
|
||||
NvU32 i;
|
||||
NvU32 lockLinkCount = 0;
|
||||
nvlink_link **lockLinks = NULL;
|
||||
|
||||
|
||||
if ((links == NULL) || (numLinks == 0))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No links to shutdown\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Allocate the link locks
|
||||
lockLinks = (nvlink_link **)nvlink_malloc(sizeof(nvlink_link *) * (2 * numLinks));
|
||||
if (lockLinks == NULL)
|
||||
{
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
// Allocate space for the connection list
|
||||
conns = (nvlink_intranode_conn **)nvlink_malloc(
|
||||
sizeof(nvlink_intranode_conn *) * numLinks);
|
||||
|
||||
if (conns == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to allocate space for connections list\n",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_free((void *)lockLinks);
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
nvlink_memset(conns, 0, sizeof(nvlink_intranode_conn *) * numLinks);
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
goto nvlink_lib_powerdown_links_from_active_to_swcfg_end;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the device
|
||||
// and link lists and connection lists
|
||||
//
|
||||
|
||||
//
|
||||
// Get the array of both local and remote endpoints whose lock needs
|
||||
// to be acquired
|
||||
//
|
||||
for (i = 0; i < numLinks; i++)
|
||||
{
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(links[i], &conn);
|
||||
|
||||
if (conn == NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Capture both the link and its end-point
|
||||
lockLinks[lockLinkCount] = conn->end0;
|
||||
lockLinkCount++;
|
||||
|
||||
lockLinks[lockLinkCount] = conn->end1;
|
||||
lockLinkCount++;
|
||||
}
|
||||
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
goto nvlink_lib_powerdown_links_from_active_to_swcfg_end;
|
||||
}
|
||||
|
||||
// Sanity checking of links; if already in swfg state, skip it
|
||||
for (i = 0; i < numLinks; i++)
|
||||
{
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
|
||||
nvlink_core_get_intranode_conn(links[i], &conn);
|
||||
if (conn == NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if both ends and their sublinks are in SAFE mode
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_SAFE);
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
conns[numConns] = conn;
|
||||
numConns++;
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// The connection list traversal is also complete now
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
// Reset status to NVL_SUCCESS
|
||||
status = NVL_SUCCESS;
|
||||
|
||||
if (numConns > 0)
|
||||
{
|
||||
status = nvlink_core_powerdown_intranode_conns_from_active_to_swcfg(conns,
|
||||
numConns,
|
||||
flags);
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(lockLinks, lockLinkCount);
|
||||
|
||||
nvlink_lib_powerdown_links_from_active_to_swcfg_end:
|
||||
|
||||
if (conns != NULL)
|
||||
{
|
||||
nvlink_free((void *)conns);
|
||||
}
|
||||
|
||||
if (lockLinks != NULL)
|
||||
{
|
||||
nvlink_free((void *)lockLinks);
|
||||
}
|
||||
//
|
||||
// Squash status. If any side of link doesn not respond the link is
|
||||
// shutdown unilaterally
|
||||
//
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset the given array of links
|
||||
*
|
||||
* param[in] links Array of links to be reset
|
||||
* param[in] numLinks Number of links to be shutdown
|
||||
* param[in] flags Flags to track if the transition is sync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the links were reset successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_reset_links
|
||||
(
|
||||
nvlink_link **links,
|
||||
NvU32 numLinks,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_intranode_conn **conns = NULL;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
NvU32 numConns = 0;
|
||||
NvU32 i;
|
||||
NvU32 lockLinkCount = 0;
|
||||
nvlink_link **lockLinks = NULL;
|
||||
|
||||
|
||||
if ((links == NULL) || (numLinks == 0))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No links to reset\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Allocate space for the link locks
|
||||
lockLinks = (nvlink_link **)nvlink_malloc( sizeof(nvlink_link *) * (2 * numLinks));
|
||||
if (lockLinks == NULL)
|
||||
{
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
// Allocate space for the connection list
|
||||
conns = (nvlink_intranode_conn **)nvlink_malloc(
|
||||
sizeof(nvlink_intranode_conn *) * numLinks);
|
||||
|
||||
if (conns == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to allocate space for connections list\n",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_free((void *)lockLinks);
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
nvlink_memset(conns, 0, sizeof(nvlink_intranode_conn *) * numLinks);
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
goto nvlink_lib_reset_links_end;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the device
|
||||
// and link lists and connection lists
|
||||
//
|
||||
|
||||
// Sanity checking if the link is already in OFF/RESET state
|
||||
for (i = 0; i < numLinks; i++)
|
||||
{
|
||||
conn = NULL;
|
||||
|
||||
nvlink_core_get_intranode_conn(links[i], &conn);
|
||||
if (conn == NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Capture both the link and its end-point
|
||||
lockLinks[lockLinkCount] = conn->end0;
|
||||
lockLinkCount++;
|
||||
|
||||
lockLinks[lockLinkCount] = conn->end1;
|
||||
lockLinkCount++;
|
||||
|
||||
conns[numConns] = conn;
|
||||
numConns++;
|
||||
}
|
||||
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
goto nvlink_lib_reset_links_end;
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// The connection list traversal is also complete now
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
if (numConns > 0)
|
||||
{
|
||||
status = nvlink_core_reset_intranode_conns(conns, numConns, flags);
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(lockLinks, lockLinkCount);
|
||||
|
||||
nvlink_lib_reset_links_end:
|
||||
|
||||
if (conns != NULL)
|
||||
{
|
||||
nvlink_free((void *)conns);
|
||||
}
|
||||
|
||||
if (lockLinks != NULL)
|
||||
{
|
||||
nvlink_free((void *)lockLinks);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
@@ -0,0 +1,826 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "../nvlink_ctx.h"
|
||||
#include "../nvlink_helper.h"
|
||||
#include "nvlink_lock.h"
|
||||
|
||||
|
||||
/**
|
||||
* Check whether a group of links have completed training
|
||||
*
|
||||
* @param[in] links List of NVLink Link pointers
|
||||
* @param[in] linkCount Count of #links
|
||||
*
|
||||
* return NL_SUCCESS if all links transitioned to Active
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_check_training_complete
|
||||
(
|
||||
nvlink_link **links,
|
||||
NvU32 linkCount
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_link **lockLinks = NULL;
|
||||
NvU32 lockLinkCount = 0;
|
||||
NvU32 i;
|
||||
|
||||
if (links == NULL || linkCount == 0)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad link pointer or linkCount!\n",
|
||||
__FUNCTION__));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
lockLinks = (nvlink_link **)nvlink_malloc(
|
||||
sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
|
||||
if (lockLinks == NULL)
|
||||
{
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
goto nvlink_lib_check_training_complete_end;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the device
|
||||
// and link lists and connection lists
|
||||
//
|
||||
//
|
||||
// Get the array of both local and remote endpoints whose lock needs
|
||||
// to be acquired
|
||||
//
|
||||
for (i = 0; i < linkCount; i++)
|
||||
{
|
||||
if ((lockLinkCount + 1) >= NVLINK_MAX_SYSTEM_LINK_NUM)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: lockLinkCount >= NVLINK_MAX_SYSTEM_LINK_NUM",
|
||||
__FUNCTION__));
|
||||
|
||||
nvlink_assert(0);
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
status = NVL_ERR_INVALID_STATE;
|
||||
goto nvlink_lib_check_training_complete_end;
|
||||
}
|
||||
|
||||
// Capture both the link and its end-point
|
||||
lockLinks[lockLinkCount] = links[i];
|
||||
lockLinkCount++;
|
||||
}
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
goto nvlink_lib_check_training_complete_end;
|
||||
}
|
||||
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(lockLinks, lockLinkCount);
|
||||
nvlink_lib_check_training_complete_end:
|
||||
if (lockLinks != NULL)
|
||||
{
|
||||
nvlink_free((void *)lockLinks);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Train a given set of links from SWCFG to ACTIVE state
|
||||
*
|
||||
* Note: For training the links one by one - its the responsibility of
|
||||
* the caller to call this function every time for each link
|
||||
*
|
||||
* @param[in] links List of NVLink Link pointers
|
||||
* @param[in] linkCount Count of #links
|
||||
* @param[in] flags Flag to track whether training is sync/async
|
||||
*
|
||||
* return NL_SUCCESS if the link state transition was a success
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_train_links_from_swcfg_to_active
|
||||
(
|
||||
nvlink_link **links,
|
||||
NvU32 linkCount,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
nvlink_intranode_conn **conns = NULL;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
NvU32 connCount = 0;
|
||||
NvU32 i, j;
|
||||
nvlink_link **lockLinks = NULL;
|
||||
NvU32 lockLinkCount = 0;
|
||||
|
||||
if (links == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad link pointer\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
lockLinks = (nvlink_link **)nvlink_malloc( sizeof(nvlink_link *) * (2 * linkCount));
|
||||
if (lockLinks == NULL)
|
||||
{
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
// Allocate space for the connection list
|
||||
conns = (nvlink_intranode_conn **)nvlink_malloc(
|
||||
sizeof(nvlink_intranode_conn *) * linkCount);
|
||||
|
||||
if (conns == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to allocate space for connections list\n",
|
||||
__FUNCTION__));
|
||||
|
||||
status = NVL_ERR_GENERIC;
|
||||
goto nvlink_lib_train_links_from_swcfg_to_active_end;
|
||||
}
|
||||
|
||||
nvlink_memset(conns, 0, sizeof(nvlink_intranode_conn *) * linkCount);
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
goto nvlink_lib_train_links_from_swcfg_to_active_end;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the device
|
||||
// and link lists and connection lists
|
||||
//
|
||||
|
||||
//
|
||||
// Get the array of both local and remote endpoints whose lock needs
|
||||
// to be acquired
|
||||
//
|
||||
for (i = 0; i < linkCount; i++)
|
||||
{
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(links[i], &conn);
|
||||
|
||||
if (!conn)
|
||||
{
|
||||
//
|
||||
// Could not find the connection for the link. Release the
|
||||
// top-level lock and return
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
status = NVL_ERR_GENERIC;
|
||||
goto nvlink_lib_train_links_from_swcfg_to_active_end;
|
||||
}
|
||||
|
||||
// Capture both the link and its end-point
|
||||
lockLinks[lockLinkCount] = conn->end0;
|
||||
lockLinkCount++;
|
||||
|
||||
lockLinks[lockLinkCount] = conn->end1;
|
||||
lockLinkCount++;
|
||||
}
|
||||
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
goto nvlink_lib_train_links_from_swcfg_to_active_end;
|
||||
}
|
||||
|
||||
// Check all the connections which need to be trained
|
||||
for (i = 0; i < linkCount; i++)
|
||||
{
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(links[i], &conn);
|
||||
|
||||
// Don't train links that didn't receive CONFIG_GOOD (NVLINK3+)
|
||||
if (((conn->end0->version >= NVLINK_DEVICE_VERSION_30) ||
|
||||
(conn->end1->version >= NVLINK_DEVICE_VERSION_30)) &&
|
||||
(!links[i]->bInitnegotiateConfigGood))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if the link is already in ACTIVE
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_HS);
|
||||
if ((status == NVL_SUCCESS) || (status == NVL_ERR_INVALID_STATE))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// We can train connections to HS only when they are already in SAFE
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_SAFE);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if the connection is not already considered
|
||||
for (j = 0; j < connCount; j++)
|
||||
{
|
||||
if (conns[j] == conn)
|
||||
break;
|
||||
}
|
||||
|
||||
// If this is a new connection, add it to the list
|
||||
if (j == connCount)
|
||||
{
|
||||
conns[connCount] = conn;
|
||||
connCount++;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// The connection list traversal is also complete now
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
if (connCount > 0)
|
||||
{
|
||||
if ((conn->end0->version >= NVLINK_DEVICE_VERSION_30) ||
|
||||
(conn->end1->version >= NVLINK_DEVICE_VERSION_30))
|
||||
{
|
||||
status = nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT(conns,
|
||||
connCount,
|
||||
flags);
|
||||
}
|
||||
else
|
||||
{
|
||||
status = nvlink_core_train_intranode_conns_from_swcfg_to_active_legacy(conns,
|
||||
connCount,
|
||||
flags);
|
||||
}
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(lockLinks, lockLinkCount);
|
||||
|
||||
nvlink_lib_train_links_from_swcfg_to_active_end:
|
||||
|
||||
if (conns != NULL)
|
||||
{
|
||||
nvlink_free((void *)conns);
|
||||
}
|
||||
|
||||
if (lockLinks != NULL)
|
||||
{
|
||||
nvlink_free((void *)lockLinks);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Train a given set of links of a device from L2 to ACTIVE state
|
||||
*
|
||||
* param[in] dev NVLink Device pointer
|
||||
* param[in] linkMask Mask of links to be trained
|
||||
* param[in] flags Flags to track if the transition is sync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the links train to ACTIVE
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_train_links_from_L2_to_active
|
||||
(
|
||||
nvlink_device *dev,
|
||||
NvU32 linkMask,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_link *link = NULL;
|
||||
nvlink_intranode_conn **conns = NULL;
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
NvU32 numLinks = 0;
|
||||
NvU32 numConns = 0;
|
||||
NvU32 connCount = 0;
|
||||
NvU32 i;
|
||||
|
||||
nvlink_link **lockLinks = NULL;
|
||||
NvU32 lockLinkCount = 0;
|
||||
|
||||
|
||||
if (dev == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad device pointer specified.\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
lockLinks = (nvlink_link **)nvlink_malloc(
|
||||
sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
|
||||
if (lockLinks == NULL)
|
||||
{
|
||||
return NVL_NO_MEM;
|
||||
}
|
||||
|
||||
// Allocate space for the connection list
|
||||
conns = (nvlink_intranode_conn **)nvlink_malloc(
|
||||
sizeof(nvlink_intranode_conn *) * NVLINK_MAX_SYSTEM_LINK_NUM);
|
||||
|
||||
if (conns == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to allocate space for connections list\n",
|
||||
__FUNCTION__));
|
||||
|
||||
status = NVL_ERR_GENERIC;
|
||||
goto nvlink_lib_train_links_from_L2_to_active_end;
|
||||
}
|
||||
|
||||
// Initialize the list of links
|
||||
nvlink_memset(conns, 0, sizeof(nvlink_intranode_conn *) * 32);
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
goto nvlink_lib_train_links_from_L2_to_active_end;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the device
|
||||
// and link lists and connection lists
|
||||
//
|
||||
|
||||
// Get the array of link endpoints whose lock needs to be acquired
|
||||
FOR_EACH_LINK_REGISTERED(link, dev, node)
|
||||
{
|
||||
if (!(linkMask & (1 << link->linkNumber)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the connection associated with the link
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
|
||||
if (conn == NULL)
|
||||
{
|
||||
//
|
||||
// Could not find the connection for the link. Release the
|
||||
// top-level lock and return
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
status = NVL_ERR_GENERIC;
|
||||
goto nvlink_lib_train_links_from_L2_to_active_end;
|
||||
}
|
||||
|
||||
lockLinks[lockLinkCount] = conn->end0;
|
||||
lockLinkCount++;
|
||||
|
||||
lockLinks[lockLinkCount] = conn->end1;
|
||||
lockLinkCount++;
|
||||
}
|
||||
|
||||
// Acquire the per-link locks for all links captured
|
||||
status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
goto nvlink_lib_train_links_from_L2_to_active_end;
|
||||
}
|
||||
|
||||
FOR_EACH_LINK_REGISTERED(link, dev, node)
|
||||
{
|
||||
if (!(linkMask & (1 << link->linkNumber)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the link received a L2 entry request, but never entered L2
|
||||
if (link->powerStateTransitionStatus == nvlink_power_state_entering_L2)
|
||||
{
|
||||
// Update the power state transition status
|
||||
link->powerStateTransitionStatus = nvlink_power_state_in_L0;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the connection associated with the link
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
|
||||
// Check the connection state to verify if the link is already in HS
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_HS);
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Link is not in sleep %s: %s.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check the connection state to verify if the link is already in SAFE
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_SAFE);
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link is not in sleep %s: %s.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Mark the power state transition for the link
|
||||
link->powerStateTransitionStatus = nvlink_power_state_exiting_L2;
|
||||
}
|
||||
|
||||
FOR_EACH_LINK_REGISTERED(link, dev, node)
|
||||
{
|
||||
if (!(linkMask & (1 << link->linkNumber)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (link->powerStateTransitionStatus == nvlink_power_state_exiting_L2)
|
||||
{
|
||||
// Get the connection associated with the link
|
||||
conn = NULL;
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
|
||||
// Verify if both the endpoints desire to exit SLEEP
|
||||
if ((conn->end0->powerStateTransitionStatus == nvlink_power_state_exiting_L2) &&
|
||||
(conn->end1->powerStateTransitionStatus == nvlink_power_state_exiting_L2))
|
||||
{
|
||||
// Increment the #connections considered for exiting L2
|
||||
numConns++;
|
||||
|
||||
// Check if the the connection is already included in the list
|
||||
for (i = 0; i < connCount; i++)
|
||||
{
|
||||
if (conns[i] == conn)
|
||||
break;
|
||||
}
|
||||
|
||||
// If this is a new connection, add it to the list
|
||||
if (i == connCount)
|
||||
{
|
||||
conns[connCount] = conn;
|
||||
connCount++;
|
||||
}
|
||||
}
|
||||
|
||||
// Increment the #links considered for exiting L2
|
||||
numLinks++;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// The connection list traversal is also complete now
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
// Clear the status variable
|
||||
status = NVL_SUCCESS;
|
||||
|
||||
if (connCount > 0)
|
||||
{
|
||||
status = nvlink_core_train_intranode_conns_from_from_L2_to_active(conns, connCount, flags);
|
||||
}
|
||||
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
//
|
||||
// If some links are waiting on the remote end to exit sleep,
|
||||
// update status to NVL_MORE_PROCESSING_REQUIRED
|
||||
//
|
||||
status = (numLinks != numConns ? NVL_MORE_PROCESSING_REQUIRED : NVL_SUCCESS);
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(lockLinks, lockLinkCount);
|
||||
|
||||
nvlink_lib_train_links_from_L2_to_active_end:
|
||||
|
||||
if (conns != NULL)
|
||||
{
|
||||
nvlink_free((void *)conns);
|
||||
}
|
||||
|
||||
if (lockLinks != NULL)
|
||||
{
|
||||
nvlink_free((void *)lockLinks);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrain a given link from SWCFG to ACTIVE
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
* @param[in] flags Flag to track if the training is aync/async
|
||||
*
|
||||
* return NVL_SUCCESS if the training was successful
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_retrain_link_from_swcfg_to_active
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU32 flags
|
||||
)
|
||||
{
|
||||
nvlink_intranode_conn *conn = NULL;
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
|
||||
if (!link)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad link pointer specified.\n",
|
||||
__FUNCTION__));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Acquire the top-level lock
|
||||
status = nvlink_lib_top_lock_acquire();
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// Top-level lock is now acquired. Proceed to traversing the device
|
||||
// and link lists and connection lists
|
||||
//
|
||||
|
||||
// Get the connection associated with the link
|
||||
nvlink_core_get_intranode_conn(link, &conn);
|
||||
|
||||
if (!conn)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No connection was found for %s: %s.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
|
||||
//
|
||||
// Could not find the connection for the link. Release the
|
||||
// top-level lock and return
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// create array of one conn and two link endpoints
|
||||
nvlink_intranode_conn *conns[1] = {conn};
|
||||
nvlink_link *links[2] = {0};
|
||||
|
||||
links[0] = conn->end0;
|
||||
links[1] = conn->end1;
|
||||
|
||||
// Acquire the per-link locks for the links
|
||||
status = nvlink_lib_link_locks_acquire(links, 2);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link locks\n",
|
||||
__FUNCTION__));
|
||||
|
||||
// Release the top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//
|
||||
// All the required per-link locks are successfully acquired
|
||||
// Release the top level-lock
|
||||
//
|
||||
nvlink_lib_top_lock_release();
|
||||
|
||||
// Check if the link is already in ACTIVE
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_HS);
|
||||
if ((status == NVL_SUCCESS) || (status == NVL_ERR_INVALID_STATE))
|
||||
{
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(links, 2);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// We can train connections to HS only when they are already in SAFE
|
||||
status = nvlink_core_check_intranode_conn_state(conn, NVLINK_LINKSTATE_SAFE);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(links, 2);
|
||||
|
||||
return status;
|
||||
}
|
||||
if ((conn->end0->version >= NVLINK_DEVICE_VERSION_30) ||
|
||||
(conn->end1->version >= NVLINK_DEVICE_VERSION_30))
|
||||
|
||||
{
|
||||
if (!conn->end0->bInitnegotiateConfigGood ||
|
||||
!conn->end1->bInitnegotiateConfigGood)
|
||||
{
|
||||
status = NVL_ERR_GENERIC;
|
||||
}
|
||||
else
|
||||
{
|
||||
// ALT training for NVLink3.0+
|
||||
status = nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT(conns, 0x1, flags);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Legacy training for pre-NVLink3.0
|
||||
status = nvlink_core_train_intranode_conns_from_swcfg_to_active_legacy(conns, 0x1, flags);
|
||||
}
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(links, 2);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Save training seeds into the link structure
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
* @param[in] seedData Training seed information
|
||||
*
|
||||
* return NVL_SUCCESS if the seed saving was successful
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_save_training_seeds
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU32 *seedData
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
|
||||
// Check to make sure we are given a buffer of data
|
||||
if (seedData == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No seed data given to store %s: %s.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
NvU32 size = seedData[0];
|
||||
|
||||
// check to make sure the size is not out of bounds
|
||||
if (size > NVLINK_MAX_SEED_NUM)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad data, size of %d out of bounds %s: %s.\n",
|
||||
__FUNCTION__, size, link->dev->deviceName, link->linkName));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Acquire the per-link lock
|
||||
status = nvlink_lib_link_locks_acquire(&link, 1);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
//always using corelib defined structures for size
|
||||
nvlink_memcpy(link->seedData, seedData, sizeof(link->seedData));
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(&link, 1);
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy training seeds from the link structure
|
||||
*
|
||||
* @param[in] link NVLink Link pointer
|
||||
* @param[in] seedData Training seed information
|
||||
*
|
||||
* return NVL_SUCCESS if the seed copy was successful
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_copy_training_seeds
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU32 *seedDataCopy
|
||||
)
|
||||
{
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
|
||||
// Check to make sure we are given a buffer to copy into
|
||||
if (seedDataCopy == NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: No seed data structure given to store into %s: %s.\n",
|
||||
__FUNCTION__, link->dev->deviceName, link->linkName));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
NvU32 size = link->seedData[0];
|
||||
|
||||
// check to make sure the size is not out of bounds
|
||||
if (size > NVLINK_MAX_SEED_NUM)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Bad data, size of %d out of bounds %s: %s.\n",
|
||||
__FUNCTION__, size, link->dev->deviceName, link->linkName));
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Acquire the per-link lock
|
||||
status = nvlink_lib_link_locks_acquire(&link, 1);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire per-link lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
nvlink_memcpy(seedDataCopy, link->seedData, sizeof(link->seedData));
|
||||
|
||||
// Release the per-link locks
|
||||
nvlink_lib_link_locks_release(&link, 1);
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
90
src/common/nvlink/kernel/nvlink/nvlink_ctx.h
Normal file
90
src/common/nvlink/kernel/nvlink/nvlink_ctx.h
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NVLINK_CTX_H_
|
||||
#define _NVLINK_CTX_H_
|
||||
|
||||
//
|
||||
// Link transition times in ms.
|
||||
// TODO: Review with HW for optimal transition times.
|
||||
//
|
||||
#define LINK_TRANSITION_TIME_OFF 1
|
||||
#define LINK_TRANSITION_TIME_SAFE 5
|
||||
#define LINK_TRANSITION_TIME_HS 500
|
||||
#define LINK_TRANSITION_TIMEOUT_IN_MS 2000
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/*
|
||||
* Lock for all core lib structures except nvlink_link structures
|
||||
*/
|
||||
void *topLevelLock;
|
||||
|
||||
/*
|
||||
* Head of the device-list
|
||||
*/
|
||||
nvlink_device nv_devicelist_head;
|
||||
|
||||
/*
|
||||
* Head of the established intranode nvlink connections list
|
||||
*/
|
||||
nvlink_intranode_conn nv_intraconn_head;
|
||||
|
||||
/*
|
||||
* Head of the added internode nvlink connections list
|
||||
*/
|
||||
nvlink_internode_conn nv_interconn_head;
|
||||
|
||||
/*
|
||||
* Topology information
|
||||
* registeredEndpoints : #Endpoints registered in the core library
|
||||
* connectedEndpoints : #Endpoints whose remote has been determined
|
||||
* notConnectedEndpoints: #Endpoints whose remote has not been determined
|
||||
*/
|
||||
NvU32 registeredEndpoints;
|
||||
NvU32 connectedEndpoints;
|
||||
NvU32 notConnectedEndpoints;
|
||||
NvBool bNewEndpoints;
|
||||
|
||||
/*
|
||||
* Endpoint count in different link states
|
||||
* endpointsInSafe : #Endpoints in SAFE state
|
||||
* endpointsInFail : #Endpoints that failed to transition to ACTIVE
|
||||
* endpointsInActive: #Endpoints in ACTIVE
|
||||
*/
|
||||
NvU32 endpointsInSafe;
|
||||
NvU32 endpointsInFail;
|
||||
NvU32 endpointsInActive;
|
||||
|
||||
/*
|
||||
* Fabric node id set by ioctl interface. This id will be assigned to each
|
||||
* nvlink device during registration and matched for endpoint look-up on
|
||||
* ioctls, which operate on endpoints.
|
||||
*/
|
||||
NvU16 nodeId;
|
||||
}nvlink_lib_context;
|
||||
|
||||
extern nvlink_lib_context nvlinkLibCtx;
|
||||
|
||||
#endif //_NVLINK_CTX_H_
|
||||
|
||||
365
src/common/nvlink/kernel/nvlink/nvlink_helper.h
Normal file
365
src/common/nvlink/kernel/nvlink/nvlink_helper.h
Normal file
@@ -0,0 +1,365 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NVLINK_HELPER_H_
|
||||
#define _NVLINK_HELPER_H_
|
||||
|
||||
|
||||
//
|
||||
// fabric node id will be used as MSB 16 bits of the link token value to
|
||||
// generate a unique token for discovering connections
|
||||
//
|
||||
#define NVLINK_FABRIC_NODE_ID_MASK 0xFFFF
|
||||
#define NVLINK_FABRIC_NODE_ID_POS 48
|
||||
|
||||
/**
|
||||
* Check if the device type is supported
|
||||
*/
|
||||
NvBool nvlink_core_is_supported_device_type(NvU32 devType);
|
||||
|
||||
/**
|
||||
* Get the link and sublink states for the endpoint
|
||||
*/
|
||||
void nvlink_core_get_endpoint_state(nvlink_link *link, nvlink_link_state *linkState);
|
||||
|
||||
/**
|
||||
* Get the nvlink_device * from the PCI DBDF
|
||||
*/
|
||||
void nvlink_core_get_device_by_devinfo(nvlink_device_info *devInfo, nvlink_device **dev);
|
||||
|
||||
/**
|
||||
* Get the nvlink_link * from the PCI DBDF and link#
|
||||
*/
|
||||
void nvlink_core_get_link_by_endpoint(nvlink_endpoint *endPoint, nvlink_link **link);
|
||||
|
||||
/**
|
||||
* Given the nvlink_link ptr, copy the endpoint details for the link
|
||||
*/
|
||||
void nvlink_core_copy_endpoint_info(nvlink_link *connLink, nvlink_endpoint *endPointInfo);
|
||||
|
||||
/**
|
||||
* Given the nvlink_device ptr, copy the device details
|
||||
*/
|
||||
void nvlink_core_copy_device_info(nvlink_device *tmpDev, nvlink_detailed_dev_info *devInfo);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/****************************** NVLink initialization functions *********************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/**
|
||||
* Kick-off INITPHASE1 on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_initphase1(nvlink_link **links, NvU32 numLinks, NvU32 flags);
|
||||
|
||||
/**
|
||||
* Kick-off INITRXTERM on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_rx_init_term(nvlink_link **links, NvU32 numLinks, NvU32 flags);
|
||||
|
||||
/**
|
||||
* Kick-off receiver detect on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_set_rx_detect(nvlink_link **links, NvU32 numLinks, NvU32 flags);
|
||||
|
||||
/**
|
||||
* Get receiver detect status on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_get_rx_detect(nvlink_link **links, NvU32 numLinks, NvU32 flags);
|
||||
|
||||
/**
|
||||
* Enable TX common mode on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_enable_common_mode(nvlink_link **links, NvU32 numLinks, NvU32 flags);
|
||||
|
||||
/**
|
||||
* Disable TX common mode on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_disable_common_mode(nvlink_link **links, NvU32 numLinks, NvU32 flags);
|
||||
|
||||
/**
|
||||
* Perform RX calibration on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_calibrate_links(nvlink_link **links, NvU32 numLinks, NvU32 flags);
|
||||
|
||||
/**
|
||||
* Enable data on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_enable_data(nvlink_link **links, NvU32 numLinks, NvU32 flags);
|
||||
|
||||
/**
|
||||
* Transition to SWCFG on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_link_init_async(nvlink_link **links, NvU32 numLinks);
|
||||
|
||||
/**
|
||||
* Poll on SAFE/SWCFG on the given link
|
||||
*/
|
||||
NvlStatus nvlink_core_wait_for_link_init(nvlink_link *link);
|
||||
|
||||
/**
|
||||
* Initialize all the endpoints from OFF to SWCFG state
|
||||
*/
|
||||
void nvlink_core_init_links_from_off_to_swcfg(nvlink_link **pLinks,
|
||||
NvU32 numLinks,
|
||||
NvU32 flags);
|
||||
|
||||
/**
|
||||
* Send INITNEGOTIATE command on the given array of links
|
||||
*/
|
||||
NvlStatus nvlink_core_initnegotiate(nvlink_link **links, NvU32 numLinks, NvU32 flags);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/*************************** NVLink topology discovery functions ********************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/**
|
||||
* Generate a discovery token for the given link
|
||||
*/
|
||||
NvU64 nvlink_core_get_link_discovery_token(nvlink_link *link);
|
||||
|
||||
/**
|
||||
* Write the discovery token for the given link
|
||||
*/
|
||||
NvlStatus nvlink_core_write_link_discovery_token(nvlink_link *link, NvU64 token);
|
||||
|
||||
/**
|
||||
* Read the discovery token for the given link
|
||||
*/
|
||||
NvU64 nvlink_core_read_link_discovery_token(nvlink_link *link);
|
||||
|
||||
/**
|
||||
* Detect the connection by correlating the tokens
|
||||
*/
|
||||
void nvlink_core_correlate_conn_by_token(nvlink_link *srcLink, NvU64 writeToken, NvBool skipReadToken);
|
||||
|
||||
/**
|
||||
* For a given end of a link, returns the other end it is connected to.
|
||||
*/
|
||||
void nvlink_core_discover_and_get_remote_end(nvlink_link *end,
|
||||
nvlink_link **remote_end,
|
||||
NvU32 flags);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/********************************** NVLink training functions ***********************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/**
|
||||
* Train the internode connection link from SWCFG to ACTIVE
|
||||
*/
|
||||
NvlStatus nvlink_core_train_internode_conns_from_swcfg_to_active(nvlink_internode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 *isMasterEnd,
|
||||
NvU32 flags);
|
||||
|
||||
/**
|
||||
* Train the internode connection sublink to enter high speed
|
||||
*/
|
||||
NvlStatus nvlink_core_train_internode_conn_sublink_from_safe_to_hs(nvlink_internode_conn *conn,
|
||||
NvU32 flags);
|
||||
|
||||
/**
|
||||
* Train a given set of intranode connections from L2 to ACTIVE state
|
||||
*/
|
||||
NvlStatus nvlink_core_train_intranode_conns_from_from_L2_to_active(nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags);
|
||||
|
||||
/**
|
||||
* Train intranode connections associated with a list of links to HS using ALT sequence
|
||||
*/
|
||||
NvlStatus nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT(nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags);
|
||||
|
||||
|
||||
/**
|
||||
* Train a single intranode connection associated with a list of links to HS using legacy
|
||||
* pre-Ampere sequence
|
||||
*/
|
||||
NvlStatus nvlink_core_train_intranode_conns_from_swcfg_to_active_legacy(nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags);
|
||||
|
||||
/************************************************************************************************/
|
||||
/********************************** NVLink shutdown functions ***********************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/**
|
||||
* [CLEAN SHUTDOWN]
|
||||
*
|
||||
* Shutdown given intranode connections from active to L2 state
|
||||
*/
|
||||
NvlStatus nvlink_core_powerdown_intranode_conns_from_active_to_L2(nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags);
|
||||
|
||||
/**
|
||||
* [PSEUDO-CLEAN SHUTDOWN]
|
||||
*
|
||||
* Shutdown the given array of intranode connections from ACTIVE to OFF state
|
||||
*/
|
||||
NvlStatus nvlink_core_powerdown_intranode_conns_from_active_to_off(nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags);
|
||||
|
||||
/**
|
||||
* Power down the given array of intranode connections from ACTIVE to SWCFG state
|
||||
*/
|
||||
NvlStatus nvlink_core_powerdown_intranode_conns_from_active_to_swcfg(nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags);
|
||||
|
||||
/**
|
||||
* Reset the given array of intranode connections
|
||||
*/
|
||||
NvlStatus nvlink_core_reset_intranode_conns(nvlink_intranode_conn **conns,
|
||||
NvU32 connCount,
|
||||
NvU32 flags);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/**************************** NVLink connection management functions ****************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/**
|
||||
* For a given link, return the associated internode connection
|
||||
*/
|
||||
void nvlink_core_get_internode_conn(nvlink_link *localLink,
|
||||
nvlink_internode_conn **conn);
|
||||
|
||||
/**
|
||||
* Add a new internode connection to the list of connections
|
||||
*/
|
||||
NvlStatus nvlink_core_add_internode_conn(nvlink_link *localLink,
|
||||
nvlink_remote_endpoint_info *remoteEndPoint);
|
||||
|
||||
/**
|
||||
* For a given link, delete the associated internode connection
|
||||
*/
|
||||
void nvlink_core_remove_internode_conn(nvlink_link *localLink);
|
||||
|
||||
/**
|
||||
* For a given link, return the associated intranode connection
|
||||
*/
|
||||
void nvlink_core_get_intranode_conn(nvlink_link *endpoint,
|
||||
nvlink_intranode_conn **conn);
|
||||
|
||||
/**
|
||||
* Add a new intranode connection to the list of intranode connections
|
||||
*/
|
||||
NvlStatus nvlink_core_add_intranode_conn(nvlink_link *end0, nvlink_link *end1);
|
||||
|
||||
/**
|
||||
* Remove the connection from the list of intranode connections
|
||||
*/
|
||||
void nvlink_core_remove_intranode_conn(nvlink_intranode_conn *conn);
|
||||
|
||||
/**
|
||||
* Check if the given intranode connection is in the specified mode
|
||||
*/
|
||||
NvlStatus nvlink_core_check_intranode_conn_state(nvlink_intranode_conn *conn,
|
||||
NvU64 linkMode);
|
||||
|
||||
/**
|
||||
* Copy the intranode connection's remote endpoint information into the nvlink_conn_info
|
||||
* structure passed in
|
||||
*/
|
||||
void nvlink_core_copy_intranode_conn_info(nvlink_link *remote_end,
|
||||
nvlink_conn_info *conn_info);
|
||||
|
||||
/**
|
||||
* Copy the internode connection's remote endpoint information into the nvlink_conn_info
|
||||
* structure passed in
|
||||
*/
|
||||
void nvlink_core_copy_internode_conn_info(nvlink_remote_endpoint_info *remote_end,
|
||||
nvlink_conn_info *conn_info);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/******************************* NVLink link management functions *******************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
/**
|
||||
* For the given link, check whether the link state is at the requested state
|
||||
*/
|
||||
NvBool nvlink_core_check_link_state(nvlink_link *link, NvU64 linkState);
|
||||
|
||||
/**
|
||||
* For the given link, check whether the tx sublink state is at the requested state
|
||||
*/
|
||||
NvBool nvlink_core_check_tx_sublink_state(nvlink_link *link, NvU64 txSublinkState);
|
||||
|
||||
/**
|
||||
* For the given link, check whether the rx sublink state is at the requested state
|
||||
*/
|
||||
NvBool nvlink_core_check_rx_sublink_state(nvlink_link *link, NvU64 rxSublinkState);
|
||||
|
||||
/**
|
||||
* Poll for the link to reach the particular state up to the given timeout. The link
|
||||
* state transition is considered failed once timeout occurs
|
||||
*/
|
||||
NvlStatus nvlink_core_poll_link_state(nvlink_link *link,
|
||||
NvU64 linkState,
|
||||
NvU32 timeout);
|
||||
/**
|
||||
* Poll for a given timeout period for a sublink to reach the particular state. The
|
||||
* sublink state transition is considered failed once timeout occurs
|
||||
*/
|
||||
NvlStatus nvlink_core_poll_sublink_state(nvlink_link *localTxSubLink,
|
||||
NvU64 localTxSubLinkState,
|
||||
NvU32 localTxSubLinkSubtate,
|
||||
nvlink_link *remoteRxSubLink,
|
||||
NvU64 remoteRxSubLinkState,
|
||||
NvU32 remoteRxSubLinkSubstate,
|
||||
NvU32 timeout);
|
||||
|
||||
/**
|
||||
* Poll for the tx sublink to reach the specified state up to the given timeout. The
|
||||
* sublink state transition is considered failed once timeout occurs
|
||||
*/
|
||||
NvlStatus nvlink_core_poll_tx_sublink_state(nvlink_link *link,
|
||||
NvU64 txSublinkState,
|
||||
NvU32 txSublinkSubState,
|
||||
NvU32 timeout);
|
||||
|
||||
/**
|
||||
* Poll for the rx sublink to reach the specified state up to the given timeout. The
|
||||
* sublink state transition is considered failed once timeout occurs
|
||||
*/
|
||||
NvlStatus nvlink_core_poll_rx_sublink_state(nvlink_link *link,
|
||||
NvU64 rxSublinkState,
|
||||
NvU32 rxSublinkSubState,
|
||||
NvU32 timeout);
|
||||
|
||||
/************************************************************************************************/
|
||||
/****************** Nvlink print functions for devices/links/connections ************************/
|
||||
/************************************************************************************************/
|
||||
|
||||
void nvlink_core_print_link_state(nvlink_link *link);
|
||||
void nvlink_core_print_intranode_conn(nvlink_intranode_conn *conn);
|
||||
|
||||
|
||||
#endif //_NVLINK_HELPER_H_
|
||||
163
src/common/nvlink/kernel/nvlink/nvlink_lib_mgmt.c
Normal file
163
src/common/nvlink/kernel/nvlink/nvlink_lib_mgmt.c
Normal file
@@ -0,0 +1,163 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "nvlink_ctx.h"
|
||||
#include "nvlink_helper.h"
|
||||
|
||||
#include "nvlink_lock.h"
|
||||
|
||||
nvlink_lib_context nvlinkLibCtx = {0};
|
||||
|
||||
/*
|
||||
* Initialize the nvlink core library
|
||||
*
|
||||
* return NVL_SUCCESS if the library is initialized successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_initialize(void)
|
||||
{
|
||||
NvlStatus lock_status = NVL_SUCCESS;
|
||||
|
||||
if (nvlinkLibCtx.nv_devicelist_head.initialized == 0)
|
||||
{
|
||||
// Allocate top-level lock
|
||||
lock_status = nvlink_lib_top_lock_alloc();
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to allocate top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Acquire top-level lock
|
||||
lock_status = nvlink_lib_top_lock_acquire();
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Top-level lock is now acquired
|
||||
|
||||
// Initialize the device list head
|
||||
nvListInit(&nvlinkLibCtx.nv_devicelist_head.link_list);
|
||||
nvListInit(&nvlinkLibCtx.nv_devicelist_head.node);
|
||||
nvlinkLibCtx.nv_devicelist_head.initialized = 1;
|
||||
|
||||
// Initialize the intranode connection list head
|
||||
nvListInit(&nvlinkLibCtx.nv_intraconn_head.node);
|
||||
|
||||
// Initialize the internode connection list head
|
||||
nvListInit(&nvlinkLibCtx.nv_interconn_head.node);
|
||||
|
||||
// Initialize registered and connected links to 0
|
||||
nvlinkLibCtx.registeredEndpoints = 0;
|
||||
nvlinkLibCtx.connectedEndpoints = 0;
|
||||
nvlinkLibCtx.notConnectedEndpoints = 0;
|
||||
|
||||
//
|
||||
// Initialize fabric node id to max value until set
|
||||
// by ioctl interface
|
||||
//
|
||||
nvlinkLibCtx.nodeId = NV_U16_MAX ;
|
||||
|
||||
// Release top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unload the nvlink core library
|
||||
*
|
||||
* return NVL_SUCCESS if the library is unloaded successfully
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_unload(void)
|
||||
{
|
||||
NvlStatus lock_status = NVL_SUCCESS;
|
||||
|
||||
if (nvlink_lib_is_initialized())
|
||||
{
|
||||
// Acquire top-level lock
|
||||
lock_status = nvlink_lib_top_lock_acquire();
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Top-level lock is now acquired
|
||||
|
||||
// Check if there are no devices registered
|
||||
if (nvlink_lib_is_device_list_empty())
|
||||
{
|
||||
nvlinkLibCtx.nv_devicelist_head.initialized = 0;
|
||||
}
|
||||
|
||||
// Release and free top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
nvlink_lib_top_lock_free();
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the nvlink core library is initialized
|
||||
*
|
||||
* return NV_TRUE if the core library is already initialized
|
||||
*/
|
||||
NvBool
|
||||
nvlink_lib_is_initialized(void)
|
||||
{
|
||||
return nvlinkLibCtx.nv_devicelist_head.initialized;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if there are any devices registered
|
||||
*
|
||||
* return NV_TRUE if there are devices registered in the core library
|
||||
*/
|
||||
NvBool
|
||||
nvlink_lib_is_device_list_empty(void)
|
||||
{
|
||||
NvBool isEmpty = NV_TRUE;
|
||||
|
||||
isEmpty = nvListIsEmpty(&nvlinkLibCtx.nv_devicelist_head.node);
|
||||
|
||||
return isEmpty;
|
||||
}
|
||||
|
||||
488
src/common/nvlink/kernel/nvlink/nvlink_lock.c
Normal file
488
src/common/nvlink/kernel/nvlink/nvlink_lock.c
Normal file
@@ -0,0 +1,488 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvlink.h"
|
||||
#include "nvtypes.h"
|
||||
#include "nvlink_export.h"
|
||||
#include "nvlink_os.h"
|
||||
#include "nvlink_ctx.h"
|
||||
#include "nvlink_helper.h"
|
||||
#include "nvlink_lock.h"
|
||||
|
||||
//
|
||||
// Only enabling locking for testing purposes at the moment.
|
||||
// Disabled at all other times.
|
||||
//
|
||||
#define LOCKING_DISABLED 1
|
||||
|
||||
static void _sort_links(nvlink_link **, NvU32, NvBool (*)(void *, void *));
|
||||
static NvBool _compare(void *, void *);
|
||||
|
||||
/*
|
||||
* Allocate top level lock. Return NVL_SUCCESS if
|
||||
* the lock was allocated else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_top_lock_alloc(void)
|
||||
{
|
||||
if (LOCKING_DISABLED)
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
void *top_lock = NULL;
|
||||
|
||||
// Check if top level lock is already allocated
|
||||
if (nvlinkLibCtx.topLevelLock != NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Top-level lock already allocated\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
top_lock = nvlink_allocLock();
|
||||
if (NULL == top_lock)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to allocate top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
nvlinkLibCtx.topLevelLock = top_lock;
|
||||
|
||||
// Top-level lock allocated
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free top level lock. Return NVL_SUCCESS if
|
||||
* the lock was freed else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_top_lock_free(void)
|
||||
{
|
||||
if (LOCKING_DISABLED)
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if already freed
|
||||
if (NULL == nvlinkLibCtx.topLevelLock)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Top-level lock not allocated/already freed\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
nvlink_freeLock(nvlinkLibCtx.topLevelLock);
|
||||
nvlinkLibCtx.topLevelLock = NULL;
|
||||
|
||||
// Top-level lock freed
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate per-link lock. Return NVL_SUCCESS if
|
||||
* the lock was allocated else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_link_lock_alloc
|
||||
(
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
if (LOCKING_DISABLED)
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
void *link_lock = NULL;
|
||||
|
||||
// Check if already allocated
|
||||
if (link->linkLock != NULL)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link lock already allocated on this link\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
link_lock = nvlink_allocLock();
|
||||
if (NULL == link_lock)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to allocate link lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Link lock allocated
|
||||
link->linkLock = link_lock;
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free per-link lock. Return NVL_SUCCESS if
|
||||
* the lock was freed else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_link_lock_free
|
||||
(
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
if (LOCKING_DISABLED)
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if already freed
|
||||
if (NULL == link->linkLock)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Link lock not allocated/already freed\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
nvlink_freeLock(link->linkLock);
|
||||
link->linkLock = NULL;
|
||||
|
||||
// Link lock freed
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Acquire top level lock. Return NVL_SUCCESS if
|
||||
* the lock was acquired else return NVL_ERR_STATE_IN_USE.
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_top_lock_acquire(void)
|
||||
{
|
||||
if (LOCKING_DISABLED)
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if top-level lock is allocated before attempting to acquire
|
||||
if (NULL == nvlinkLibCtx.topLevelLock)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Top-level lock is not allocated\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
//
|
||||
// ToDo: Check if the lock was acquired succesfully
|
||||
// Currently the nvlink_acquireLock function doesn't report failures
|
||||
//
|
||||
nvlink_acquireLock(nvlinkLibCtx.topLevelLock);
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Acquired top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Release top level lock. Return NVL_SUCCESS if
|
||||
* the lock was released else return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_top_lock_release(void)
|
||||
{
|
||||
if (LOCKING_DISABLED)
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if top-level lock is allocated before attempting to release
|
||||
if (NULL == nvlinkLibCtx.topLevelLock)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Top-level lock is not allocated\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
//
|
||||
// ToDo: Check if the lock was released succesfully
|
||||
// Currently the nvlink_releaseLock function doesn't report failures
|
||||
//
|
||||
nvlink_releaseLock(nvlinkLibCtx.topLevelLock);
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Released top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sort the array of links in order of (DBDF, link#) -
|
||||
* lowest to highest and acquire link locks.
|
||||
* Return NVL_SUCCESS if all the link locks were acquired.
|
||||
* Else if any link lock failed to be acquired, release
|
||||
* all acquired link locks and return NVL_ERR_STATE_IN_USE.
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_link_locks_acquire
|
||||
(
|
||||
nvlink_link **links,
|
||||
int numLinks
|
||||
)
|
||||
{
|
||||
if (LOCKING_DISABLED)
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
int i;
|
||||
|
||||
nvlink_link *link_prev = NULL;
|
||||
|
||||
// Check if array of links is already empty before attempting to release.
|
||||
if ((NULL == links) || (numLinks == 0))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Could not release the link locks. Link array is empty !\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Sort the link array in increasing order of (DBDF, link#)
|
||||
_sort_links(links, numLinks, _compare);
|
||||
|
||||
for (i = 0; i < numLinks; i++)
|
||||
{
|
||||
//
|
||||
// Don't acquire locks on loop back links twice since the current link is
|
||||
// the same as the previous one
|
||||
//
|
||||
if (links[i] != link_prev)
|
||||
{
|
||||
nvlink_acquireLock(links[i]->linkLock);
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Acquire link lock for dom:%d bus:%d dev:%d fun:%d link:%d\n",
|
||||
__FUNCTION__,
|
||||
|
||||
links[i]->dev->pciInfo.domain, links[i]->dev->pciInfo.bus,
|
||||
links[i]->dev->pciInfo.device, links[i]->dev->pciInfo.function,
|
||||
links[i]->linkNumber));
|
||||
}
|
||||
|
||||
link_prev = links[i];
|
||||
}
|
||||
|
||||
//
|
||||
// ToDo: Check if the lock was acquired succesfully
|
||||
// Currently the nvlink_acquireLock function doesn't report failures
|
||||
//
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop over all the links and call nvlink_releaseLock(links[i]->linkLock).
|
||||
* Return NVL_SUCCESS if all the link locks were released.
|
||||
* Else if any link lock failed to be released return NVL_ERR_GENERIC.
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_link_locks_release
|
||||
(
|
||||
nvlink_link **links,
|
||||
int numLinks
|
||||
)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (LOCKING_DISABLED)
|
||||
{
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
nvlink_link *link_prev = NULL;
|
||||
|
||||
// Check if array of links is already empty before attempting to release.
|
||||
if ((NULL == links) || (numLinks == 0))
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Could not release the link locks. Link array is empty !\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Sort the link array in increasing order of (DBDF, link#)
|
||||
_sort_links(links, numLinks, _compare);
|
||||
|
||||
for (i = 0; i < numLinks; i++)
|
||||
{
|
||||
//
|
||||
// Don't release locks on loop back links twice since the current link is
|
||||
// the same as the previous one
|
||||
//
|
||||
if (links[i] != link_prev)
|
||||
{
|
||||
nvlink_releaseLock(links[i]->linkLock);
|
||||
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
|
||||
"%s: Release link lock for dom:%d bus:%d dev:%d fun:%d link:%d\n",
|
||||
__FUNCTION__,
|
||||
|
||||
links[i]->dev->pciInfo.domain, links[i]->dev->pciInfo.bus,
|
||||
links[i]->dev->pciInfo.device, links[i]->dev->pciInfo.function,
|
||||
links[i]->linkNumber));
|
||||
}
|
||||
|
||||
link_prev = links[i];
|
||||
}
|
||||
|
||||
//
|
||||
// ToDo: Check if the lock was released succesfully
|
||||
// Currently the nvlink_releaseLock function doesn't report failures
|
||||
//
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sorts the links in the increasing order of DBDF, link#
|
||||
*/
|
||||
static void
|
||||
_sort_links
|
||||
(
|
||||
nvlink_link **links,
|
||||
NvU32 numLinks,
|
||||
NvBool (*compare)(void *, void *)
|
||||
)
|
||||
{
|
||||
nvlink_link *temp = NULL;
|
||||
NvU32 i, j;
|
||||
|
||||
for (i = 0; i < numLinks; i++)
|
||||
{
|
||||
for (j = i + 1; j < numLinks; j++)
|
||||
{
|
||||
if (_compare(links[j], links[i]))
|
||||
{
|
||||
temp = links[i];
|
||||
links[i] = links[j];
|
||||
links[j] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare function for _nvlink_sort - compares DBDF, link#
|
||||
*/
|
||||
static NvBool
|
||||
_compare
|
||||
(
|
||||
void *link1,
|
||||
void *link2
|
||||
)
|
||||
{
|
||||
nvlink_link *l1 = (nvlink_link *) link1;
|
||||
nvlink_link *l2 = (nvlink_link *) link2;
|
||||
|
||||
// Compare link domains
|
||||
if (l1->dev->pciInfo.domain < l2->dev->pciInfo.domain)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
if (l1->dev->pciInfo.domain > l2->dev->pciInfo.domain)
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
// Domain is same for devices of links. Compare bus next
|
||||
|
||||
// Compare link buses
|
||||
if (l1->dev->pciInfo.bus < l2->dev->pciInfo.bus)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
if (l1->dev->pciInfo.bus > l2->dev->pciInfo.bus)
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
// Bus is same for devices of links. Compare device next
|
||||
|
||||
// Compare link devices
|
||||
if (l1->dev->pciInfo.device < l2->dev->pciInfo.device)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
if (l1->dev->pciInfo.device > l2->dev->pciInfo.device)
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
// Device is same for devices of links. Compare function next
|
||||
|
||||
// Compare link functions
|
||||
if (l1->dev->pciInfo.function < l2->dev->pciInfo.function)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
if (l1->dev->pciInfo.function > l2->dev->pciInfo.function)
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
// DBDF is same for both the links. Check the link#
|
||||
|
||||
// Compare link numbers
|
||||
if (l1->linkNumber < l2->linkNumber)
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user