Files
open-gpu-kernel-modules/src/common/nvlink/interface/nvlink.h
Bernhard Stoeckner 337e28efda 535.86.05
2023-07-18 16:00:22 +02:00

623 lines
23 KiB
C

/*******************************************************************************
Copyright (c) 2014-2023 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
//
// nvlink.h
//
#ifndef _NVLINK_H_
#define _NVLINK_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <nv-kernel-interface-api.h>
#include "nvlink_common.h"
#include "nvlink_lib_ctrl.h"
#include "nv_list.h"
#include "nvlink_errors.h"
#include "nvCpuUuid.h"
//
// Debug prints
//
// NVLINK_PRINT takes a single argument: the complete, already-parenthesized
// argument list. Enabled builds paste that list after nvlink_print; all other
// builds drop it, so nothing inside the list is compiled or evaluated.
//
#if defined(DEVELOP) || defined(DEBUG) || defined(NV_MODS)

    #define NVLINK_PRINT_ENABLED            1
    #define NVLINK_PRINT(format_and_stuff)  nvlink_print format_and_stuff

    // Call-site location: file, line and function, expanded where it is used
    #define DBG_MODULE_NVLINK_CORE          __FILE__, __LINE__, __FUNCTION__

    // Every other module tag currently aliases the core tag
    #define DBG_MODULE_IBMNPU               DBG_MODULE_NVLINK_CORE
    #define DBG_MODULE_TEGRASHIM            DBG_MODULE_NVLINK_CORE
    #define DBG_MODULE_EBRIDGE              DBG_MODULE_NVLINK_CORE
    #define DBG_MODULE_NVSWITCH             DBG_MODULE_NVLINK_CORE

#else

    // Prints disabled: NVLINK_PRINT_ENABLED and the DBG_MODULE_* tags stay undefined
    #define NVLINK_PRINT(format_and_stuff)  ((void)(0))

#endif // DEVELOP || DEBUG || NV_MODS
//
// Kinds of devices that support NVLink
//
#define NVLINK_DEVICE_TYPE_EBRIDGE      0x0
#define NVLINK_DEVICE_TYPE_IBMNPU       0x1
#define NVLINK_DEVICE_TYPE_GPU          0x2
#define NVLINK_DEVICE_TYPE_NVSWITCH     0x3
#define NVLINK_DEVICE_TYPE_TEGRASHIM    0x4
//
// NVLink versions
//
// The encoding is not contiguous: 0x00000003 is not assigned in this header.
//
#define NVLINK_DEVICE_VERSION_10        0x00000001
#define NVLINK_DEVICE_VERSION_20        0x00000002
#define NVLINK_DEVICE_VERSION_22        0x00000004
#define NVLINK_DEVICE_VERSION_30        0x00000005
#define NVLINK_DEVICE_VERSION_31        0x00000006
#define NVLINK_DEVICE_VERSION_40        0x00000007
//
// Link transition timeouts, in milliseconds
//
#define NVLINK_TRANSITION_OFF_TIMEOUT        1
#define NVLINK_TRANSITION_SAFE_TIMEOUT       300
#define NVLINK_TRANSITION_HS_TIMEOUT         8000
#define NVLINK_TRANSITION_ACTIVE_PENDING     2000
#define NVLINK_TRANSITION_POST_HS_TIMEOUT    70
//
// Link training seed values
//
#define NVLINK_MAX_SEED_NUM            6

//
// Element count of a per-link seed buffer (see nvlink_link::seedData): one
// slot more than NVLINK_MAX_SEED_NUM.
// The expansion is parenthesized so that the macro stays a single operand
// when it is used inside a larger expression, e.g.
// sizeof(NvU32) * NVLINK_MAX_SEED_BUFFER_SIZE.
//
#define NVLINK_MAX_SEED_BUFFER_SIZE    (NVLINK_MAX_SEED_NUM + 1)

#define NVLINK_MAX_SYSTEM_LINK_NUM     624
//
// Forward declarations
//
// Tags only: they let the definitions in this header refer to one another by
// pointer. No definition of nvlink_device_handle appears in this header.
//
struct nvlink_device;
struct nvlink_link;
struct nvlink_link_handlers;
struct nvlink_device_handle;
//
// nvlink device state
//
struct nvlink_device
{
    // List linkage for this device (presumably the core's list of
    // registered devices - confirm against the library implementation)
    NVListRec node;

    // Head of the list of links associated with this device
    NVListRec link_list;

    // Identifier that is unique to this device within the core
    NvU64 deviceId;

    // Names and ids supplied by the client
    char *driverName;
    char *deviceName;
    NvU8 *uuid;

    // PCI information
    struct nvlink_pci_info pciInfo;

    // Device type and status
    NvU64  type;
    NvBool initialized;

    // Selects the training type: ALI or non-ALI
    NvBool enableALI;

    // Fabric node id
    NvU16 nodeId;

    // Per-Ioctrl data
    NvU32 numIoctrls;
    NvU32 numLinksPerIoctrl;
    NvU32 numActiveLinksPerIoctrl;

    // Set when this device is a reduced nvlink config
    NvBool bReducedNvlinkConfig;

    // Private information owned by the client
    void *pDevInfo;
};
//
// nvlink link change type
//
// Values are spelled out; they match the implicit numbering (0, 1).
//
enum nvlink_link_change_type
{
    nvlink_retrain_from_off  = 0,
    nvlink_retrain_from_safe = 1
};
//
// nvlink link_change parameters
//
// Names the two endpoints (master and slave) involved in a link change and
// the kind of change requested.
//
struct nvlink_link_change
{
    struct nvlink_link           *master;
    struct nvlink_link           *slave;
    enum nvlink_link_change_type  change_type;
};
//
// Structure representing Nvlink Error Threshold
//
struct nvlink_link_error_threshold
{
NvU8 thresholdMan;
NvU8 thresholdExp;
NvU8 timescaleMan;
NvU8 timescaleExp;
NvBool bInterruptEn;
NvBool bUserConfig;
NvBool bInterruptTrigerred; // Error threshold interrupt generated
};
//
// nvlink link state
//
struct nvlink_link
{
    // List linkage for this link (presumably on its device's link_list -
    // confirm against the library implementation)
    NVListRec node;

    // Device this link is associated with
    struct nvlink_device *dev;

    // Lock for the per-link structure
    void *linkLock;

    // Identifier that is unique to this link within the core
    NvU64 linkId;

    // Whether this link is the master of its connection
    NvBool master;

    // Link name and number supplied by the client
    char  *linkName;
    NvU32  linkNumber;

    NvU64 token;

    // Link state
    NvU32  state;
    NvBool inSWCFG;

    // Sublink states
    NvU32 tx_sublink_state;
    NvU32 rx_sublink_state;

    // Whether receiver detect has passed
    NvBool bRxDetected;

    // The link failed when sending InitPll to minion
    NvBool bTxCommonModeFail;

    // The link failed when transitioning to SWCFG
    NvBool bSafeTransitionFail;

    // The link failed when sending INITPHASE5 to minion
    NvBool bInitphase5Fails;

    // IP version
    NvU32 version;

    // Whether state has been saved
    NvBool bStateSaved;

    // Number of retries to put the link to safe
    NvU32 safe_retries;

    // Set if the link is AC coupled
    NvBool ac_coupled;

    // Number of retries to discover the other end of the link
    NvU32 packet_injection_retries;

    // Local Sid of the link
    NvU64 localSid;

    // Remote Sid of the link
    NvU64 remoteSid;

    // Remote LinkId to which the current link is connected
    NvU32 remoteLinkId;

    NvU32 remoteDeviceType;

    // Whether INITNEGOTIATE received CONFIG_GOOD (NVL3.0+)
    NvBool bInitnegotiateConfigGood;

    // Power state transition status
    enum
    {
        nvlink_power_state_in_L0,
        nvlink_power_state_entering_L2,
        nvlink_power_state_in_L2,
        nvlink_power_state_exiting_L2
    } powerStateTransitionStatus;

    // Link handlers
    const struct nvlink_link_handlers *link_handlers;

    // Private information owned by the client
    void *link_info;

    // Information about the outstanding link change request
    struct nvlink_link_change link_change;

    // Seed data for this nvlink
    NvU32 seedData[NVLINK_MAX_SEED_BUFFER_SIZE];

    struct nvlink_link_error_threshold errorThreshold;
};
//
// nvlink link handler ops
//
// Table of callbacks attached to a link through nvlink_link::link_handlers.
// NOTE(review): the grouping below follows the callback names only; the
// exact contract of each callback is not documented in this header.
//
struct nvlink_link_handlers
{
    // add / remove
    NV_API_CALL NvlStatus (*add)                   (struct nvlink_link *link);
    NV_API_CALL NvlStatus (*remove)                (struct nvlink_link *link);

    // lock / unlock
    NV_API_CALL NvlStatus (*lock)                  (struct nvlink_link *link);
    NV_API_CALL void      (*unlock)                (struct nvlink_link *link);

    // link change request
    NV_API_CALL NvlStatus (*queue_link_change)     (struct nvlink_link_change *link_change);

    // DL and TL link mode
    NV_API_CALL NvlStatus (*set_dl_link_mode)      (struct nvlink_link *link, NvU64 mode, NvU32 flags);
    NV_API_CALL NvlStatus (*get_dl_link_mode)      (struct nvlink_link *link, NvU64 *mode);
    NV_API_CALL NvlStatus (*set_tl_link_mode)      (struct nvlink_link *link, NvU64 mode, NvU32 flags);
    NV_API_CALL NvlStatus (*get_tl_link_mode)      (struct nvlink_link *link, NvU64 *mode);

    // TX and RX mode
    NV_API_CALL NvlStatus (*set_tx_mode)           (struct nvlink_link *link, NvU64 mode, NvU32 flags);
    NV_API_CALL NvlStatus (*get_tx_mode)           (struct nvlink_link *link, NvU64 *mode, NvU32 *subMode);
    NV_API_CALL NvlStatus (*set_rx_mode)           (struct nvlink_link *link, NvU64 mode, NvU32 flags);
    NV_API_CALL NvlStatus (*get_rx_mode)           (struct nvlink_link *link, NvU64 *mode, NvU32 *subMode);

    // RX detect
    NV_API_CALL NvlStatus (*set_rx_detect)         (struct nvlink_link *link, NvU32 flags);
    NV_API_CALL NvlStatus (*get_rx_detect)         (struct nvlink_link *link);

    // discovery token
    NV_API_CALL NvlStatus (*write_discovery_token) (struct nvlink_link *link, NvU64 token);
    NV_API_CALL NvlStatus (*read_discovery_token)  (struct nvlink_link *link, NvU64 *token);

    // training
    NV_API_CALL void      (*training_complete)     (struct nvlink_link *link);
    NV_API_CALL void      (*get_uphy_load)         (struct nvlink_link *link, NvBool *bUnlocked);
    NV_API_CALL NvlStatus (*ali_training)          (struct nvlink_link *link);
};
//
// Represents an intranode connection in a single/multi-node system.
// Both endpoints of the connection are visible from the same node.
//
struct nvlink_intranode_conn
{
    // List linkage for this connection
    NVListRec node;

    // The two endpoints of the connection
    struct nvlink_link *end0;
    struct nvlink_link *end1;
};
//
// Represents an internode connection in a multi-node system.
// One of the endpoints of the connection must be a local link.
//
struct nvlink_internode_conn
{
    // List linkage for this connection
    NVListRec node;

    // Endpoint that is a local link
    struct nvlink_link *local_end;

    // Description of the remote endpoint
    nvlink_remote_endpoint_info remote_end;
};
//
// Typedefs
//
// Each tag gets an alias of the same name. nvlink_device_handle,
// nvlink_device_handlers and nvlink_inband_data have no definition in this
// header.
//
typedef struct nvlink_device          nvlink_device;
typedef struct nvlink_device_handle   nvlink_device_handle;
typedef struct nvlink_link            nvlink_link;
typedef struct nvlink_link_change     nvlink_link_change;
typedef struct nvlink_device_handlers nvlink_device_handlers;
typedef struct nvlink_link_handlers   nvlink_link_handlers;
typedef struct nvlink_intranode_conn  nvlink_intranode_conn;
typedef struct nvlink_internode_conn  nvlink_internode_conn;
typedef enum   nvlink_link_change_type nvlink_link_change_type;
typedef struct nvlink_inband_data     nvlink_inband_data;
//
// Retry limits
// NOTE(review): presumably the caps for nvlink_link::safe_retries and
// nvlink_link::packet_injection_retries - confirm against the library code.
//
#define NVLINK_MAX_NUM_SAFE_RETRIES                7
#define NVLINK_MAX_NUM_PACKET_INJECTION_RETRIES    4
//
// NVLINK LINK states
//
// 0x0D is not assigned in this header.
//
#define NVLINK_LINKSTATE_OFF                   0x00   // OFF
#define NVLINK_LINKSTATE_HS                    0x01   // High Speed
#define NVLINK_LINKSTATE_SAFE                  0x02   // Safe/Discovery State
#define NVLINK_LINKSTATE_FAULT                 0x03   // Faulty
#define NVLINK_LINKSTATE_RECOVERY              0x04   // Recovery
#define NVLINK_LINKSTATE_FAIL                  0x05   // Unconnected/Fail
#define NVLINK_LINKSTATE_DETECT                0x06   // Detect mode
#define NVLINK_LINKSTATE_RESET                 0x07   // Reset
#define NVLINK_LINKSTATE_ENABLE_PM             0x08   // Enable Link Power Management
#define NVLINK_LINKSTATE_DISABLE_PM            0x09   // Disable Link Power Management
#define NVLINK_LINKSTATE_SLEEP                 0x0A   // Sleep (L2)
#define NVLINK_LINKSTATE_SAVE_STATE            0x0B   // Save state while entering L2
#define NVLINK_LINKSTATE_RESTORE_STATE         0x0C   // Restore state while exiting L2
#define NVLINK_LINKSTATE_PRE_HS                0x0E   // Settings before moving to High Speed
#define NVLINK_LINKSTATE_DISABLE_ERR_DETECT    0x0F   // Disable Error detection (interrupt)
#define NVLINK_LINKSTATE_LANE_DISABLE          0x10   // Disable Lanes
#define NVLINK_LINKSTATE_LANE_SHUTDOWN         0x11   // Shutdown Lanes in PHY
#define NVLINK_LINKSTATE_TRAFFIC_SETUP         0x12   // Setup traffic flow after ACTIVE
#define NVLINK_LINKSTATE_INITPHASE1            0x13   // INITPHASE1
#define NVLINK_LINKSTATE_INITNEGOTIATE         0x14   // Initialize the negotiation (Ampere And Later)
#define NVLINK_LINKSTATE_POST_INITNEGOTIATE    0x15   // Sends DL stat
#define NVLINK_LINKSTATE_INITOPTIMIZE          0x16   // INITOPTIMIZE
#define NVLINK_LINKSTATE_POST_INITOPTIMIZE     0x17   // POST INITOPTIMIZE DL stat check
#define NVLINK_LINKSTATE_DISABLE_HEARTBEAT     0x18   // Disables the heartbeat errors
#define NVLINK_LINKSTATE_CONTAIN               0x19   // TL is in contain mode
#define NVLINK_LINKSTATE_INITTL                0x1A   // INITTL
#define NVLINK_LINKSTATE_INITPHASE5            0x1B   // INITPHASE5
#define NVLINK_LINKSTATE_ALI                   0x1C   // ALI
#define NVLINK_LINKSTATE_ACTIVE_PENDING        0x1D   // Intermediate state for a link going to active
#define NVLINK_LINKSTATE_INVALID               0xFF   // Invalid state
//
// NVLINK TX SUBLINK states
//
// TX_SINGLE_LANE (deprecated) and TX_LOW_POWER share the value 0x4.
//
#define NVLINK_SUBLINK_STATE_TX_HS                     0x0   // TX High Speed
#define NVLINK_SUBLINK_STATE_TX_SINGLE_LANE            0x4   // TX Single Lane (1/8th or 1/4th) Mode (Deprecated)
#define NVLINK_SUBLINK_STATE_TX_LOW_POWER              0x4   // TX Single Lane Mode / L1
#define NVLINK_SUBLINK_STATE_TX_TRAIN                  0x5   // TX training
#define NVLINK_SUBLINK_STATE_TX_SAFE                   0x6   // TX Safe Mode
#define NVLINK_SUBLINK_STATE_TX_OFF                    0x7   // TX OFF
#define NVLINK_SUBLINK_STATE_TX_COMMON_MODE            0x8   // TX common mode enable
#define NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE    0x9   // TX common mode disable
#define NVLINK_SUBLINK_STATE_TX_DATA_READY             0xA   // Do Data Ready and Data Enable
#define NVLINK_SUBLINK_STATE_TX_EQ                     0xB   // TX equalization
#define NVLINK_SUBLINK_STATE_TX_PRBS_EN                0xC   // TX IOBIST PRBS generator enable
#define NVLINK_SUBLINK_STATE_TX_POST_HS                0xD   // TX Post High Speed settings
//
// NVLINK RX SUBLINK states
//
// RX_SINGLE_LANE (deprecated) and RX_LOW_POWER share the value 0x4.
//
#define NVLINK_SUBLINK_STATE_RX_HS             0x0   // RX High Speed
#define NVLINK_SUBLINK_STATE_RX_SINGLE_LANE    0x4   // RX Single Lane (1/8th or 1/4th) Mode (Deprecated)
#define NVLINK_SUBLINK_STATE_RX_LOW_POWER      0x4   // RX Single Lane Mode / L1
#define NVLINK_SUBLINK_STATE_RX_TRAIN          0x5   // RX training
#define NVLINK_SUBLINK_STATE_RX_SAFE           0x6   // RX Safe Mode
#define NVLINK_SUBLINK_STATE_RX_OFF            0x7   // RX OFF
#define NVLINK_SUBLINK_STATE_RX_RXCAL          0x8   // RX in calibration
#define NVLINK_SUBLINK_STATE_RX_INIT_TERM      0x9   // Enable RX termination
//
// NVLINK SUBLINK sub-states, one set per direction
//
#define NVLINK_SUBLINK_SUBSTATE_TX_STABLE    0x0   // TX Stable
#define NVLINK_SUBLINK_SUBSTATE_RX_STABLE    0x0   // RX Stable
//
// State change flags
//
#define NVLINK_STATE_CHANGE_ASYNC    0x0   // Return without waiting for the state change to complete
#define NVLINK_STATE_CHANGE_SYNC     0x1   // Wait until the state change has completed
/************************************************************************************************/
/***************************** NVLink library management functions ******************************/
/************************************************************************************************/
/*
 * Report whether the nvlink core library is initialized
 */
NvBool nvlink_lib_is_initialized(void);

/*
 * Report whether no devices are registered
 */
NvBool nvlink_lib_is_device_list_empty(void);

/*
 * Report whether a device registered with the nvlink corelib has a reduced
 * nvlink config
 */
NvBool nvlink_lib_is_registerd_device_with_reduced_config(void);
/************************************************************************************************/
/************************** NVLink library driver-side interface ********************************/
/***************** Manages device and link registration and un-registration *********************/
/************************************************************************************************/
/*
 * Associate a device with the NVLink Core.
 * For the duration of the call, the calling driver must support callbacks
 * from the Core into the driver.
 */
NvlStatus nvlink_lib_register_device(nvlink_device *dev);

/*
 * Unassociate a device from the NVLink Core.
 * Any links related to the device that are still registered are removed too.
 * For the duration of the call, the calling driver must support callbacks
 * from the Core into the driver.
 */
NvlStatus nvlink_lib_unregister_device(nvlink_device *dev);

/*
 * Associate a link with a device in the NVLink Core.
 * For the duration of the call, the calling driver must support callbacks
 * from the Core into the driver.
 */
NvlStatus nvlink_lib_register_link(nvlink_device *dev,
                                   nvlink_link   *link);

/*
 * Unassociate a link from its device in the NVLink Core.
 * For the duration of the call, the calling driver must support callbacks
 * from the Core into the driver.
 */
NvlStatus nvlink_lib_unregister_link(nvlink_link *link);

/*
 * Get the number of devices whose type is deviceType; the count is returned
 * through numDevices.
 */
NvlStatus nvlink_lib_return_device_count_by_type(NvU32  deviceType,
                                                 NvU32 *numDevices);
/************************************************************************************************/
/******************************* NVLink link management functions *******************************/
/************************************************************************************************/
/*
 * Report whether the device has no links registered
 */
NvBool nvlink_lib_is_link_list_empty(nvlink_device *dev);

/*
 * Get the link associated with the given device's link number; the link is
 * returned through the link out-parameter.
 */
NvlStatus nvlink_lib_get_link(nvlink_device  *device,
                              NvU32           link_id,
                              nvlink_link   **link);

/*
 * Set the link endpoint as the link master
 */
NvlStatus nvlink_lib_set_link_master(nvlink_link *link);

/*
 * Get the link master associated with this endpoint; it is returned through
 * the master out-parameter.
 */
NvlStatus nvlink_lib_get_link_master(nvlink_link  *link,
                                     nvlink_link **master);

/*
 * Set the training state for the given link as non-ALI or ALI
 */
NvlStatus nvlink_lib_link_set_training_mode(nvlink_link *link,
                                            NvBool       enableALI);
/************************************************************************************************/
/*************************** NVLink topology discovery functions ********************************/
/************************************************************************************************/
/*
 * Get the connected remote endpoint information.
 * For a given link, return the details of the other endpoint it is connected
 * to. If no connection is associated with the given link, the connected
 * member of conn_info will be NV_FALSE.
 *
 * Note: this routine does not initiate any link initialization or topology
 *       discovery.
 */
NvlStatus nvlink_lib_get_remote_conn_info(nvlink_link      *link,
                                          nvlink_conn_info *conn_info);

/*
 * Get the connected remote endpoint information.
 * For a given end of a link, return the device and link information for the
 * remote end, along with a boolean variable that specifies whether the
 * topology detection was complete.
 */
NvlStatus nvlink_lib_discover_and_get_remote_conn_info(nvlink_link      *end,
                                                       nvlink_conn_info *conn_info,
                                                       NvU32             flags);
/************************************************************************************************/
/****************************** NVLink initialization functions *********************************/
/************************************************************************************************/
/*
 * Re-initialize the given link, taking it from OFF to SWCFG
 */
NvlStatus nvlink_lib_reinit_link_from_off_to_swcfg(nvlink_link *link,
                                                   NvU32        flags);
/************************************************************************************************/
/********************************** NVLink training functions ***********************************/
/************************************************************************************************/
/*
 * Train a given set of links from the SWCFG state to the ACTIVE state
 *     a. For low training latency  - the caller passes all links as an array
 *     b. For high training latency - the caller passes the links one by one
 */
NvlStatus nvlink_lib_train_links_from_swcfg_to_active(nvlink_link **links,
                                                      NvU32         linkCount,
                                                      NvU32         flags);

/*
 * Train a given set of links of a device from the L2 state to the ACTIVE state
 */
NvlStatus nvlink_lib_train_links_from_L2_to_active(nvlink_device *dev,
                                                   NvU32          linkMask,
                                                   NvU32          flags);

/*
 * Retrain a given link from SWCFG to ACTIVE
 */
NvlStatus nvlink_lib_retrain_link_from_swcfg_to_active(nvlink_link *link,
                                                       NvU32        flags);

/*
 * Save the seed data passed in from an endpoint driver
 */
NvlStatus nvlink_lib_save_training_seeds(nvlink_link *link,
                                         NvU32       *seedData);

/*
 * NOTE(review): undocumented in the original header; presumably copies the
 * seeds held for the link into seedDataCopy - confirm against the
 * implementation.
 */
NvlStatus nvlink_lib_copy_training_seeds(nvlink_link *link,
                                         NvU32       *seedDataCopy);

/*
 * Send the endpoint driver back the seeds that have been stored
 */
void nvlink_lib_restore_training_seeds(nvlink_link *link,
                                       NvU32       *seedData);

/*
 * Check that the requested links have trained to active
 */
NvlStatus nvlink_lib_check_training_complete(nvlink_link **links,
                                             NvU32         linkCount);
/************************************************************************************************/
/********************************** NVLink shutdown functions ***********************************/
/************************************************************************************************/
/*
 * [CLEAN SHUTDOWN]
 * Shut down the given links of a device from the active state to the L2 state
 */
NvlStatus nvlink_lib_powerdown_links_from_active_to_L2(nvlink_device *dev,
                                                       NvU32          linkMask,
                                                       NvU32          flags);

/*
 * [PSEUDO-CLEAN SHUTDOWN]
 * Shut down the given array of links from the ACTIVE state to the OFF state
 */
NvlStatus nvlink_lib_powerdown_links_from_active_to_off(nvlink_link **links,
                                                        NvU32         numLinks,
                                                        NvU32         flags);

/*
 * Power down the given array of links from the ACTIVE state to the SWCFG state
 */
NvlStatus nvlink_lib_powerdown_links_from_active_to_swcfg(nvlink_link **links,
                                                          NvU32         numLinks,
                                                          NvU32         flags);

/*
 * Reset the given array of links
 */
NvlStatus nvlink_lib_reset_links(nvlink_link **links,
                                 NvU32         numLinks,
                                 NvU32         flags);

/*
 * Floorsweep the necessary links and set buffer ready on the active links
 */
NvlStatus nvlink_lib_powerdown_floorswept_links_to_off(nvlink_device *pDevice);
/*
 * Nvlink core library structure iterators
 *
 * Thin wrappers over nvListForEachEntry / nvListForEachEntry_safe.
 *
 *   head - an object (not a pointer) whose list member is named by `node`
 *   dev  - a pointer to the device whose link_list is walked
 *   node - name of the list member; it is substituted both as the member of
 *          `head` and as the member argument handed to the list macro
 *
 * The `head` and `dev` arguments are parenthesized in the expansion so that
 * expression arguments such as `*pHead` or `devs + 1` bind as a whole before
 * the `.` / `->` member access is applied.
 */
#define FOR_EACH_DEVICE_REGISTERED(dev, head, node) \
    nvListForEachEntry(dev, &(head).node, node)

#define FOR_EACH_LINK_REGISTERED(link, dev, node) \
    nvListForEachEntry(link, &(dev)->link_list, node)

#define FOR_EACH_LINK_REGISTERED_SAFE(link, next, dev, node) \
    nvListForEachEntry_safe(link, next, &(dev)->link_list, node)

#define FOR_EACH_CONNECTION(conn, head, node) \
    nvListForEachEntry(conn, &(head).node, node)
#ifdef __cplusplus
}
#endif
#endif // _NVLINK_H_