Files
open-gpu-kernel-modules/src/common/nvlink/interface/nvlink.h
Bernhard Stoeckner 91676d6628 550.40.07
2024-01-24 18:28:48 +01:00

640 lines
24 KiB
C

/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
//
// nvlink.h
//
#ifndef _NVLINK_H_
#define _NVLINK_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <nv-kernel-interface-api.h>
#include "nvlink_common.h"
#include "nvlink_lib_ctrl.h"
#include "nv_list.h"
#include "nvlink_errors.h"
#include "nvCpuUuid.h"
// Debug Prints
//
// NVLINK_PRINT takes ONE argument: a fully parenthesized argument list, which
// is pasted directly after nvlink_print in DEVELOP, DEBUG and NV_MODS builds.
// In every other build the statement collapses to a no-op, the argument list
// is never evaluated, and NVLINK_PRINT_ENABLED / DBG_MODULE_* stay undefined.
#if !(defined(DEVELOP) || defined(DEBUG) || defined(NV_MODS))
#define NVLINK_PRINT(format_and_stuff) ((void)(0))
#else
#define NVLINK_PRINT_ENABLED 1
#define NVLINK_PRINT(format_and_stuff) nvlink_print format_and_stuff
// Every module tag expands to the call site's file, line and function name
#define DBG_MODULE_NVLINK_CORE __FILE__, __LINE__, __FUNCTION__
#define DBG_MODULE_EBRIDGE DBG_MODULE_NVLINK_CORE
#define DBG_MODULE_IBMNPU DBG_MODULE_NVLINK_CORE
#define DBG_MODULE_NVSWITCH DBG_MODULE_NVLINK_CORE
#define DBG_MODULE_TEGRASHIM DBG_MODULE_NVLINK_CORE
#endif
// Devices that support NVLINK
// Device type identifiers, numbered consecutively from 0x0.
// NOTE(review): presumably the values stored in nvlink_device.type and
// nvlink_link.remoteDeviceType, and passed as deviceType to
// nvlink_lib_return_device_count_by_type() - confirm against the core library.
#define NVLINK_DEVICE_TYPE_EBRIDGE 0x0
#define NVLINK_DEVICE_TYPE_IBMNPU 0x1
#define NVLINK_DEVICE_TYPE_GPU 0x2
#define NVLINK_DEVICE_TYPE_NVSWITCH 0x3
#define NVLINK_DEVICE_TYPE_TEGRASHIM 0x4
// NVLink versions
// Identifiers for the supported NVLink versions. The values are not
// contiguous: 0x00000003 is not assigned to any version in this list.
// NOTE(review): presumably the values stored in nvlink_link.version - confirm.
#define NVLINK_DEVICE_VERSION_10 0x00000001
#define NVLINK_DEVICE_VERSION_20 0x00000002
#define NVLINK_DEVICE_VERSION_22 0x00000004
#define NVLINK_DEVICE_VERSION_30 0x00000005
#define NVLINK_DEVICE_VERSION_31 0x00000006
#define NVLINK_DEVICE_VERSION_40 0x00000007
// Link Transition Timeouts in milliseconds
// NOTE(review): NVLINK_TRANSITION_ACTIVE_PENDING has no _TIMEOUT suffix but is
// listed with the timeouts; it is presumably also a duration in milliseconds
// - confirm against its users.
#define NVLINK_TRANSITION_OFF_TIMEOUT 1
#define NVLINK_TRANSITION_SAFE_TIMEOUT 300
#define NVLINK_TRANSITION_HS_TIMEOUT 8000
#define NVLINK_TRANSITION_ACTIVE_PENDING 2000
#define NVLINK_TRANSITION_POST_HS_TIMEOUT 70
// Link training seed values
// NVLINK_MAX_SEED_NUM         - maximum number of seed values
// NVLINK_MAX_SEED_BUFFER_SIZE - element count of a seed buffer: one entry more
//                               than NVLINK_MAX_SEED_NUM. This is the size of
//                               nvlink_link.seedData.
//                               NOTE(review): the extra entry presumably holds
//                               the seed count - confirm against the users.
// NVLINK_MAX_SYSTEM_LINK_NUM  - maximum number of links in a system
//
// The buffer size is parenthesized so that it behaves as a single value inside
// larger expressions (e.g. 2 * NVLINK_MAX_SEED_BUFFER_SIZE or
// sizeof(NvU32) * NVLINK_MAX_SEED_BUFFER_SIZE).
#define NVLINK_MAX_SEED_NUM 6
#define NVLINK_MAX_SEED_BUFFER_SIZE (NVLINK_MAX_SEED_NUM + 1)
#define NVLINK_MAX_SYSTEM_LINK_NUM 624
// Forwards
// Forward declarations so the structures below can refer to each other by
// pointer before their full definitions appear. struct nvlink_device_handle is
// only declared in this header; its definition is not visible here.
struct nvlink_device;
struct nvlink_device_handle;
struct nvlink_link;
struct nvlink_link_handlers;
// nvlink device state
// Describes one device: identification, PCI information, per-IOCTRL link
// counts, the list of its links and an opaque client pointer.
struct nvlink_device
{
// List linkage for this device
// NOTE(review): presumably chains the device into the list walked by
// FOR_EACH_DEVICE_REGISTERED - confirm
NVListRec node;
// List of links associated with this device
NVListRec link_list;
// Uniquely identifies a device in the core
NvU64 deviceId;
// Client supplied names and ids
// NOTE(review): these are pointers, not embedded buffers; ownership and
// lifetime of the pointed-to storage are not visible in this header
char *driverName;
char *deviceName;
NvU8 *uuid;
// PCI Information
struct nvlink_pci_info pciInfo;
// Device type and status
// NOTE(review): type is presumably one of NVLINK_DEVICE_TYPE_* - confirm
NvU64 type;
NvBool initialized;
// Training type: ALI or Non-ALI
NvBool enableALI;
// fabric node id
NvU16 nodeId;
// per Ioctrl data
NvU32 numIoctrls;
NvU32 numLinksPerIoctrl;
NvU32 numActiveLinksPerIoctrl;
//
// boolean indicating if a given device
// is a reduced nvlink config
//
NvBool bReducedNvlinkConfig;
// Client private information
void *pDevInfo;
};
// nvlink link change type
// Kind of link change requested; carried in nvlink_link_change.change_type.
// NOTE(review): going by the names, the enumerators select the starting state
// of a retrain (OFF or SAFE) - confirm against the core library.
enum nvlink_link_change_type
{
nvlink_retrain_from_off,
nvlink_retrain_from_safe,
};
// nvlink link_change parameters
// Describes one link change request: the two link endpoints involved and the
// type of change. An instance is embedded in every nvlink_link (link_change)
// and a pointer to one is the argument of the queue_link_change handler.
struct nvlink_link_change
{
// The two endpoints of the connection the change applies to
struct nvlink_link *master;
struct nvlink_link *slave;
// Requested change (see enum nvlink_link_change_type)
enum nvlink_link_change_type change_type;
};
//
// Structure representing Nvlink Error Threshold
// One instance is embedded in every nvlink_link (errorThreshold).
//
struct nvlink_link_error_threshold
{
// NOTE(review): "Man"/"Exp" presumably mean mantissa/exponent, i.e. the
// threshold and the timescale are each stored as a mantissa/exponent pair
// - confirm the exact encoding against the hardware documentation
NvU8 thresholdMan;
NvU8 thresholdExp;
NvU8 timescaleMan;
NvU8 timescaleExp;
// NOTE(review): presumably "threshold interrupt enabled" and "values were
// configured by the user" respectively - confirm
NvBool bInterruptEn;
NvBool bUserConfig;
// The field name below misspells "Triggered"; it is part of the structure's
// interface and therefore left unchanged
NvBool bInterruptTrigerred; // Error threshold interrupt generated
};
// nvlink link state
// Describes one link endpoint: identity, owning device, link/sublink state,
// failure flags, remote endpoint information, handlers and training seeds.
struct nvlink_link
{
// List linkage for this link
// NOTE(review): presumably chains the link into nvlink_device.link_list
// (see FOR_EACH_LINK_REGISTERED) - confirm
NVListRec node;
// Device the link is associated with
struct nvlink_device *dev;
// Lock for per link structure
void *linkLock;
// Uniquely identifies a link in the core
NvU64 linkId;
// If this link is the master of its connection
NvBool master;
// Client supplied link name and number
char *linkName;
NvU32 linkNumber;
// NOTE(review): presumably the discovery token used with the
// write_discovery_token / read_discovery_token handlers - confirm
NvU64 token;
// Link state
// NOTE(review): state is presumably one of NVLINK_LINKSTATE_* - confirm
NvU32 state;
NvBool inSWCFG;
// Sublink states
// NOTE(review): presumably NVLINK_SUBLINK_STATE_TX_* / _RX_* values - confirm
NvU32 tx_sublink_state;
NvU32 rx_sublink_state;
// Has receiver detect passed
NvBool bRxDetected;
// Link failed when sending InitPll to minion
NvBool bTxCommonModeFail;
// Link failed when transitioning to SWCFG
NvBool bSafeTransitionFail;
// Link failed when sending INITPHASE5 to minion
NvBool bInitphase5Fails;
// IP version
NvU32 version;
// Has state been saved
NvBool bStateSaved;
// Number of retries to put link to safe
NvU32 safe_retries;
// Set if LINK is ac coupled
NvBool ac_coupled;
// Number of retries to discover the other end of the link
NvU32 packet_injection_retries;
// Local Sid of the link.
NvU64 localSid;
// Remote Sid of the link.
NvU64 remoteSid;
// Remote LinkId to which the current link is connected.
NvU32 remoteLinkId;
// NOTE(review): presumably one of NVLINK_DEVICE_TYPE_* - confirm
NvU32 remoteDeviceType;
// Has INITNEGOTIATE received CONFIG_GOOD (NVL3.0+)
NvBool bInitnegotiateConfigGood;
// NOTE(review): undocumented in the original; presumably set when the link is
// managed by CCI (compare the get_cci_link_mode handler) - confirm
NvBool bCciManaged;
// Power state transition status
enum
{
nvlink_power_state_in_L0,
nvlink_power_state_entering_L2,
nvlink_power_state_in_L2,
nvlink_power_state_exiting_L2
} powerStateTransitionStatus;
// Link handlers
const struct nvlink_link_handlers *link_handlers;
// Client private information
void *link_info;
// Outstanding link change request information
struct nvlink_link_change link_change;
// Seed data for the given nvlink; holds NVLINK_MAX_SEED_BUFFER_SIZE entries
NvU32 seedData[NVLINK_MAX_SEED_BUFFER_SIZE];
// Error threshold settings for this link
struct nvlink_link_error_threshold errorThreshold;
};
// nvlink link handler ops
// Table of function pointers attached to a link through
// nvlink_link.link_handlers. Every entry uses the NV_API_CALL calling
// convention; most return an NvlStatus, while unlock, training_complete and
// get_uphy_load return nothing.
// NOTE(review): the per-entry semantics below are inferred from the names and
// signatures only - confirm against the implementing drivers.
struct nvlink_link_handlers
{
// Link add/remove notifications and per-link locking
NV_API_CALL NvlStatus (*add) (struct nvlink_link *link);
NV_API_CALL NvlStatus (*remove) (struct nvlink_link *link);
NV_API_CALL NvlStatus (*lock) (struct nvlink_link *link);
NV_API_CALL void (*unlock) (struct nvlink_link *link);
// Takes a link change request (see struct nvlink_link_change)
NV_API_CALL NvlStatus (*queue_link_change) (struct nvlink_link_change *link_change);
// DL / TL link mode accessors: setters take a mode and flags, getters take a
// pointer to an NvU64 mode
NV_API_CALL NvlStatus (*set_dl_link_mode) (struct nvlink_link *link, NvU64 mode, NvU32 flags);
NV_API_CALL NvlStatus (*get_dl_link_mode) (struct nvlink_link *link, NvU64 *mode);
NV_API_CALL NvlStatus (*set_tl_link_mode) (struct nvlink_link *link, NvU64 mode, NvU32 flags);
NV_API_CALL NvlStatus (*get_tl_link_mode) (struct nvlink_link *link, NvU64 *mode);
// TX / RX sublink mode accessors: getters additionally take a subMode pointer
NV_API_CALL NvlStatus (*set_tx_mode) (struct nvlink_link *link, NvU64 mode, NvU32 flags);
NV_API_CALL NvlStatus (*get_tx_mode) (struct nvlink_link *link, NvU64 *mode, NvU32 *subMode);
NV_API_CALL NvlStatus (*set_rx_mode) (struct nvlink_link *link, NvU64 mode, NvU32 flags);
NV_API_CALL NvlStatus (*get_rx_mode) (struct nvlink_link *link, NvU64 *mode, NvU32 *subMode);
// Receiver detect
NV_API_CALL NvlStatus (*set_rx_detect) (struct nvlink_link *link, NvU32 flags);
NV_API_CALL NvlStatus (*get_rx_detect) (struct nvlink_link *link);
// Discovery token write/read
NV_API_CALL NvlStatus (*write_discovery_token) (struct nvlink_link *link, NvU64 token);
NV_API_CALL NvlStatus (*read_discovery_token) (struct nvlink_link *link, NvU64 *token);
NV_API_CALL void (*training_complete) (struct nvlink_link *link);
NV_API_CALL void (*get_uphy_load) (struct nvlink_link *link, NvBool* bUnlocked);
NV_API_CALL NvlStatus (*get_cci_link_mode) (struct nvlink_link *link, NvU64 *mode);
NV_API_CALL NvlStatus (*ali_training) (struct nvlink_link *link);
};
//
// Represents an intranode connection in a single/multi-node system.
// Both endpoints of the connection are visible from the same node.
//
struct nvlink_intranode_conn
{
// List linkage for this connection
NVListRec node;
// The two link endpoints of the connection
struct nvlink_link *end0;
struct nvlink_link *end1;
};
//
// Represents an internode connection in a multi-node system.
// One of the endpoints of the connection must be a local link.
//
struct nvlink_internode_conn
{
// List linkage for this connection
NVListRec node;
// Local endpoint (a link registered on this node)
struct nvlink_link *local_end;
// Description of the remote endpoint, stored by value
nvlink_remote_endpoint_info remote_end;
};
// Typedefs
// Tag-less aliases so the types can be used without the struct/enum keyword.
// struct nvlink_device_handle, struct nvlink_device_handlers and
// struct nvlink_inband_data are not defined in this part of the header; their
// typedefs only name incomplete types here.
typedef struct nvlink_device nvlink_device;
typedef struct nvlink_device_handle nvlink_device_handle;
typedef struct nvlink_link nvlink_link;
typedef struct nvlink_link_change nvlink_link_change;
typedef struct nvlink_device_handlers nvlink_device_handlers;
typedef struct nvlink_link_handlers nvlink_link_handlers;
typedef struct nvlink_intranode_conn nvlink_intranode_conn;
typedef struct nvlink_internode_conn nvlink_internode_conn;
typedef enum nvlink_link_change_type nvlink_link_change_type;
typedef struct nvlink_inband_data nvlink_inband_data;
// Retry limits
// NOTE(review): presumably the upper bounds for nvlink_link.safe_retries and
// nvlink_link.packet_injection_retries respectively - confirm.
#define NVLINK_MAX_NUM_SAFE_RETRIES 7
#define NVLINK_MAX_NUM_PACKET_INJECTION_RETRIES 4
// NVLINK LINK states
// Values run from 0x00 to 0x1E, with 0x0D not assigned to any state, plus
// 0xFF for the invalid state.
// NOTE(review): presumably the values used for nvlink_link.state and for the
// mode arguments of the set_/get_ link mode handlers - confirm.
#define NVLINK_LINKSTATE_OFF 0x00 // OFF
#define NVLINK_LINKSTATE_HS 0x01 // High Speed
#define NVLINK_LINKSTATE_SAFE 0x02 // Safe/Discovery State
#define NVLINK_LINKSTATE_FAULT 0x03 // Faulty
#define NVLINK_LINKSTATE_RECOVERY 0x04 // Recovery
#define NVLINK_LINKSTATE_FAIL 0x05 // Unconnected/Fail
#define NVLINK_LINKSTATE_DETECT 0x06 // Detect mode
#define NVLINK_LINKSTATE_RESET 0x07 // Reset
#define NVLINK_LINKSTATE_ENABLE_PM 0x08 // Enable Link Power Management
#define NVLINK_LINKSTATE_DISABLE_PM 0x09 // Disable Link Power Management
#define NVLINK_LINKSTATE_SLEEP 0x0A // Sleep (L2)
#define NVLINK_LINKSTATE_SAVE_STATE 0x0B // Save state while entering L2
#define NVLINK_LINKSTATE_RESTORE_STATE 0x0C // Restore state while exiting L2
#define NVLINK_LINKSTATE_PRE_HS 0x0E // Settings before moving to High Speed
#define NVLINK_LINKSTATE_DISABLE_ERR_DETECT 0x0F // Disable Error detection (interrupt)
#define NVLINK_LINKSTATE_LANE_DISABLE 0x10 // Disable Lanes
#define NVLINK_LINKSTATE_LANE_SHUTDOWN 0x11 // Shutdown Lanes in PHY
#define NVLINK_LINKSTATE_TRAFFIC_SETUP 0x12 // Setup traffic flow after ACTIVE
#define NVLINK_LINKSTATE_INITPHASE1 0x13 // INITPHASE1
#define NVLINK_LINKSTATE_INITNEGOTIATE 0x14 // Initialize the negotiation (Ampere And Later)
#define NVLINK_LINKSTATE_POST_INITNEGOTIATE 0x15 // Sends DL stat
#define NVLINK_LINKSTATE_INITOPTIMIZE 0x16 // INITOPTIMIZE
#define NVLINK_LINKSTATE_POST_INITOPTIMIZE 0x17 // POST INITOPTIMIZE DL stat check
#define NVLINK_LINKSTATE_DISABLE_HEARTBEAT 0x18 // Disables the heartbeat errors
#define NVLINK_LINKSTATE_CONTAIN 0x19 // TL is in contain mode
#define NVLINK_LINKSTATE_INITTL 0x1A // INITTL
#define NVLINK_LINKSTATE_INITPHASE5 0x1B // INITPHASE5
#define NVLINK_LINKSTATE_ALI 0x1C // ALI
#define NVLINK_LINKSTATE_ACTIVE_PENDING 0x1D // Intermediate state for a link going to active
#define NVLINK_LINKSTATE_TRAINING_CCI 0x1E // Intermediate state for a link that is still training
#define NVLINK_LINKSTATE_INVALID 0xFF // Invalid state
// NVLINK TX SUBLINK states
// NVLINK_SUBLINK_STATE_TX_SINGLE_LANE (deprecated name) and
// NVLINK_SUBLINK_STATE_TX_LOW_POWER share the value 0x4, so they cannot be
// distinct case labels of one switch. Values 0x1-0x3 are not assigned here.
#define NVLINK_SUBLINK_STATE_TX_HS 0x0 // TX High Speed
#define NVLINK_SUBLINK_STATE_TX_SINGLE_LANE 0x4 // TX Single Lane (1/8th or 1/4th) Mode (Deprecated)
#define NVLINK_SUBLINK_STATE_TX_LOW_POWER 0x4 // TX Single Lane Mode / L1
#define NVLINK_SUBLINK_STATE_TX_TRAIN 0x5 // TX training
#define NVLINK_SUBLINK_STATE_TX_SAFE 0x6 // TX Safe Mode
#define NVLINK_SUBLINK_STATE_TX_OFF 0x7 // TX OFF
#define NVLINK_SUBLINK_STATE_TX_COMMON_MODE 0x8 // TX common mode enable
#define NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE 0x9 // TX common mode disable
#define NVLINK_SUBLINK_STATE_TX_DATA_READY 0xA // Do Data Ready and Data Enable
#define NVLINK_SUBLINK_STATE_TX_EQ 0xB // TX equalization
#define NVLINK_SUBLINK_STATE_TX_PRBS_EN 0xC // TX IOBIST PRBS generator enable
#define NVLINK_SUBLINK_STATE_TX_POST_HS 0xD // TX Post High Speed settings
// NVLINK RX SUBLINK states
// NVLINK_SUBLINK_STATE_RX_SINGLE_LANE (deprecated name) and
// NVLINK_SUBLINK_STATE_RX_LOW_POWER share the value 0x4. Values 0x0-0x7 match
// the TX states of the same name; 0x8 and 0x9 have RX-specific meanings.
#define NVLINK_SUBLINK_STATE_RX_HS 0x0 // RX High Speed
#define NVLINK_SUBLINK_STATE_RX_SINGLE_LANE 0x4 // RX Single Lane (1/8th or 1/4th) Mode (Deprecated)
#define NVLINK_SUBLINK_STATE_RX_LOW_POWER 0x4 // RX Single Lane Mode / L1
#define NVLINK_SUBLINK_STATE_RX_TRAIN 0x5 // RX training
#define NVLINK_SUBLINK_STATE_RX_SAFE 0x6 // RX Safe Mode
#define NVLINK_SUBLINK_STATE_RX_OFF 0x7 // RX OFF
#define NVLINK_SUBLINK_STATE_RX_RXCAL 0x8 // RX in calibration
#define NVLINK_SUBLINK_STATE_RX_INIT_TERM 0x9 // Enable RX termination
// NVLINK TX SUBLINK sub-states
// NOTE(review): presumably the values reported through the subMode argument
// of the get_tx_mode handler - confirm.
#define NVLINK_SUBLINK_SUBSTATE_TX_STABLE 0x0 // TX Stable
// NVLINK RX SUBLINK sub-states
// NOTE(review): presumably the values reported through the subMode argument
// of the get_rx_mode handler - confirm.
#define NVLINK_SUBLINK_SUBSTATE_RX_STABLE 0x0 // RX Stable
// State change flags
// NOTE(review): presumably the values accepted by the NvU32 flags parameters
// of the handlers and library calls declared in this header - confirm.
#define NVLINK_STATE_CHANGE_ASYNC 0x0 // Don't wait for the state change to complete
#define NVLINK_STATE_CHANGE_SYNC 0x1 // Wait for the state change to complete
/************************************************************************************************/
/***************************** NVLink library management functions ******************************/
/************************************************************************************************/
/*
* Check if the nvlink core library is initialized
*/
NvBool nvlink_lib_is_initialized(void);
/*
* Check if there are no devices registered
*/
NvBool nvlink_lib_is_device_list_empty(void);
/*
* Check whether a device registered with the nvlink corelib has a reduced
* nvlink config
*
* Note: "registerd" in the function name is a misspelling of "registered".
* The identifier is part of the public interface and is therefore kept.
*/
NvBool nvlink_lib_is_registerd_device_with_reduced_config(void);
/************************************************************************************************/
/************************** NVLink library driver-side interface ********************************/
/***************** Manages device and link registration and un-registration *********************/
/************************************************************************************************/
/*
* Associates device in the NVLink Core
* During the call, the calling driver must support callbacks into the driver from Core
*
* Returns an NvlStatus code.
*/
NvlStatus nvlink_lib_register_device(nvlink_device *dev);
/*
* Unassociates device in the NVLink Core
* Includes removing any links related to the device if still registered
* During the call, the calling driver must support callbacks into the driver from Core
*
* Returns an NvlStatus code.
*/
NvlStatus nvlink_lib_unregister_device(nvlink_device *dev);
/*
* Associates link with a device in the NVLink Core
* During the call, the calling driver must support callbacks into the driver from Core
*
* Returns an NvlStatus code.
*/
NvlStatus nvlink_lib_register_link(nvlink_device *dev, nvlink_link *link);
/*
* Unassociates link from a device in the NVLink Core
* During the call, the calling driver must support callbacks into the driver from Core
*
* Returns an NvlStatus code.
*/
NvlStatus nvlink_lib_unregister_link(nvlink_link *link);
/*
* Gets number of devices with type deviceType
*
* NOTE(review): deviceType is presumably one of NVLINK_DEVICE_TYPE_* and the
* count is presumably written to *numDevices - confirm.
*/
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
/************************************************************************************************/
/***************************** NVLink device management functions ******************************/
/************************************************************************************************/
/*
* Update UUID and deviceName in core library
*
* NOTE(review): devInfo presumably identifies the registered device to update,
* and uuid / deviceName are the new values. Whether the library copies the data
* or keeps the pointers is not visible in this header - confirm before freeing
* either buffer.
*/
NvlStatus nvlink_lib_update_uuid_and_device_name(nvlink_device_info *devInfo,
NvU8 *uuid,
char *deviceName);
/************************************************************************************************/
/******************************* NVLink link management functions *******************************/
/************************************************************************************************/
/*
* Check if the device has no links registered
*/
NvBool nvlink_lib_is_link_list_empty(nvlink_device *dev);
/*
* Get the link associated with the given device's link number
*
* NOTE(review): the result is presumably returned through *link - confirm.
*/
NvlStatus nvlink_lib_get_link(nvlink_device *device,
NvU32 link_id,
nvlink_link **link);
/*
* Set the link endpoint as the link master
*/
NvlStatus nvlink_lib_set_link_master(nvlink_link *link);
/*
* Get the link master associated with this endpoint
*
* NOTE(review): the result is presumably returned through *master - confirm.
*/
NvlStatus nvlink_lib_get_link_master(nvlink_link *link, nvlink_link **master);
/*
* Set the training state for the given link as non-ALI or ALI
*
* NOTE(review): enableALI presumably selects ALI training when NV_TRUE and
* non-ALI training when NV_FALSE - confirm.
*/
NvlStatus nvlink_lib_link_set_training_mode(nvlink_link *link, NvBool enableALI);
/************************************************************************************************/
/*************************** NVLink topology discovery functions ********************************/
/************************************************************************************************/
/*
* Get the connected remote endpoint information
* For a given link, return the details of the other endpoint it is connected
* to. If there is no connection associated with the given link, then
* conn_info.connected member will be NV_FALSE.
*
* Note: This routine will not initiate any link initialization or topology
* discovery.
*/
NvlStatus nvlink_lib_get_remote_conn_info(nvlink_link *link, nvlink_conn_info *conn_info);
/*
* Get the connected remote endpoint information
* For a given end of a link, returns the device and link information
* for the remote end along with a boolean variable that specifies if
* the topology detection was complete
*
* NOTE(review): going by the name, and unlike nvlink_lib_get_remote_conn_info,
* this call may itself perform discovery; flags is presumably one of
* NVLINK_STATE_CHANGE_* - confirm.
*/
NvlStatus nvlink_lib_discover_and_get_remote_conn_info(nvlink_link *end,
nvlink_conn_info *conn_info,
NvU32 flags);
/************************************************************************************************/
/****************************** NVLink initialization functions *********************************/
/************************************************************************************************/
/*
* Re-init a given link from OFF to SWCFG
*
* NOTE(review): flags is presumably one of NVLINK_STATE_CHANGE_* - confirm.
*/
NvlStatus nvlink_lib_reinit_link_from_off_to_swcfg(nvlink_link *link,
NvU32 flags);
/************************************************************************************************/
/********************************** NVLink training functions ***********************************/
/************************************************************************************************/
/*
* Train a given set of links from SWCFG to ACTIVE state
* a. For low training latency - caller passes all links as an array
* b. For high training latency - caller passes link one by one
*
* links is an array of linkCount link pointers.
*/
NvlStatus nvlink_lib_train_links_from_swcfg_to_active(nvlink_link **links,
NvU32 linkCount,
NvU32 flags);
/*
* Train a given set of links of a device from L2 to ACTIVE state
*
* NOTE(review): linkMask is presumably a bit mask of link numbers on dev, one
* bit per link - confirm.
*/
NvlStatus nvlink_lib_train_links_from_L2_to_active(nvlink_device *dev,
NvU32 linkMask,
NvU32 flags);
/*
* Retrain a given link from SWCFG to ACTIVE
*/
NvlStatus nvlink_lib_retrain_link_from_swcfg_to_active(nvlink_link *link,
NvU32 flags);
/*
* Save the seed Data passed in from an endpoint driver
*
* NOTE(review): nvlink_link.seedData holds NVLINK_MAX_SEED_BUFFER_SIZE entries;
* the seedData buffers passed to the seed functions below presumably need the
* same capacity - confirm.
*/
NvlStatus nvlink_lib_save_training_seeds(nvlink_link * link,
NvU32 * seedData);
/*
* NOTE(review): undocumented in the original header. Going by the name and
* signature, this presumably copies the seeds stored for the link into the
* caller-provided seedDataCopy buffer - confirm against the implementation.
*/
NvlStatus nvlink_lib_copy_training_seeds(nvlink_link * link,
NvU32 * seedDataCopy);
/*
* Send the endpoint driver back the seeds we have stored
*
* Unlike the save/copy functions above, this one returns no status.
*/
void nvlink_lib_restore_training_seeds(nvlink_link * link,
NvU32 * seedData);
/*
* Check that the requested links have trained to active
*
* links is an array of linkCount link pointers.
*/
NvlStatus nvlink_lib_check_training_complete(nvlink_link **links,
NvU32 linkCount);
/************************************************************************************************/
/********************************** NVLink shutdown functions ***********************************/
/************************************************************************************************/
/*
* [CLEAN SHUTDOWN]
* Shutdown given links of a device from active to L2 state
*
* NOTE(review): linkMask is presumably a bit mask of link numbers on dev, one
* bit per link - confirm.
*/
NvlStatus nvlink_lib_powerdown_links_from_active_to_L2(nvlink_device *dev,
NvU32 linkMask,
NvU32 flags);
/*
* [PSEUDO-CLEAN SHUTDOWN]
* Shutdown the given array of links from ACTIVE to OFF state
*
* links is an array of numLinks link pointers.
*/
NvlStatus nvlink_lib_powerdown_links_from_active_to_off(nvlink_link **links,
NvU32 numLinks,
NvU32 flags);
/*
* Power down the given array of links from ACTIVE to SWCFG state
*
* links is an array of numLinks link pointers.
*/
NvlStatus nvlink_lib_powerdown_links_from_active_to_swcfg(nvlink_link **links,
NvU32 numLinks,
NvU32 flags);
/*
* Reset the given array of links
*
* links is an array of numLinks link pointers.
*/
NvlStatus nvlink_lib_reset_links(nvlink_link **links,
NvU32 numLinks,
NvU32 flags);
/*
* Floorsweep the necessary links and set buffer ready on the active links
*/
NvlStatus nvlink_lib_powerdown_floorswept_links_to_off(nvlink_device *pDevice);
/*
* Nvlink core library structure iterators
*
* Thin wrappers around nvListForEachEntry / nvListForEachEntry_safe:
*   FOR_EACH_DEVICE_REGISTERED(dev, head, node)
*       iterate 'dev' over the list anchored at (head).<node>
*   FOR_EACH_LINK_REGISTERED(link, dev, node)
*       iterate 'link' over (dev)->link_list
*   FOR_EACH_LINK_REGISTERED_SAFE(link, next, dev, node)
*       same, built on nvListForEachEntry_safe with 'next' as the extra cursor
*   FOR_EACH_CONNECTION(conn, head, node)
*       iterate 'conn' over the list anchored at (head).<node>
*
* 'node' names the NVListRec member that links the entries; for the two
* head-based macros the same member name is also used on 'head'.
*
* The 'head' and 'dev' arguments are parenthesized before member access so
* that expression arguments (e.g. *pHead or devs + 1) expand correctly.
*/
#define FOR_EACH_DEVICE_REGISTERED(dev, head, node) \
nvListForEachEntry(dev, &(head).node, node)
#define FOR_EACH_LINK_REGISTERED(link, dev, node) \
nvListForEachEntry(link, &(dev)->link_list, node)
#define FOR_EACH_LINK_REGISTERED_SAFE(link, next, dev, node) \
nvListForEachEntry_safe(link, next, &(dev)->link_list, node)
#define FOR_EACH_CONNECTION(conn, head, node) \
nvListForEachEntry(conn, &(head).node, node)
#ifdef __cplusplus
}
#endif
#endif // _NVLINK_H_