/*************************************************************************
 * Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/

#ifndef MSCCLPP_COMM_H_
#define MSCCLPP_COMM_H_

#include "proxy.h"
#include "ib.h"

// #define CACHE_LINE_SIZE 128
// #define MEM_ALIGN 4096
// #define CUDA_IPC_MIN 2097152UL

// // Channels / LL tuning
// #define MSCCLPP_LL_THREAD_THRESHOLD 8
// #define MSCCLPP_LL128_THREAD_THRESHOLD 8
// #define MSCCLPP_SIMPLE_THREAD_THRESHOLD 64

// Capacity of the fixed-size `conns` array embedded in struct mscclppComm.
#define MAXCONNECTIONS 1024

// Per-connection record. Groups the transport selector, the peer rank, the
// proxy-flag pointers, a pointer to the matching mscclppDevConn, and the
// mscclppIb* objects (context, queue pair, memory regions and their info
// records) that belong to one connection.
// NOTE(review): the member notes below are inferred from names and types only;
// this header does not show how the members are populated or consumed --
// confirm against the code that sets up connections.
struct mscclppConn {
  // Transport selector for this connection (type declared outside this header).
  mscclppTransport_t transport;
  // Presumably the rank of the peer on the other end -- TODO confirm.
  int remoteRank;
  // Presumably the size of the connection's buffer; units are not visible
  // here -- TODO confirm. Being an int, it cannot represent sizes >= 2 GiB.
  int buffSize;
  // Pointers to 64-bit proxy flags. NOTE(review): which memory space each one
  // points into (host, device, remote-mapped) is not visible here -- confirm.
  uint64_t *remoteProxyFlag;
  uint64_t *cpuProxyFlag;
  // Opaque handle paired with cpuProxyFlag; presumably a GDR mapping
  // descriptor -- TODO confirm.
  void *cpuProxyFlagGdrDesc;
  // Pointer to the associated mscclppDevConn (declared outside this header).
  struct mscclppDevConn *devConn;
  // IB context and queue pair pointers for this connection. NOTE(review):
  // presumably only meaningful when `transport` selects IB -- confirm.
  struct mscclppIbContext *ibCtx;
  struct mscclppIbQp *ibQp;
  // Memory-region pointers; by name, one each for the data buffer, the local
  // flag, and the proxy flag.
  struct mscclppIbMr *ibBuffMr;
  struct mscclppIbMr *ibLocalFlagMr;
  struct mscclppIbMr *ibProxyFlagMr;
  // Memory-region info records stored by value, named in parallel with the
  // three regions above. NOTE(review): presumably these describe the peer's
  // regions rather than the local ones -- confirm.
  struct mscclppIbMrInfo ibBuffMrInfo;
  struct mscclppIbMrInfo ibLocalFlagMrInfo;
  struct mscclppIbMrInfo ibProxyFlagMrInfo;
};

// Communicator state: the connection table, the bootstrap handle, rank and
// device identity, the abort flag, the per-device IB contexts, a CUDA stream,
// and the proxy state slots.
// The `conns` table is embedded by value (MAXCONNECTIONS entries), so this
// struct is large; copying it or placing it on the stack is expensive.
struct mscclppComm {
  // Fixed-capacity connection table.
  struct mscclppConn conns[MAXCONNECTIONS];
  // Presumably the number of entries of `conns` currently in use; nothing in
  // this header enforces nConns <= MAXCONNECTIONS -- TODO confirm at the
  // sites that append connections.
  int nConns;

  // Opaque pointer to bootstrap state (concrete type not visible here).
  void* bootstrap;

  uint64_t magic; // Magic number for all network communication. Not a security key -- only goal is to detect mismatches.

  int rank;    // my rank in the communicator
  int nRanks;  // number of GPUs in communicator
  int cudaDev; // my cuda device index

  // Flag to ask MSCCLPP kernels to abort
  // NOTE(review): declared as a pointer to volatile; where the flag lives and
  // who writes it are not visible in this header -- confirm.
  volatile uint32_t *abortFlag;

  // One IB context pointer slot per device, up to MSCCLPP_IB_MAX_DEVS
  // (defined outside this header).
  struct mscclppIbContext *ibContext[MSCCLPP_IB_MAX_DEVS];
  cudaStream_t stream; // DMA engine stream for P2P
  // Proxy state pointer slots, up to MSCCLPP_PROXY_MAX_NUM (defined outside
  // this header). NOTE(review): how slots map to connections or transports is
  // not visible here -- confirm.
  struct mscclppProxyState *proxyState[MSCCLPP_PROXY_MAX_NUM];
};

#endif