Some cleanup

2026-05-13 01:36:10 +00:00 · 2023-04-11 08:45:22 +00:00
parent 69b5bdfd13
commit d2c2ae72a7
15 changed files with 16 additions and 181 deletions
--- a/src/include/alloc.h
+++ b/src/include/alloc.h
@@ -12,7 +12,6 @@
 #include "mscclpp.h"
 #include "utils.h"
 #include <stdlib.h>
-#include <string.h>
 #include <sys/mman.h>
 #include <unistd.h>

--- a/src/include/comm.h
+++ b/src/include/comm.h
@@ -10,16 +10,9 @@
 #include "ib.h"
 #include "proxy.h"

+#if defined(ENABLE_NPKIT)
 #include <vector>
-
-// #define CACHE_LINE_SIZE 128
-// #define MEM_ALIGN 4096
-// #define CUDA_IPC_MIN 2097152UL
-
-// // Channels / LL tuning
-// #define MSCCLPP_LL_THREAD_THRESHOLD 8
-// #define MSCCLPP_LL128_THREAD_THRESHOLD 8
-// #define MSCCLPP_SIMPLE_THREAD_THRESHOLD 64
+#endif

 #define MAXCONNECTIONS 64

--- a/src/include/core.h
+++ b/src/include/core.h
@@ -1,30 +0,0 @@
-/*************************************************************************
- * Copyright (c) 2015-2021, NVIDIA CORPORATION. All rights reserved.
- *
- * See LICENSE.txt for license information
- ************************************************************************/
-
-#ifndef MSCCLPP_CORE_H_
-#define MSCCLPP_CORE_H_
-
-#include "alloc.h"
-#include "debug.h"
-#include "mscclpp.h"
-#include "param.h"
-#include <algorithm> // For std::min/std::max
-#include <pthread.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#ifdef PROFAPI
-#define MSCCLPP_API(ret, func, args...)                                                                                \
-  __attribute__((visibility("default"))) __attribute__((alias(#func))) ret p##func(args);                              \
-  extern "C" __attribute__((visibility("default"))) __attribute__((weak)) ret func(args)
-#else
-#define MSCCLPP_API(ret, func, args...) extern "C" __attribute__((visibility("default"))) ret func(args)
-#endif // end PROFAPI
-
-#endif // end include guard
--- a/src/include/mscclpp.h
+++ b/src/include/mscclpp.h
@@ -12,7 +12,6 @@
 #define MSCCLPP_PROXY_FIFO_FLUSH_COUNTER 4

 #include <mscclppfifo.h>
-#include <time.h>
 #include <vector>

 #ifdef __cplusplus
@@ -180,7 +179,8 @@ struct mscclppDevConn : mscclppBaseConn
    *(volatile uint64_t*)&(localSignalEpochId->device) += 1;
  }

-#endif
+#endif // __CUDACC__
+
  // this is a concurrent fifo which is multiple threads from the device
  // can produce for and the sole proxy thread consumes it.
  struct mscclppConcurrentFifo fifo;
--- a/src/include/mscclppfifo.h
+++ b/src/include/mscclppfifo.h
@@ -49,7 +49,7 @@ typedef mscclppTrigger* mscclppTrigger_t;
 * push() function increments triggerFifoHead, proxyState->fifoTailHost is updated in proxy.cc:mscclppProxyService
 * and it occasionally flushes it to triggerFifoTail via a cudaMemcpyAsync.
 *
- * Why douplicating the tail is a good idea? The fifo is large engouh and we do not need frequent updates
+ * Why is duplicating the tail a good idea? The fifo is large enough and we do not need frequent updates
 * for the tail as there is usually enough space for device threads to push their work into.
 */
 struct mscclppConcurrentFifo
--- a/src/include/npkit/npkit.h
+++ b/src/include/npkit/npkit.h
@@ -2,9 +2,6 @@
 #define NPKIT_H_

 #include <string>
-#include <thread>
-
-#include <cuda_runtime.h>

 #include "npkit/npkit_event.h"
 #include "npkit/npkit_struct.h"
--- a/src/include/param.h
+++ b/src/include/param.h
@@ -1,30 +0,0 @@
-/*************************************************************************
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * See LICENSE.txt for license information
- ************************************************************************/
-
-#ifndef MSCCLPP_PARAM_H_
-#define MSCCLPP_PARAM_H_
-
-#include <stdint.h>
-
-const char* userHomeDir();
-void setEnvFile(const char* fileName);
-void initEnv();
-
-void mscclppLoadParam(char const* env, int64_t deftVal, int64_t uninitialized, int64_t* cache);
-
-#define MSCCLPP_PARAM(name, env, deftVal)                                                                              \
-  int64_t mscclppParam##name()                                                                                         \
-  {                                                                                                                    \
-    constexpr int64_t uninitialized = INT64_MIN;                                                                       \
-    static_assert(deftVal != uninitialized, "default value cannot be the uninitialized value.");                       \
-    static int64_t cache = uninitialized;                                                                              \
-    if (__builtin_expect(__atomic_load_n(&cache, __ATOMIC_RELAXED) == uninitialized, false)) {                         \
-      mscclppLoadParam("MSCCLPP_" env, deftVal, uninitialized, &cache);                                                \
-    }                                                                                                                  \
-    return cache;                                                                                                      \
-  }
-
-#endif
--- a/src/include/utils.h
+++ b/src/include/utils.h
@@ -8,13 +8,9 @@
 #define MSCCLPP_UTILS_H_

 #include "alloc.h"
-#include "checks.h"
 #include "mscclpp.h"
-#include <new>
-#include <numa.h>
-#include <sched.h>
+#include <chrono>
 #include <stdint.h>
-#include <time.h>

 // int mscclppCudaCompCap();