From 82fe0b667d2ebb8ffaa412590b60a0ce6ed700a3 Mon Sep 17 00:00:00 2001 From: Changho Hwang Date: Fri, 3 Feb 2023 12:29:27 +0000 Subject: [PATCH] Add a makefile and logging functions --- .gitignore | 2 + Makefile | 105 +++++++++++++++++++++++++++++ src/debug.cc | 61 +++++++++++++++++ src/include/debug.h | 20 ++++++ src/include/{msccl.h => mscclpp.h} | 7 +- src/init_test.cc | 14 ++++ 6 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 src/debug.cc create mode 100644 src/include/debug.h rename src/include/{msccl.h => mscclpp.h} (82%) create mode 100644 src/init_test.cc diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..e524d792 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.vscode/ +build/ diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..2759631a --- /dev/null +++ b/Makefile @@ -0,0 +1,105 @@ +DEBUG ?= 0 +VERBOSE ?= 1 + +######## CUDA +CUDA_HOME ?= /usr/local/cuda +CUDA_INC ?= $(CUDA_HOME)/include +NVCC = $(CUDA_HOME)/bin/nvcc +CUDA_VERSION = $(strip $(shell which $(NVCC) >/dev/null && $(NVCC) --version | grep release | sed 's/.*release //' | sed 's/\,.*//')) +CUDA_MAJOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 1) +CUDA_MINOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 2) +# You should define NVCC_GENCODE in your environment to the minimal set +# of archs to reduce compile time. +CUDA8_GENCODE = -gencode=arch=compute_50,code=sm_50 \ + -gencode=arch=compute_60,code=sm_60 \ + -gencode=arch=compute_61,code=sm_61 +ifeq ($(shell test "0$(CUDA_MAJOR)" -lt 12; echo $$?),0) +# SM35 is deprecated from CUDA12.0 onwards +CUDA8_GENCODE += -gencode=arch=compute_35,code=sm_35 +endif +CUDA9_GENCODE = -gencode=arch=compute_70,code=sm_70 +CUDA11_GENCODE = -gencode=arch=compute_80,code=sm_80 +CUDA12_GENCODE = -gencode=arch=compute_90,code=sm_90 + +CUDA8_PTX = -gencode=arch=compute_61,code=compute_61 +CUDA9_PTX = -gencode=arch=compute_70,code=compute_70 +CUDA11_PTX = -gencode=arch=compute_80,code=compute_80 +CUDA12_PTX = -gencode=arch=compute_90,code=compute_90 + +######## CXX/NVCC +CXX := g++ +NVTX ?= 1 + +ifeq ($(shell test "0$(CUDA_MAJOR)" -eq 11 -a "0$(CUDA_MINOR)" -ge 8 -o "0$(CUDA_MAJOR)" -gt 11; echo $$?),0) +# Include Hopper support if we're using CUDA11.8 or above + NVCC_GENCODE ?= $(CUDA8_GENCODE) $(CUDA9_GENCODE) $(CUDA11_GENCODE) $(CUDA12_GENCODE) $(CUDA12_PTX) +else ifeq ($(shell test "0$(CUDA_MAJOR)" -ge 11; echo $$?),0) + NVCC_GENCODE ?= $(CUDA8_GENCODE) $(CUDA9_GENCODE) $(CUDA11_GENCODE) $(CUDA11_PTX) +# Include Volta support if we're using CUDA9 or above +else ifeq ($(shell test "0$(CUDA_MAJOR)" -ge 9; echo $$?),0) + NVCC_GENCODE ?= $(CUDA8_GENCODE) $(CUDA9_GENCODE) $(CUDA9_PTX) +else + NVCC_GENCODE ?= $(CUDA8_GENCODE) $(CUDA8_PTX) +endif +$(info NVCC_GENCODE is ${NVCC_GENCODE}) + +CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR) -fPIC -fvisibility=hidden \ + -Wall -Wno-unused-function -Wno-sign-compare -std=c++14 -Wvla \ + -I $(CUDA_INC) \ + $(CXXFLAGS) +# Maxrregcount needs to be set accordingly to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors) +# 512 : 120, 640 : 96, 768 : 80, 1024 : 60 +# We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions. +NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all +# Use addprefix so that we can specify more than one path +NVLDFLAGS := -L${CUDA_LIB} -lcudart -lrt + +ifeq ($(DEBUG), 0) +NVCUFLAGS += -O3 +CXXFLAGS += -O3 -g +else +NVCUFLAGS += -O0 -G -g +CXXFLAGS += -O0 -g -ggdb3 +endif + +ifneq ($(VERBOSE), 0) +NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra,-Wno-unused-parameter +CXXFLAGS += -Wall -Wextra +else +.SILENT: +endif + +ifeq ($(NVTX), 0) +CXXFLAGS += -DNVTX_DISABLE +endif + +#### MSCCL++ +BUILDDIR ?= $(abspath ./build) +ABSBUILDDIR := $(abspath $(BUILDDIR)) + +BUILDSRCS := init.cc debug.cc +BUILDOBJS := $(patsubst %.cc,$(ABSBUILDDIR)/src/%.o,$(BUILDSRCS)) + +TESTSSRCS := init_test.cc +TESTSOBJS := $(patsubst %.cc,$(ABSBUILDDIR)/src/%.o,$(TESTSSRCS)) +TESTBINS := $(patsubst %.cc,$(ABSBUILDDIR)/src/%,$(TESTSSRCS)) + +INCLUDE := -Isrc -Isrc/include + +.PHONY: all build tests clean + +all: build tests + +build: $(BUILDOBJS) +tests: $(TESTBINS) + +$(ABSBUILDDIR)/%.o: %.cc + @mkdir -p $(@D) + $(CXX) -o $@ $(INCLUDE) $(CXXFLAGS) -c $< + +$(TESTBINS): %: %.o $(BUILDOBJS) + @mkdir -p $(@D) + $(CXX) -o $@ $^ $(NVLDFLAGS) + +clean: + rm -rf $(ABSBUILDDIR) diff --git a/src/debug.cc b/src/debug.cc new file mode 100644 index 00000000..238837af --- /dev/null +++ b/src/debug.cc @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include "debug.h" + +using namespace std; + +int mscclDebugLevel = -1; + +void mscclppDebugInit() +{ + int lev = -1; + const char *mscclpp_debug = getenv("MSCCLPP_DEBUG"); + if (mscclpp_debug == nullptr) { + lev = MSCCLPP_LOG_NONE; + } else { + string mscclpp_debug_str(mscclpp_debug); + if (mscclpp_debug_str == "INFO") { + lev = MSCCLPP_LOG_INFO; + } else if (mscclpp_debug_str == "DEBUG") { + lev = MSCCLPP_LOG_DEBUG; + } else if (mscclpp_debug_str == "ABORT") { + lev = MSCCLPP_LOG_ABORT; + } else { + throw runtime_error("Unknown debug level given: " + mscclpp_debug_str); + } + } + mscclDebugLevel = lev; +} + +void mscclppDebugLog(mscclDebugLogLevel level, const char *filefunc, int line, + const char *fmt, ...) +{ + if (mscclDebugLevel == -1) { + mscclppDebugInit(); + } + if (level < mscclDebugLevel) { + return; + } + string lev_str; + if (level == MSCCLPP_LOG_INFO) { + lev_str = "INFO"; + } else if (level == MSCCLPP_LOG_DEBUG) { + lev_str = "DEBUG"; + } else if (level == MSCCLPP_LOG_ABORT) { + lev_str = "ABORT"; + } else { + assert(false); + } + char buffer[1024]; + va_list vargs; + va_start(vargs, fmt); + vsnprintf(buffer, 1024, fmt, vargs); + va_end(vargs); + stringstream ss; + ss << "MSCCL " << lev_str << ": (" << filefunc << ":" << line << ") " + << buffer << endl; + cerr << ss.str(); +} diff --git a/src/include/debug.h b/src/include/debug.h new file mode 100644 index 00000000..d86055cc --- /dev/null +++ b/src/include/debug.h @@ -0,0 +1,20 @@ +#ifndef MSCCLPP_DEBUG_H_ +#define MSCCLPP_DEBUG_H_ + +extern int mscclDebugLevel; + +typedef enum { + MSCCLPP_LOG_NONE = 0, + MSCCLPP_LOG_INFO = 1, + MSCCLPP_LOG_DEBUG = 2, + MSCCLPP_LOG_ABORT = 3, +} mscclDebugLogLevel; + +void mscclppDebugLog(mscclDebugLogLevel level, const char *filefunc, int line, + const char *fmt, ...); + +#define INFO(...) mscclppDebugLog(MSCCLPP_LOG_INFO, __FILE__, __LINE__, __VA_ARGS__) +#define DEBUG(...) mscclppDebugLog(MSCCLPP_LOG_DEBUG, __FILE__, __LINE__, __VA_ARGS__) +#define ABORT(...) mscclppDebugLog(MSCCLPP_LOG_ABORT, __FILE__, __LINE__, __VA_ARGS__) + +#endif // MSCCLPP_DEBUG_H_ diff --git a/src/include/msccl.h b/src/include/mscclpp.h similarity index 82% rename from src/include/msccl.h rename to src/include/mscclpp.h index 2c566258..03991e63 100644 --- a/src/include/msccl.h +++ b/src/include/mscclpp.h @@ -1,3 +1,6 @@ +#ifndef MSCCLPP_H_ +#define MSCCLPP_H_ + #define MSCCLPP_MAJOR 0 #define MSCCLPP_MINOR 1 @@ -15,4 +18,6 @@ typedef enum { mscclppSuccess = 0, mscclppResult_t mscclppGetUniqueId(mscclppUniqueId* uniqueId); //mscclppResult_t mscclppCommInitRank(mscclppComm_t* comm, int nranks, mscclppUniqueId commId, int rank); -//mscclppResult_t mscclppCommDestroy(mscclppComm_t comm); \ No newline at end of file +//mscclppResult_t mscclppCommDestroy(mscclppComm_t comm); + +#endif // MSCCLPP_H_ diff --git a/src/init_test.cc b/src/init_test.cc new file mode 100644 index 00000000..8cbac7a4 --- /dev/null +++ b/src/init_test.cc @@ -0,0 +1,14 @@ +#include +#include "debug.h" +#include "mscclpp.h" + +int main() +{ + mscclppUniqueId uid; + mscclppResult_t res = mscclppGetUniqueId(&uid); + if (res != mscclppSuccess) { + ABORT("mscclppGetUniqueId failed"); + } + INFO("init_test succeed"); + return 0; +}