Summary of this patch (descriptive preamble, placed before the first
"diff --git" header so that patch tools skip it as leading text):

  * Makefile: removes common.cu from TESTSSRCS, defines TESTSPERFBIN, and adds
    an `allgather_perf` target whose rule links allgather_test3.o, common.o
    and timer.o against -lmscclpp.
  * tests/allgather_test3.cu: changes the `#pragma weak` alias name from
    mcclppTestEngine to mscclppTestEngine.
  * tests/common.cu: moves completeColl() below testStreamSynchronize(); inside
    testStreamSynchronize() moves the sched_yield() check past one closing
    brace and free(done)/return past the next one; removes the type/redop/root
    columns from the table header printed by run().
  * tests/common.h: marks print_usage() and parse_arguments() as inline.

NOTE(review): the line structure below was restored from a copy whose newlines
had been flattened. Record boundaries and the number of blank context lines
follow from the hunk headers' line counts, but indentation, the exact position
of some blank lines, and runs of spaces inside lines (including inside the
PRINT format strings) could not be recovered. Confirm against the original
commit before applying, or apply with whitespace-insensitive matching and
re-check the PRINT strings.

diff --git a/Makefile b/Makefile
index 2793b7bb..55e51dc2 100644
--- a/Makefile
+++ b/Makefile
@@ -129,10 +129,11 @@ LIBSONAME := $(LIBNAME).$(MSCCLPP_MAJOR)
 LIBTARGET := $(BUILDDIR)/$(LIBDIR)/$(LIBNAME).$(MSCCLPP_MAJOR).$(MSCCLPP_MINOR).$(MSCCLPP_PATCH)
 
 TESTSDIR := tests
-TESTSSRCS := $(addprefix $(TESTSDIR)/,bootstrap_test.cc allgather_test.cu common.cu)
+TESTSSRCS := $(addprefix $(TESTSDIR)/,bootstrap_test.cc allgather_test.cu)
 TESTSOBJS := $(patsubst %.cc,%.o,$(TESTSSRCS)) $(patsubst %.cu,%.o,$(TESTSSRCS))
 TESTSOBJTARGETS := $(TESTSOBJS:%=$(BUILDDIR)/$(OBJDIR)/%)
 TESTSBINS := $(patsubst %.o,$(BUILDDIR)/$(BINDIR)/%,$(TESTSOBJS))
+TESTSPERFBIN := $(BUILDDIR)/$(BINDIR)/allgather_test3
 
 INCLUDE := -Isrc -Isrc/include
 
@@ -146,6 +147,8 @@ lib: $(LIBOBJTARGETS) $(INCTARGETS) $(LIBTARGET)
 
 tests: $(TESTSBINS)
 
+allgather_perf: $(TESTSPERFBIN)
+
 cpplint:
 	clang-format-12 -style=file --verbose --Werror --dry-run $(CPPSOURCES)
 
@@ -172,6 +175,9 @@ $(LIBTARGET): $(LIBOBJTARGETS)
 	ln -sf $(LIBTARGET) $(BUILDDIR)/$(LIBDIR)/$(LIBSONAME)
 
 # Compile .cc tests
+$(TESTSPERFBIN): $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/allgather_test3.o $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/common.o $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/timer.o
+	$(NVCC) -o $@ $^ $(MPI_LDFLAGS) -L$(BUILDDIR)/$(LIBDIR) -lmscclpp
+
 $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/%.o: $(TESTSDIR)/%.cc $(INCTARGETS)
 	@mkdir -p $(@D)
 	$(CXX) -o $@ -I$(BUILDDIR)/$(INCDIR) $(MPI_INC) $(CXXFLAGS) -c $< $(MPI_MACRO)
diff --git a/tests/allgather_test3.cu b/tests/allgather_test3.cu
index 5aad077c..adf9426b 100644
--- a/tests/allgather_test3.cu
+++ b/tests/allgather_test3.cu
@@ -77,4 +77,4 @@ struct testEngine allGatherEngine = {
   AllGatherRunTest
 };
 
-#pragma weak mcclppTestEngine=allGatherEngine
\ No newline at end of file
+#pragma weak mscclppTestEngine=allGatherEngine
\ No newline at end of file
diff --git a/tests/common.cu b/tests/common.cu
index 5074312b..a4ffde38 100644
--- a/tests/common.cu
+++ b/tests/common.cu
@@ -128,11 +128,6 @@ testResult_t startColl(struct threadArgs* args, int in_place, int iter) {
   return testSuccess;
 }
 
-testResult_t completeColl(struct threadArgs* args) {
-  TESTCHECK(testStreamSynchronize(args->nGpus, args->streams));
-  return testSuccess;
-}
-
 testResult_t testStreamSynchronize(int ngpus, cudaStream_t* streams)
 {
   cudaError_t cudaErr;
@@ -157,14 +152,19 @@ testResult_t testStreamSynchronize(int ngpus, cudaStream_t* streams)
 
       if (cudaErr != cudaErrorNotReady)
         CUDACHECK(cudaErr);
-
-      // We might want to let other threads (including NCCL threads) use the CPU.
-      if (idle)
-        sched_yield();
     }
-    free(done);
-    return testSuccess;
+
+    // We might want to let other threads (including NCCL threads) use the CPU.
+    if (idle)
+      sched_yield();
   }
+  free(done);
+  return testSuccess;
+}
+
+testResult_t completeColl(struct threadArgs* args) {
+  TESTCHECK(testStreamSynchronize(args->nGpus, args->streams));
+  return testSuccess;
 }
 
 // Inter-thread/process barrier+allreduce. The quality of the return value
@@ -531,12 +531,12 @@ testResult_t run() {
 
   const char* timeStr = report_cputime ? "cputime" : "time";
   PRINT("#\n");
-  PRINT("# %10s %12s %8s %6s %6s out-of-place in-place \n", "", "", "", "",
+  PRINT("# %10s %12s out-of-place in-place \n", "",
         "");
-  PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %6s %7s %6s %6s %6s\n", "size", "count", "type", "redop", "root",
-        timeStr, "algbw", "busbw", "#wrong", timeStr, "algbw", "busbw", "#wrong");
-  PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %5s %7s %6s %6s %5s\n", "(B)", "(elements)", "", "", "",
-        "(us)", "(GB/s)", "(GB/s)", "", "(us)", "(GB/s)", "(GB/s)", "");
+  PRINT("# %10s %12s %7s %6s %6s %6s %7s %6s %6s %6s\n", "size", "count", timeStr, "algbw", "busbw", "#wrong",
+        timeStr, "algbw", "busbw", "#wrong");
+  PRINT("# %10s %12s %7s %6s %6s %5s %7s %6s %6s %5s\n", "(B)", "(elements)", "(us)", "(GB/s)", "(GB/s)", "",
+        "(us)", "(GB/s)", "(GB/s)", "");
 
   struct testThread thread = {0};
 
diff --git a/tests/common.h b/tests/common.h
index 92aa645c..5e0dca5e 100644
--- a/tests/common.h
+++ b/tests/common.h
@@ -140,7 +140,7 @@ static void getHostName(char* hostname, int maxlen)
   }
 }
 
-void print_usage(const char* prog)
+inline void print_usage(const char* prog)
 {
 #ifdef MSCCLPP_USE_MPI_FOR_TESTS
   printf("usage: %s IP:PORT [rank nranks]\n", prog);
@@ -149,7 +149,7 @@ void print_usage(const char* prog)
 #endif
 }
 
-void parse_arguments(int argc, const char* argv[], const char** ip_port, int* rank, int* world_size)
+inline void parse_arguments(int argc, const char* argv[], const char** ip_port, int* rank, int* world_size)
 {
 #ifdef MSCCLPP_USE_MPI_FOR_TESTS
   if (argc != 2 && argc != 4) {