This commit is contained in:
Binyang Li
2023-03-31 07:10:43 +00:00
parent c7b3d0b0fd
commit 98020f5b52
4 changed files with 26 additions and 20 deletions

View File

@@ -129,10 +129,11 @@ LIBSONAME := $(LIBNAME).$(MSCCLPP_MAJOR)
LIBTARGET := $(BUILDDIR)/$(LIBDIR)/$(LIBNAME).$(MSCCLPP_MAJOR).$(MSCCLPP_MINOR).$(MSCCLPP_PATCH)
TESTSDIR := tests
TESTSSRCS := $(addprefix $(TESTSDIR)/,bootstrap_test.cc allgather_test.cu common.cu)
TESTSSRCS := $(addprefix $(TESTSDIR)/,bootstrap_test.cc allgather_test.cu)
TESTSOBJS := $(patsubst %.cc,%.o,$(TESTSSRCS)) $(patsubst %.cu,%.o,$(TESTSSRCS))
TESTSOBJTARGETS := $(TESTSOBJS:%=$(BUILDDIR)/$(OBJDIR)/%)
TESTSBINS := $(patsubst %.o,$(BUILDDIR)/$(BINDIR)/%,$(TESTSOBJS))
TESTSPERFBIN := $(BUILDDIR)/$(BINDIR)/allgather_test3
INCLUDE := -Isrc -Isrc/include
@@ -146,6 +147,8 @@ lib: $(LIBOBJTARGETS) $(INCTARGETS) $(LIBTARGET)
tests: $(TESTSBINS)
allgather_perf: $(TESTSPERFBIN)
cpplint:
clang-format-12 -style=file --verbose --Werror --dry-run $(CPPSOURCES)
@@ -172,6 +175,9 @@ $(LIBTARGET): $(LIBOBJTARGETS)
ln -sf $(LIBTARGET) $(BUILDDIR)/$(LIBDIR)/$(LIBSONAME)
# Compile .cc tests
# Link the allgather perf test binary ($(TESTSPERFBIN)) from the allgather_test3,
# common and timer objects, using $(NVCC) and linking against libmscclpp in
# $(BUILDDIR)/$(LIBDIR) plus $(MPI_LDFLAGS).
$(TESTSPERFBIN): $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/allgather_test3.o $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/common.o $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/timer.o
$(NVCC) -o $@ $^ $(MPI_LDFLAGS) -L$(BUILDDIR)/$(LIBDIR) -lmscclpp
$(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/%.o: $(TESTSDIR)/%.cc $(INCTARGETS)
@mkdir -p $(@D)
$(CXX) -o $@ -I$(BUILDDIR)/$(INCDIR) $(MPI_INC) $(CXXFLAGS) -c $< $(MPI_MACRO)

View File

@@ -77,4 +77,4 @@ struct testEngine allGatherEngine = {
AllGatherRunTest
};
#pragma weak mcclppTestEngine=allGatherEngine
#pragma weak mscclppTestEngine=allGatherEngine

View File

@@ -128,11 +128,6 @@ testResult_t startColl(struct threadArgs* args, int in_place, int iter) {
return testSuccess;
}
// Completes a collective by synchronizing the streams held in `args`:
// forwards args->nGpus and args->streams to testStreamSynchronize() and
// returns testSuccess once that call has been checked.
// NOTE(review): TESTCHECK presumably returns early with the failing
// testResult_t when the wrapped call does not succeed — confirm against the
// macro definition.
testResult_t completeColl(struct threadArgs* args) {
TESTCHECK(testStreamSynchronize(args->nGpus, args->streams));
return testSuccess;
}
testResult_t testStreamSynchronize(int ngpus, cudaStream_t* streams)
{
cudaError_t cudaErr;
@@ -157,14 +152,19 @@ testResult_t testStreamSynchronize(int ngpus, cudaStream_t* streams)
if (cudaErr != cudaErrorNotReady)
CUDACHECK(cudaErr);
// We might want to let other threads (including NCCL threads) use the CPU.
if (idle)
sched_yield();
}
free(done);
return testSuccess;
// We might want to let other threads (including NCCL threads) use the CPU.
if (idle)
sched_yield();
}
free(done);
return testSuccess;
}
// Completes a collective by synchronizing the streams held in `args`:
// forwards args->nGpus and args->streams to testStreamSynchronize() and
// returns testSuccess once that call has been checked.
// NOTE(review): TESTCHECK presumably returns early with the failing
// testResult_t when the wrapped call does not succeed — confirm against the
// macro definition.
testResult_t completeColl(struct threadArgs* args) {
TESTCHECK(testStreamSynchronize(args->nGpus, args->streams));
return testSuccess;
}
// Inter-thread/process barrier+allreduce. The quality of the return value
@@ -531,12 +531,12 @@ testResult_t run() {
const char* timeStr = report_cputime ? "cputime" : "time";
PRINT("#\n");
PRINT("# %10s %12s %8s %6s %6s out-of-place in-place \n", "", "", "", "",
PRINT("# %10s %12s out-of-place in-place \n", "",
"");
PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %6s %7s %6s %6s %6s\n", "size", "count", "type", "redop", "root",
timeStr, "algbw", "busbw", "#wrong", timeStr, "algbw", "busbw", "#wrong");
PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %5s %7s %6s %6s %5s\n", "(B)", "(elements)", "", "", "",
"(us)", "(GB/s)", "(GB/s)", "", "(us)", "(GB/s)", "(GB/s)", "");
PRINT("# %10s %12s %7s %6s %6s %6s %7s %6s %6s %6s\n", "size", "count", timeStr, "algbw", "busbw", "#wrong",
timeStr, "algbw", "busbw", "#wrong");
PRINT("# %10s %12s %7s %6s %6s %5s %7s %6s %6s %5s\n", "(B)", "(elements)", "(us)", "(GB/s)", "(GB/s)", "",
"(us)", "(GB/s)", "(GB/s)", "");
struct testThread thread = {0};

View File

@@ -140,7 +140,7 @@ static void getHostName(char* hostname, int maxlen)
}
}
void print_usage(const char* prog)
inline void print_usage(const char* prog)
{
#ifdef MSCCLPP_USE_MPI_FOR_TESTS
printf("usage: %s IP:PORT [rank nranks]\n", prog);
@@ -149,7 +149,7 @@ void print_usage(const char* prog)
#endif
}
void parse_arguments(int argc, const char* argv[], const char** ip_port, int* rank, int* world_size)
inline void parse_arguments(int argc, const char* argv[], const char** ip_port, int* rank, int* world_size)
{
#ifdef MSCCLPP_USE_MPI_FOR_TESTS
if (argc != 2 && argc != 4) {