mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-25 07:14:40 +00:00
update
This commit is contained in:
8
Makefile
8
Makefile
@@ -129,10 +129,11 @@ LIBSONAME := $(LIBNAME).$(MSCCLPP_MAJOR)
|
||||
LIBTARGET := $(BUILDDIR)/$(LIBDIR)/$(LIBNAME).$(MSCCLPP_MAJOR).$(MSCCLPP_MINOR).$(MSCCLPP_PATCH)
|
||||
|
||||
TESTSDIR := tests
|
||||
TESTSSRCS := $(addprefix $(TESTSDIR)/,bootstrap_test.cc allgather_test.cu common.cu)
|
||||
TESTSSRCS := $(addprefix $(TESTSDIR)/,bootstrap_test.cc allgather_test.cu)
|
||||
TESTSOBJS := $(patsubst %.cc,%.o,$(TESTSSRCS)) $(patsubst %.cu,%.o,$(TESTSSRCS))
|
||||
TESTSOBJTARGETS := $(TESTSOBJS:%=$(BUILDDIR)/$(OBJDIR)/%)
|
||||
TESTSBINS := $(patsubst %.o,$(BUILDDIR)/$(BINDIR)/%,$(TESTSOBJS))
|
||||
TESTSPERFBIN := $(BUILDDIR)/$(BINDIR)/allgather_test3
|
||||
|
||||
INCLUDE := -Isrc -Isrc/include
|
||||
|
||||
@@ -146,6 +147,8 @@ lib: $(LIBOBJTARGETS) $(INCTARGETS) $(LIBTARGET)
|
||||
|
||||
tests: $(TESTSBINS)
|
||||
|
||||
allgather_perf: $(TESTSPERFBIN)
|
||||
|
||||
cpplint:
|
||||
clang-format-12 -style=file --verbose --Werror --dry-run $(CPPSOURCES)
|
||||
|
||||
@@ -172,6 +175,9 @@ $(LIBTARGET): $(LIBOBJTARGETS)
|
||||
ln -sf $(LIBTARGET) $(BUILDDIR)/$(LIBDIR)/$(LIBSONAME)
|
||||
|
||||
# Compile .cc tests
|
||||
$(TESTSPERFBIN): $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/allgather_test3.o $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/common.o $(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/timer.o
|
||||
$(NVCC) -o $@ $^ $(MPI_LDFLAGS) -L$(BUILDDIR)/$(LIBDIR) -lmscclpp
|
||||
|
||||
$(BUILDDIR)/$(OBJDIR)/$(TESTSDIR)/%.o: $(TESTSDIR)/%.cc $(INCTARGETS)
|
||||
@mkdir -p $(@D)
|
||||
$(CXX) -o $@ -I$(BUILDDIR)/$(INCDIR) $(MPI_INC) $(CXXFLAGS) -c $< $(MPI_MACRO)
|
||||
|
||||
@@ -77,4 +77,4 @@ struct testEngine allGatherEngine = {
|
||||
AllGatherRunTest
|
||||
};
|
||||
|
||||
#pragma weak mcclppTestEngine=allGatherEngine
|
||||
#pragma weak mscclppTestEngine=allGatherEngine
|
||||
@@ -128,11 +128,6 @@ testResult_t startColl(struct threadArgs* args, int in_place, int iter) {
|
||||
return testSuccess;
|
||||
}
|
||||
|
||||
testResult_t completeColl(struct threadArgs* args) {
|
||||
TESTCHECK(testStreamSynchronize(args->nGpus, args->streams));
|
||||
return testSuccess;
|
||||
}
|
||||
|
||||
testResult_t testStreamSynchronize(int ngpus, cudaStream_t* streams)
|
||||
{
|
||||
cudaError_t cudaErr;
|
||||
@@ -157,14 +152,19 @@ testResult_t testStreamSynchronize(int ngpus, cudaStream_t* streams)
|
||||
|
||||
if (cudaErr != cudaErrorNotReady)
|
||||
CUDACHECK(cudaErr);
|
||||
|
||||
// We might want to let other threads (including NCCL threads) use the CPU.
|
||||
if (idle)
|
||||
sched_yield();
|
||||
}
|
||||
free(done);
|
||||
return testSuccess;
|
||||
|
||||
// We might want to let other threads (including NCCL threads) use the CPU.
|
||||
if (idle)
|
||||
sched_yield();
|
||||
}
|
||||
free(done);
|
||||
return testSuccess;
|
||||
}
|
||||
|
||||
testResult_t completeColl(struct threadArgs* args) {
|
||||
TESTCHECK(testStreamSynchronize(args->nGpus, args->streams));
|
||||
return testSuccess;
|
||||
}
|
||||
|
||||
// Inter-thread/process barrier+allreduce. The quality of the return value
|
||||
@@ -531,12 +531,12 @@ testResult_t run() {
|
||||
|
||||
const char* timeStr = report_cputime ? "cputime" : "time";
|
||||
PRINT("#\n");
|
||||
PRINT("# %10s %12s %8s %6s %6s out-of-place in-place \n", "", "", "", "",
|
||||
PRINT("# %10s %12s out-of-place in-place \n", "",
|
||||
"");
|
||||
PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %6s %7s %6s %6s %6s\n", "size", "count", "type", "redop", "root",
|
||||
timeStr, "algbw", "busbw", "#wrong", timeStr, "algbw", "busbw", "#wrong");
|
||||
PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %5s %7s %6s %6s %5s\n", "(B)", "(elements)", "", "", "",
|
||||
"(us)", "(GB/s)", "(GB/s)", "", "(us)", "(GB/s)", "(GB/s)", "");
|
||||
PRINT("# %10s %12s %7s %6s %6s %6s %7s %6s %6s %6s\n", "size", "count", timeStr, "algbw", "busbw", "#wrong",
|
||||
timeStr, "algbw", "busbw", "#wrong");
|
||||
PRINT("# %10s %12s %7s %6s %6s %5s %7s %6s %6s %5s\n", "(B)", "(elements)", "(us)", "(GB/s)", "(GB/s)", "",
|
||||
"(us)", "(GB/s)", "(GB/s)", "");
|
||||
|
||||
struct testThread thread = {0};
|
||||
|
||||
|
||||
@@ -140,7 +140,7 @@ static void getHostName(char* hostname, int maxlen)
|
||||
}
|
||||
}
|
||||
|
||||
void print_usage(const char* prog)
|
||||
inline void print_usage(const char* prog)
|
||||
{
|
||||
#ifdef MSCCLPP_USE_MPI_FOR_TESTS
|
||||
printf("usage: %s IP:PORT [rank nranks]\n", prog);
|
||||
@@ -149,7 +149,7 @@ void print_usage(const char* prog)
|
||||
#endif
|
||||
}
|
||||
|
||||
void parse_arguments(int argc, const char* argv[], const char** ip_port, int* rank, int* world_size)
|
||||
inline void parse_arguments(int argc, const char* argv[], const char** ip_port, int* rank, int* world_size)
|
||||
{
|
||||
#ifdef MSCCLPP_USE_MPI_FOR_TESTS
|
||||
if (argc != 2 && argc != 4) {
|
||||
|
||||
Reference in New Issue
Block a user