mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-11 17:00:22 +00:00
Reorganize the current native algorithm implementation and the DSL algorithm implementation. Provide a unified API for DSL algorithms and native algorithms, and provide an interface to tune the algorithm. Provide an interface for PyTorch integration with the native API and the DSL. --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com> Co-authored-by: chhwang <8018170+chhwang@users.noreply.github.com>
23 lines
509 B
Makefile
23 lines
509 B
Makefile
# Toolkit install prefixes. Both are set with `?=`, so a value already given
# in the environment or on the make command line (for example
# `make CUDA_HOME=/opt/cuda`) takes precedence over these defaults.
CUDA_HOME ?= /usr/local/cuda
ROCM_HOME ?= /opt/rocm

# Select the GPU compiler: use nvcc when it is present under CUDA_HOME,
# otherwise fall back to hipcc under ROCM_HOME.
# $(wildcard ...) tests for the file inside make itself, so the detection
# neither forks a shell at parse time nor depends on `which` being installed
# (a missing `which` would otherwise silently select hipcc).
ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),)
  COMPILER := $(CUDA_HOME)/bin/nvcc
  ARCH_FLAG := -arch=native
else
  COMPILER := $(ROCM_HOME)/bin/hipcc
  ARCH_FLAG := -D__HIP_PLATFORM_AMD__=1
endif

# Name of the executable to build and the single source file it is built from.
# Both are plain literals, so simple (`:=`) assignment is used.
TARGET := customized_allgather
SRC := customized_allgather.cu

# Build $(TARGET).
# Declared phony because `all` names a command, not a file: a stray file
# called `all` in this directory must not make the goal look up to date.
.PHONY: all
all: $(TARGET)

# Compile and link the source into the executable with one compiler call.
#   $@ is the target ($(TARGET)); $< is the first prerequisite ($(SRC)).
# The result is linked against libmscclpp_collectives, libmscclpp and libnccl.
# The recipe line below must begin with a hard TAB.
$(TARGET): $(SRC)
	$(COMPILER) $(ARCH_FLAG) -o $@ $< -lmscclpp_collectives -lmscclpp -lnccl

# Remove the built executable.
# Declared phony so the recipe always runs, even if a file named `clean`
# exists in this directory. The recipe line must begin with a hard TAB.
.PHONY: clean
clean:
	rm -f $(TARGET)