Files
mscclpp/examples/customized-collective-algorithm/Makefile
Binyang Li a707273701 Torch integration (#692)
Reorganize the current native algorithm implementation and the DSL algorithm
implementation.
Provide a unified API for DSL and native algorithms, and an interface
to tune them.
Provide an interface for PyTorch integration with the native API and the DSL.

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
Co-authored-by: chhwang <8018170+chhwang@users.noreply.github.com>
2026-01-21 20:32:24 -08:00

23 lines
509 B
Makefile

# Toolchain install prefixes. Both use `?=`, so a value from the environment
# or the command line (e.g. `make CUDA_HOME=/opt/cuda`) takes precedence.
CUDA_HOME ?= /usr/local/cuda
ROCM_HOME ?= /opt/rocm

# Use nvcc when it is present under CUDA_HOME; otherwise fall back to hipcc
# under ROCM_HOME. The existence test uses make's built-in $(wildcard), so it
# does not depend on an external `which` binary being installed and does not
# fork a shell while the Makefile is parsed.
ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),)
  COMPILER := $(CUDA_HOME)/bin/nvcc
  ARCH_FLAG := -arch=native
else
  COMPILER := $(ROCM_HOME)/bin/hipcc
  # Defines the __HIP_PLATFORM_AMD__ preprocessor macro for the hipcc build.
  ARCH_FLAG := -D__HIP_PLATFORM_AMD__=1
endif
# Name of the example binary and the single source file it is built from.
TARGET = customized_allgather
SRC = customized_allgather.cu

# `all` and `clean` name actions, not files. Declaring them phony keeps them
# working even if a file called `all` or `clean` appears in this directory.
.PHONY: all clean

# Default goal: build the example binary.
all: $(TARGET)

# Compile and link in a single step using $(COMPILER) and $(ARCH_FLAG).
# $@ is the target (the binary) and $< is the first prerequisite (the source).
$(TARGET): $(SRC)
	$(COMPILER) $(ARCH_FLAG) -o $@ $< -lmscclpp_collectives -lmscclpp -lnccl

# Remove the built binary; -f keeps this quiet when it does not exist.
clean:
	rm -f $(TARGET)