Files
mscclpp/python/csrc/CMakeLists.txt
Binyang Li 8896cd909a Add ROCm FP8 E4M3B15 support (#774)
## Summary

Add ROCm (gfx942) support for the FP8 E4M3B15 data type, including
optimized conversion routines between FP8 E4M3B15 and FP16/FP32 using
inline assembly.

Extends the allpair packet and fullmesh allreduce kernels to support
higher-precision accumulation (e.g., FP16/FP32) when reducing FP8 data,
improving numerical accuracy.

Adds Python tests to verify that higher-precision accumulation is at
least as accurate as native FP8 accumulation across all algorithm
variants.

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-08 09:53:45 -07:00

31 lines
1.2 KiB
CMake

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
find_package(Python 3.8 COMPONENTS Interpreter Development.Module REQUIRED)
include(FetchContent)
FetchContent_Declare(nanobind GIT_REPOSITORY https://github.com/wjakob/nanobind.git GIT_TAG v1.9.2)
FetchContent_MakeAvailable(nanobind)
FetchContent_Declare(dlpack
GIT_REPOSITORY https://github.com/dmlc/dlpack.git
GIT_TAG 5c210da409e7f1e51ddf445134a4376fdbd70d7d
)
FetchContent_GetProperties(dlpack)
if(NOT dlpack_POPULATED)
FetchContent_Populate(dlpack)
# Add dlpack subdirectory but exclude it from installation
add_subdirectory(${dlpack_SOURCE_DIR} ${dlpack_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS *.cpp)
nanobind_add_module(mscclpp_py ${SOURCES})
set_target_properties(mscclpp_py PROPERTIES OUTPUT_NAME _mscclpp)
set_target_properties(mscclpp_py PROPERTIES INSTALL_RPATH "\$ORIGIN/lib")
target_link_libraries(mscclpp_py PRIVATE dlpack mscclpp mscclpp_collectives ${GPU_LIBRARIES})
target_include_directories(mscclpp_py SYSTEM PRIVATE ${GPU_INCLUDE_DIRS})
if(MSCCLPP_USE_ROCM)
target_compile_definitions(mscclpp_py PRIVATE MSCCLPP_USE_ROCM)
endif()
install(TARGETS mscclpp_py LIBRARY DESTINATION .)