mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-11 17:00:22 +00:00
## Summary Add ROCm (gfx942) support for the FP8 E4M3B15 data type, including optimized conversion routines between FP8 E4M3B15 and FP16/FP32 using inline assembly. Extends the allpair packet and fullmesh allreduce kernels to support higher-precision accumulation (e.g., FP16/FP32) when reducing FP8 data, improving numerical accuracy. Adds Python tests to verify that higher-precision accumulation is at least as accurate as native FP8 accumulation across all algorithm variants. --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
31 lines
1.2 KiB
CMake
31 lines
1.2 KiB
CMake
# Copyright (c) Microsoft Corporation.
|
|
# Licensed under the MIT license.
|
|
|
|
# Locate Python (3.8 is the minimum version requested). Both the interpreter
# and the Development.Module component must be found, otherwise configuration
# stops here.
find_package(Python 3.8 REQUIRED COMPONENTS Interpreter Development.Module)

# nanobind is downloaded at configure time, pinned to the v1.9.2 tag, and added
# to this build.
include(FetchContent)
FetchContent_Declare(
  nanobind
  GIT_REPOSITORY https://github.com/wjakob/nanobind.git
  GIT_TAG v1.9.2
)
FetchContent_MakeAvailable(nanobind)
|
|
|
|
# dlpack is pinned to an exact commit. It is added with EXCLUDE_FROM_ALL so
# that it stays out of the default build and out of installation.
set(mscclpp_py_dlpack_source
  GIT_REPOSITORY https://github.com/dmlc/dlpack.git
  GIT_TAG 5c210da409e7f1e51ddf445134a4376fdbd70d7d
)

if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.28")
  # CMake 3.28+ accepts EXCLUDE_FROM_ALL directly in FetchContent_Declare(), so
  # FetchContent_MakeAvailable() can perform the populate + add_subdirectory()
  # step. This avoids the single-argument FetchContent_Populate() call, which
  # policy CMP0169 (CMake 3.30) deprecates and rejects once set to NEW.
  FetchContent_Declare(dlpack ${mscclpp_py_dlpack_source} EXCLUDE_FROM_ALL)
  FetchContent_MakeAvailable(dlpack)
else()
  # Older CMake has no EXCLUDE_FROM_ALL support in FetchContent_Declare():
  # populate manually and add the subdirectory ourselves.
  FetchContent_Declare(dlpack ${mscclpp_py_dlpack_source})
  FetchContent_GetProperties(dlpack)
  if(NOT dlpack_POPULATED)
    FetchContent_Populate(dlpack)
    # Add dlpack subdirectory but exclude it from installation
    add_subdirectory(${dlpack_SOURCE_DIR} ${dlpack_BINARY_DIR} EXCLUDE_FROM_ALL)
  endif()
endif()

# The helper list is only needed for the declarations above.
unset(mscclpp_py_dlpack_source)
|
|
|
|
# Collect every .cpp file under this directory (recursively) and build the
# nanobind extension module from them. CONFIGURE_DEPENDS asks the build to
# re-check the glob so added or removed files trigger a re-configure.
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
nanobind_add_module(mscclpp_py ${SOURCES})

# The produced module file is named _mscclpp; once installed it searches for
# shared libraries in a lib/ directory located next to itself.
set_target_properties(mscclpp_py PROPERTIES
  OUTPUT_NAME _mscclpp
  INSTALL_RPATH "\$ORIGIN/lib"
)

# GPU headers are treated as system includes for this target only.
target_include_directories(mscclpp_py SYSTEM PRIVATE ${GPU_INCLUDE_DIRS})
target_link_libraries(mscclpp_py
  PRIVATE
    dlpack
    mscclpp
    mscclpp_collectives
    ${GPU_LIBRARIES}
)

# Pass MSCCLPP_USE_ROCM through to the binding sources as a preprocessor
# definition when the CMake variable of the same name is enabled.
if(MSCCLPP_USE_ROCM)
  target_compile_definitions(mscclpp_py PRIVATE MSCCLPP_USE_ROCM)
endif()

# Install the module at the root of the install prefix.
install(TARGETS mscclpp_py LIBRARY DESTINATION .)
|