diff --git a/cmake/FindGDRCopy.cmake b/cmake/FindGDRCopy.cmake index 54e0ba1c..c1f786ae 100644 --- a/cmake/FindGDRCopy.cmake +++ b/cmake/FindGDRCopy.cmake @@ -30,15 +30,19 @@ find_library(GDRCOPY_LIBRARIES ${GDRCOPY_ROOT_DIR}/lib /usr/local/lib /usr/lib - /usr/lib/x86_64-linux-gnu) + /usr/lib/x86_64-linux-gnu + /usr/lib/aarch64-linux-gnu) if(GDRCOPY_INCLUDE_DIRS) - include(CheckSymbolExists) + include(CheckCXXSourceCompiles) set(CMAKE_REQUIRED_INCLUDES ${GDRCOPY_INCLUDE_DIRS}) set(CMAKE_REQUIRED_LIBRARIES ${GDRCOPY_LIBRARIES}) - check_symbol_exists(gdr_pin_buffer_v2 "gdrapi.h" GDRCOPY_HAS_PIN_BUFFER_V2) - unset(CMAKE_REQUIRED_LIBRARIES) + check_cxx_source_compiles(" + #include + int main() { gdr_pin_buffer_v2(0, 0, 0, 0, 0); return 0; } + " GDRCOPY_HAS_PIN_BUFFER_V2) unset(CMAKE_REQUIRED_INCLUDES) + unset(CMAKE_REQUIRED_LIBRARIES) if(NOT GDRCOPY_HAS_PIN_BUFFER_V2) message(STATUS "GDRCopy found but too old (gdr_pin_buffer_v2 not available). Requires >= 2.5.") set(GDRCOPY_INCLUDE_DIRS GDRCOPY_INCLUDE_DIRS-NOTFOUND) diff --git a/include/mscclpp/core.hpp b/include/mscclpp/core.hpp index 5b184f0a..4aeab654 100644 --- a/include/mscclpp/core.hpp +++ b/include/mscclpp/core.hpp @@ -390,7 +390,7 @@ struct EndpointConfig { }; static constexpr int DefaultPort = -1; - static constexpr int DefaultGidIndex = 0; + static constexpr int DefaultGidIndex = 3; static constexpr int DefaultMaxCqSize = 1024; static constexpr int DefaultMaxCqPollNum = 1; static constexpr int DefaultMaxSendWr = 8192; diff --git a/python/mscclpp/language/rank.py b/python/mscclpp/language/rank.py index e5b7aab8..0c38cb06 100644 --- a/python/mscclpp/language/rank.py +++ b/python/mscclpp/language/rank.py @@ -304,11 +304,16 @@ class BaseBuffer: self.size = offset + size def __getitem__(self, key): - if self.offset + key.stop > self.size: - raise RuntimeError( - f"Index range from {self.offset + key.start} - {self.offset + key.stop} is out of bounds for buffer {self.buffer_type}. Buffer size: {self.size}" - ) - return Chunk(self.rank, self.buffer_type, self.offset + key.start, key.stop - key.start) + if isinstance(key, slice): + start = key.start if key.start is not None else 0 + stop = key.stop if key.stop is not None else (self.size - self.offset) + if self.offset + stop > self.size: + raise RuntimeError( + f"Index range from {self.offset + start} - {self.offset + stop} is out of bounds for buffer {self.buffer_type}. Buffer size: {self.size}" + ) + return Chunk(self.rank, self.buffer_type, self.offset + start, stop - start) + else: + raise TypeError(f"Buffer indices must be slices, not {type(key).__name__}") class Buffer(BaseBuffer): diff --git a/test.json b/test.json new file mode 100644 index 00000000..294c2a13 --- /dev/null +++ b/test.json @@ -0,0 +1,218 @@ +{ + "name": "send_recv_test", + "collective": "test", + "protocol": "Simple", + "inplace": false, + "reuse_resources": false, + "gpus": [ + { + "id": 0, + "input_chunks": 1, + "output_chunks": 1, + "scratch_chunks": 0, + "threadblocks": [ + { + "id": 0, + "ops": [ + { + "name": "signal", + "channel_ids": [ + 0 + ], + "channel_type": "port" + }, + { + "name": "wait", + "channel_ids": [ + 0 + ], + "channel_type": "port" + }, + { + "name": "nop" + }, + { + "name": "put", + "src_buff": [ + { + "type": "i", + "index": 0, + "size": 1 + } + ], + "dst_buff": [ + { + "buffer_id": 0, + "index": 0, + "size": 1 + } + ], + "channel_ids": [ + 0 + ], + "channel_type": "port" + }, + { + "name": "nop" + }, + { + "name": "signal", + "channel_ids": [ + 0 + ], + "channel_type": "port" + }, + { + "name": "wait", + "channel_ids": [ + 0 + ], + "channel_type": "port" + } + ], + "channels": [ + { + "channel_type": "port", + "channel_ids": [ + 0 + ] + } + ], + "remote_buffer_refs": [ + { + "access_channel_type": "port", + "remote_buffer_ids": [ + 0 + ] + } + ] + } + ], + "channels": [ + { + "channel_type": "port", + "connected_to": [ + 1 + ] + } + ], + "remote_buffers": [ + { + "rank": 1, + "type": "o", + "access_channel_types": [ + "port" + ] + } + ], + "semaphores": [] + }, + { + "id": 1, + "input_chunks": 1, + "output_chunks": 1, + "scratch_chunks": 0, + "threadblocks": [ + { + "id": 0, + "ops": [ + { + "name": "signal", + "channel_ids": [ + 0 + ], + "channel_type": "port" + }, + { + "name": "wait", + "channel_ids": [ + 0 + ], + "channel_type": "port" + }, + { + "name": "nop" + }, + { + "name": "put", + "src_buff": [ + { + "type": "i", + "index": 0, + "size": 1 + } + ], + "dst_buff": [ + { + "buffer_id": 0, + "index": 0, + "size": 1 + } + ], + "channel_ids": [ + 0 + ], + "channel_type": "port" + }, + { + "name": "nop" + }, + { + "name": "signal", + "channel_ids": [ + 0 + ], + "channel_type": "port" + }, + { + "name": "wait", + "channel_ids": [ + 0 + ], + "channel_type": "port" + } + ], + "channels": [ + { + "channel_type": "port", + "channel_ids": [ + 0 + ] + } + ], + "remote_buffer_refs": [ + { + "access_channel_type": "port", + "remote_buffer_ids": [ + 0 + ] + } + ] + } + ], + "channels": [ + { + "channel_type": "port", + "connected_to": [ + 0 + ] + } + ], + "remote_buffers": [ + { + "rank": 0, + "type": "o", + "access_channel_types": [ + "port" + ] + } + ], + "semaphores": [] + } + ], + "num_threads_per_block": 1024, + "use_double_scratch_buffer": false, + "buffer_alignment": 16, + "min_message_size": 0, + "max_message_size": 18446744073709551615 +}