Providing reduce-scatter test support (#390)

This commit is contained in:
Caio Rocha
2024-11-28 09:19:30 -08:00
committed by GitHub
parent d9c297ba14
commit ff18bb8d0b
2 changed files with 32 additions and 4 deletions

View File

@@ -74,6 +74,8 @@ def bench_correctness(
fill_data_kernel_name = "fill_data_%s" % dtype_str
if "allgather" in execution_plan_name:
coll = "all_gather"
+elif "reducescatter" in execution_plan_name:
+coll = "reduce_scatter"
else:
coll = "all_reduce"
test_data_kernel_name = "test_data_%s_%s" % (coll, dtype_str)
@@ -96,7 +98,7 @@ def bench_correctness(
fill_data_kernel.launch_kernel(fill_data_params, nblocks, nthreads, 0, stream)
func(stream)
test_data_params = (
-pack(result_buf, test_buf) + struct.pack("Q", input_buf.nbytes // type_size) + pack(num_ranks, i)
+pack(result_buf, test_buf) + struct.pack("Q", input_buf.nbytes // type_size) + pack(num_ranks, rank, i)
)
test_data_kernel.launch_kernel(test_data_params, nblocks, nthreads, 0, stream)
graph = stream.end_capture()
@@ -128,7 +130,7 @@ def dtype_to_mscclpp_dtype(dtype):
def allocate_buffer(nelems, dtype):
-if is_nvls_supported:
+if is_nvls_supported():
buffer_raw = alloc_shared_physical_cuda(nelems * cp.dtype(dtype).itemsize)
buffer_ptr = cp.cuda.MemoryPointer(
cp.cuda.UnownedMemory(buffer_raw.get_ptr(), buffer_raw.size(), buffer_raw), 0