Support ReduceScatter in the NCCL interface (#460)

Co-authored-by: root <root@mscclpp-000002.tn3ujtlnlkjehmmeegdavazkfg.jx.internal.cloudapp.net>
Co-authored-by: Caio Rocha <aiorocha@microsoft.com>
Co-authored-by: Changho Hwang <changhohwang@microsoft.com>
This commit is contained in:
Caio Rocha
2025-02-11 13:28:19 -08:00
committed by GitHub
parent a6e00cc449
commit 55789bc551
4 changed files with 70 additions and 9 deletions

View File

@@ -91,7 +91,7 @@ TEST_DATA_ALL_REDUCE(int32, int)
 } \
 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_elems; i += blockDim.x * gridDim.x) { \
 if (i >= offset && i < offset + nem_elems_per_rank) { \
-assert(abs(float(result_buf[i]) - float(test_buf[i])) < 1e-3 * num_ranks); \
+assert(abs(float(result_buf[i - offset]) - float(test_buf[i])) < 1e-3 * num_ranks); \
 } \
 } \
 }