mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-04-19 22:39:11 +00:00
New DSL implementation (#579)
The PR contains the following changes: Python side: - Channel-based DSL implementation: decouple channels from chunks. - Users create channels explicitly and only need local_rank, remote_rank and channel_type. - Adjust the executor JSON file: add remote_buffer fields so that different ops can use different combinations of channels and remote buffers. - Reimplement operation fusion and the data dependency check mechanism. - Add new ops such as semaphore and pipeline. - Clean up code and improve documentation. C++ side: - Support the new execution file JSON format. - Support semaphore and pipeline operations. - Code cleanup; support the non-zero-copy scenario. --------- Co-authored-by: Caio Rocha <caiorocha@microsoft.com> Co-authored-by: Changho Hwang <changhohwang@microsoft.com>
This commit is contained in:
@@ -184,7 +184,7 @@ def main(
|
||||
npkit_dump_dir = env().npkit_dump_dir
|
||||
if npkit_dump_dir != "":
|
||||
npkit.init(mscclpp_group.my_rank)
|
||||
execution_plan = ExecutionPlan(execution_plan_path)
|
||||
execution_plan = ExecutionPlan(execution_plan_path, mscclpp_group.my_rank)
|
||||
collective = execution_plan.collective()
|
||||
|
||||
dtype = parse_dtype(dtype_str)
|
||||
|
||||
@@ -659,7 +659,9 @@ def test_executor(mpi_group: MpiGroup, filename: str):
|
||||
npkit_dump_dir = env().npkit_dump_dir
|
||||
if npkit_dump_dir != "":
|
||||
npkit.init(mscclpp_group.my_rank)
|
||||
execution_plan = ExecutionPlan(os.path.join(project_dir, "test", "execution-files", filename))
|
||||
execution_plan = ExecutionPlan(
|
||||
os.path.join(project_dir, "test", "execution-files", filename), mscclpp_group.my_rank
|
||||
)
|
||||
|
||||
nelems = 1024 * 1024
|
||||
cp.random.seed(42)
|
||||
|
||||
Reference in New Issue
Block a user