Manage runtime environments (#452)

* Add an `Env` class that manages all runtime environment variables.
* Rename `NPKIT_DUMP_DIR` to `MSCCLPP_NPKIT_DUMP_DIR`.
This commit is contained in:
Changho Hwang
2025-01-15 09:44:52 -08:00
committed by GitHub
parent 8ac50dc85d
commit 869cdba00c
19 changed files with 229 additions and 51 deletions

View File

@@ -27,6 +27,7 @@ from mscclpp import (
Transport,
is_nvls_supported,
npkit,
env,
)
import mscclpp.comm as mscclpp_comm
from mscclpp.utils import KernelBuilder, GpuBuffer, pack
@@ -36,6 +37,19 @@ from .mscclpp_mpi import MpiGroup, parametrize_mpi_groups, mpi_group
ethernet_interface_name = "eth0"
@parametrize_mpi_groups(1)
def test_env(mpi_group: MpiGroup):
    """Exercise the object returned by `env()`.

    Checks that its `debug` attribute is a string, that assigning to an
    attribute raises `AttributeError`, and that a second `env()` call
    returns an object that compares equal to the first.
    """
    first = env()
    assert isinstance(first.debug, str)

    # Attributes are expected to be read-only, so assignment must fail.
    with pytest.raises(AttributeError):
        first.debug = "INFO"

    # A repeated call is expected to hand back the same object.
    second = env()
    assert first == second
def all_ranks_on_the_same_node(mpi_group: MpiGroup):
if (ethernet_interface_name in ni.interfaces()) is False:
pytest.skip(f"{ethernet_interface_name} is not an interface to use on this node")
@@ -624,8 +638,8 @@ def test_executor(mpi_group: MpiGroup, filename: str):
project_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
mscclpp_group = mscclpp_comm.CommGroup(mpi_group.comm)
executor = Executor(mscclpp_group.communicator)
npkit_dump_dir = os.getenv("NPKIT_DUMP_DIR")
if npkit_dump_dir is not None:
npkit_dump_dir = env().npkit_dump_dir
if npkit_dump_dir != "":
npkit.init(mscclpp_group.my_rank)
execution_plan = ExecutionPlan(os.path.join(project_dir, "test", "execution-files", filename))