diff --git a/test/python/ext/ep/test_ep_smoke.py b/test/python/ext/ep/test_ep_smoke.py deleted file mode 100644 index 3002e20f..00000000 --- a/test/python/ext/ep/test_ep_smoke.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Smoke tests for the EP extension. - -These tests only exercise single-rank / pure-Python code paths so they can -run in CI without multi-GPU resources. Multi-rank dispatch/combine tests -belong in ``test/python/ext/ep/test_intranode.py`` and are left as TODO -until the Python frontend is validated on H100. - -Run with:: - - pytest -xvs test/python/ext/ep/test_ep_smoke.py -""" - -from __future__ import annotations - -import pytest - -try: - import mscclpp_ep_cpp as _cpp # type: ignore[import-not-found] -except ImportError: # pragma: no cover - pytest.skip("mscclpp_ep_cpp is not built (set -DMSCCLPP_BUILD_EXT_EP=ON)", allow_module_level=True) - - -def test_config_roundtrip(): - cfg = _cpp.Config(num_sms=20, num_max_nvl_chunked_send_tokens=6, num_max_nvl_chunked_recv_tokens=256, - num_max_rdma_chunked_send_tokens=6, num_max_rdma_chunked_recv_tokens=256) - hint = cfg.get_nvl_buffer_size_hint(7168 * 2, 8) - assert hint > 0 - - -def test_low_latency_size_hint(): - assert _cpp.get_low_latency_rdma_size_hint(128, 7168, 8, 256) > 0 - - -def test_low_latency_buffer_construct(): - # Low-latency kernels are structurally ported. At construction time the - # C++ Buffer must accept low_latency_mode=True; runtime requires a real - # multi-node setup (see tests in tests/test_low_latency.py when ported). - import torch - - if not torch.cuda.is_available(): - pytest.skip("CUDA not available") - - buf = _cpp.Buffer(rank=0, num_ranks=1, num_nvl_bytes=0, num_rdma_bytes=0, low_latency_mode=True) - assert not buf.is_available()