mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-12 01:10:22 +00:00
- Removes the GTest dependency, replacing it with a minimal custom framework (`test/framework.*`) that covers only what the tests actually use — a unified `TEST()` macro with SFINAE-based fixture auto-detection, `EXPECT_*`/`ASSERT_*` assertions, environments, and setup/teardown.
- Adds a `--exclude-perf-tests` flag and substring-based negative filtering.
- Adds an `MSCCLPP_ENABLE_COVERAGE` CMake option with gcov/lcov; CI uploads to Codecov.
- Merges standalone `test/perf/` into the main test targets.
- Refactors the Azure pipelines to reduce redundancy and improve readability.

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Changho Hwang <changhohwang@microsoft.com>
63 lines
1.7 KiB
C++
63 lines
1.7 KiB
C++
// Copyright (c) Microsoft Corporation.
|
|
// Licensed under the MIT License.
|
|
|
|
#include <mscclpp/gpu_utils.hpp>
|
|
|
|
#include "../framework.hpp"
|
|
|
|
// Exercises the GPU stream pool in three phases, each in its own scope so the
// stream wrapper obtained there is destroyed before the next phase starts:
//   1. a stream obtained from the pool has a non-null handle;
//   2. the next stream obtained has the same handle as the one from phase 1
//      (presumably because destroying the wrapper hands the stream back to the
//      pool — confirm against the pool implementation);
//   3. two streams held at the same time have different handles.
TEST(GpuUtilsTest, StreamPool) {
  auto pool = mscclpp::gpuStreamPool();

  // Raw handle remembered from phase 1 so phase 2 can compare against it.
  cudaStream_t s;

  {
    auto first = pool->getStream();
    s = first;
    EXPECT_NE(s, nullptr);
  }

  {
    auto stream2 = pool->getStream();
    EXPECT_EQ(cudaStream_t(stream2), s);
  }

  {
    auto stream3 = pool->getStream();
    auto stream4 = pool->getStream();
    EXPECT_NE(cudaStream_t(stream3), cudaStream_t(stream4));
  }

  // Empty the pool before the test returns.
  pool->clear();
}
|
|
|
|
// Smoke test for gpuCallocShared: one call with no arguments and one call with
// the argument 5 (presumably an element count — confirm against the header).
// There are no assertions; the test only exercises the two calls and the
// destruction of the returned objects when the scope ends.
TEST(GpuUtilsTest, AllocShared) {
  auto allocDefault = mscclpp::detail::gpuCallocShared<uint32_t>();
  auto allocFive = mscclpp::detail::gpuCallocShared<int64_t>(5);
}
|
|
|
|
// Smoke test for gpuCallocUnique: one call with no arguments and one call with
// the argument 5 (presumably an element count — confirm against the header).
// There are no assertions; the test only exercises the two calls and the
// destruction of the returned objects when the scope ends.
TEST(GpuUtilsTest, AllocUnique) {
  auto allocDefault = mscclpp::detail::gpuCallocUnique<uint32_t>();
  auto allocFive = mscclpp::detail::gpuCallocUnique<int64_t>(5);
}
|
|
|
|
// Smoke test for gpuCallocHostShared: one call with no arguments and one call
// with the argument 5 (presumably an element count — confirm against the
// header). There are no assertions; the test only exercises the two calls and
// the destruction of the returned objects when the scope ends.
TEST(GpuUtilsTest, MakeSharedHost) {
  auto allocDefault = mscclpp::detail::gpuCallocHostShared<uint32_t>();
  auto allocFive = mscclpp::detail::gpuCallocHostShared<int64_t>(5);
}
|
|
|
|
// Smoke test for gpuCallocHostUnique: one call with no arguments and one call
// with the argument 5 (presumably an element count — confirm against the
// header). There are no assertions; the test only exercises the two calls and
// the destruction of the returned objects when the scope ends.
TEST(GpuUtilsTest, MakeUniqueHost) {
  auto allocDefault = mscclpp::detail::gpuCallocHostUnique<uint32_t>();
  auto allocFive = mscclpp::detail::gpuCallocHostUnique<int64_t>(5);
}
|
|
|
|
// Round-trips an int buffer through gpuMemcpy: host -> device with
// cudaMemcpyHostToDevice, then device -> host with cudaMemcpyDeviceToHost into
// a second buffer, and checks element by element that the copy matches the
// source.
TEST(GpuUtilsTest, Memcpy) {
  const int nElem = 1024;

  // Source pattern 1, 2, ..., nElem. Every value is non-zero, so it cannot be
  // confused with the zero-filled destination buffer below.
  std::vector<int> hostBuff(nElem);
  int nextValue = 1;
  for (int& slot : hostBuff) {
    slot = nextValue++;
  }

  // Destination for the copy back from the device; starts as all zeros.
  std::vector<int> hostBuffTmp(nElem, 0);

  auto deviceBuf = mscclpp::detail::gpuCallocShared<int>(nElem);
  auto devicePtr = deviceBuf.get();
  mscclpp::gpuMemcpy<int>(devicePtr, hostBuff.data(), nElem, cudaMemcpyHostToDevice);
  mscclpp::gpuMemcpy<int>(hostBuffTmp.data(), devicePtr, nElem, cudaMemcpyDeviceToHost);

  for (int i = 0; i < nElem; ++i) {
    EXPECT_EQ(hostBuff[i], hostBuffTmp[i]);
  }
}
|