Files
mscclpp/test/unit/cuda_utils_tests.cc
Changho Hwang 34945fb107 Add GpuBuffer class (#423)
* Renamed and moved mem alloc functions into the `mscclpp::detail::`
namespace (now `mscclpp::detail::gpuCalloc*<T>()`)
* Deprecated constructor-calling mem alloc functions
(`mscclpp::makeShared*<T>()` and `mscclpp::makeUnique*<T>()`)
* Added a new `mscclpp::GpuBuffer<T>()` class that should be used in
general for allocating communication buffers
* Added a new `mscclpp.utils.GpuBuffer` Python class that inherits
`cupy.ndarray` and allocates using `mscclpp::gpuMemAlloc`
* Renamed the `mscclpp::memcpyCuda*<T>()` functions to
`mscclpp::gpuMemcpy*<T>()` for naming consistency
* A few fixes in NVLS memory allocation
* Tackled minor compiler warnings
2025-01-07 18:40:01 -08:00

43 lines
1.3 KiB
C++

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#include <cstdint>
#include <vector>

#include <gtest/gtest.h>

#include <mscclpp/gpu_utils.hpp>
// Smoke test: zero-initialized device allocations returned as shared_ptr
// must construct and destruct cleanly (single element and small array).
TEST(CudaUtilsTest, AllocShared) {
  auto singleElem = mscclpp::detail::gpuCallocShared<uint32_t>();
  auto smallArray = mscclpp::detail::gpuCallocShared<int64_t>(5);
}
// Smoke test: zero-initialized device allocations returned as unique_ptr
// must construct and destruct cleanly (single element and small array).
TEST(CudaUtilsTest, AllocUnique) {
  auto singleElem = mscclpp::detail::gpuCallocUnique<uint32_t>();
  auto smallArray = mscclpp::detail::gpuCallocUnique<int64_t>(5);
}
// Smoke test: zero-initialized host (pinned) allocations returned as
// shared_ptr must construct and destruct cleanly.
// Renamed from MakeSharedHost: the makeShared* helpers were replaced by
// detail::gpuCalloc*, so the test name now matches its siblings
// (AllocShared / AllocUnique).
TEST(CudaUtilsTest, AllocSharedHost) {
  auto p1 = mscclpp::detail::gpuCallocHostShared<uint32_t>();
  auto p2 = mscclpp::detail::gpuCallocHostShared<int64_t>(5);
}
// Smoke test: zero-initialized host (pinned) allocations returned as
// unique_ptr must construct and destruct cleanly.
// Renamed from MakeUniqueHost: the makeUnique* helpers were replaced by
// detail::gpuCalloc*, so the test name now matches its siblings
// (AllocShared / AllocUnique).
TEST(CudaUtilsTest, AllocUniqueHost) {
  auto p1 = mscclpp::detail::gpuCallocHostUnique<uint32_t>();
  auto p2 = mscclpp::detail::gpuCallocHostUnique<int64_t>(5);
}
// Round-trip test for gpuMemcpy: copy a host buffer to the device and back,
// then verify every element survived the trip unchanged.
TEST(CudaUtilsTest, Memcpy) {
  constexpr int numElements = 1024;

  // Fill the source with a known, non-zero pattern (1..numElements).
  std::vector<int> source(numElements);
  for (int idx = 0; idx < numElements; ++idx) {
    source[idx] = idx + 1;
  }
  std::vector<int> roundTrip(numElements, 0);

  auto deviceBuffer = mscclpp::detail::gpuCallocShared<int>(numElements);
  mscclpp::gpuMemcpy<int>(deviceBuffer.get(), source.data(), numElements, cudaMemcpyHostToDevice);
  mscclpp::gpuMemcpy<int>(roundTrip.data(), deviceBuffer.get(), numElements, cudaMemcpyDeviceToHost);

  for (int idx = 0; idx < numElements; ++idx) {
    EXPECT_EQ(source[idx], roundTrip[idx]);
  }
}