Bernhard Stoeckner
2023-08-08 18:26:28 +02:00
parent 29f830f1bb
commit 12c0739352
85 changed files with 33144 additions and 32566 deletions

View File

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.86.10\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.98\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -275,6 +275,7 @@ NV_HEADER_PRESENCE_TESTS = \
asm/opal-api.h \
sound/hdaudio.h \
asm/pgtable_types.h \
asm/page.h \
linux/stringhash.h \
linux/dma-map-ops.h \
rdma/peer_mem.h \
@@ -300,7 +301,9 @@ NV_HEADER_PRESENCE_TESTS = \
linux/vfio_pci_core.h \
linux/mdev.h \
soc/tegra/bpmp-abi.h \
soc/tegra/bpmp.h
soc/tegra/bpmp.h \
linux/cc_platform.h \
asm/cpufeature.h
# Filename to store the define for the header in $(1); this is only consumed by
# the rule below that concatenates all of these together.
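For illustration, each header listed above yields a build-time presence define (assuming the usual NV_<PATH>_H_PRESENT spelling produced by this machinery), which the rest of the driver uses as an include guard; the newly listed linux/cc_platform.h, for example, is consumed later in this commit roughly like this sketch:

    /* guard generated from the linux/cc_platform.h presence test above */
    #if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
    #include <linux/cc_platform.h>
    #endif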

View File

@@ -511,7 +511,11 @@ static inline void nv_vfree(void *ptr, NvU64 size)
static inline void *nv_ioremap(NvU64 phys, NvU64 size)
{
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
void *ptr = ioremap_driver_hardened(phys, size);
#else
void *ptr = ioremap(phys, size);
#endif
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
@@ -524,11 +528,11 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
{
#if defined(NV_IOREMAP_CACHE_PRESENT)
void *ptr = ioremap_cache(phys, size);
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
ptr = ioremap_cache_shared(phys, size);
#elif defined(NV_IOREMAP_CACHE_PRESENT)
ptr = ioremap_cache(phys, size);
#elif defined(NVCPU_PPC64LE)
//
// ioremap_cache() has been only implemented correctly for ppc64le with
@@ -543,25 +547,32 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
// (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
// support on power.
//
void *ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
#else
return nv_ioremap(phys, size);
#endif
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
{
#if defined(NV_IOREMAP_WC_PRESENT)
void *ptr = ioremap_wc(phys, size);
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
ptr = ioremap_driver_hardened_wc(phys, size);
#elif defined(NV_IOREMAP_WC_PRESENT)
ptr = ioremap_wc(phys, size);
#else
return nv_ioremap_nocache(phys, size);
#endif
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
static inline void nv_iounmap(void *ptr, NvU64 size)
@@ -634,37 +645,24 @@ static NvBool nv_numa_node_has_memory(int node_id)
free_pages(ptr, order); \
}
extern NvU64 nv_shared_gpa_boundary;
static inline pgprot_t nv_sme_clr(pgprot_t prot)
{
#if defined(__sme_clr)
return __pgprot(__sme_clr(pgprot_val(prot)));
#else
return prot;
#endif // __sme_clr
}
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
/*
* When AMD memory encryption is enabled, device memory mappings with the
* C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
*
* If cc_mkdec() is present, then pgprot_decrypted() can't be used.
*/
#if defined(NV_CC_MKDEC_PRESENT)
if (nv_shared_gpa_boundary != 0)
{
/*
* By design, a VM using vTOM doesn't see the SEV setting and
* for AMD with vTOM, *set* means decrypted.
*/
prot = __pgprot(nv_shared_gpa_boundary | (pgprot_val(vm_prot)));
}
else
{
prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
}
#else
prot = pgprot_decrypted(prot);
#endif
#endif
return prot;
#if defined(pgprot_decrypted)
return pgprot_decrypted(prot);
#else
return nv_sme_clr(prot);
#endif // pgprot_decrypted
}
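The reworked nv_adjust_pgprot() now prefers the kernel's pgprot_decrypted() when available and otherwise falls back to clearing the SME bit via nv_sme_clr(). As a hedged usage sketch only (the mmap helper below is hypothetical, not part of this change), the adjusted protection is the sort of value that would be passed to io_remap_pfn_range() when exposing device memory to user space, which is why the encryption attribute has to be cleared for user mappings:

    /* Hypothetical sketch: map device MMIO into a user VMA with the
     * encryption attribute cleared on SME/SEV systems. */
    static int nv_example_mmap_bar(struct vm_area_struct *vma, NvU64 mmio_start)
    {
        vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
        return io_remap_pfn_range(vma,
                                  vma->vm_start,
                                  (unsigned long)(mmio_start >> PAGE_SHIFT),
                                  vma->vm_end - vma->vm_start,
                                  vma->vm_page_prot);
    }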
#if defined(PAGE_KERNEL_NOENC)
@@ -1324,7 +1322,7 @@ nv_dma_maps_swiotlb(struct device *dev)
* SEV memory encryption") forces SWIOTLB to be enabled when AMD SEV
* is active in all cases.
*/
if (os_sev_enabled)
if (os_cc_enabled)
swiotlb_in_use = NV_TRUE;
#endif

View File

@@ -321,10 +321,6 @@ typedef struct UvmGpuChannelAllocParams_tag
// The next two fields store UVM_BUFFER_LOCATION values
NvU32 gpFifoLoc;
NvU32 gpPutLoc;
// Allocate the channel as secure. This flag should only be set when
// Confidential Compute is enabled.
NvBool secure;
} UvmGpuChannelAllocParams;
typedef struct UvmGpuPagingChannelAllocParams_tag
@@ -368,9 +364,6 @@ typedef struct
// True if the CE can be used for P2P transactions
NvBool p2p:1;
// True if the CE supports encryption
NvBool secure:1;
// Mask of physical CEs assigned to this LCE
//
// The value returned by RM for this field may change when a GPU is

View File

@@ -214,8 +214,8 @@ NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
extern NvU32 os_page_size;
extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvU32 os_sev_status;
extern NvBool os_sev_enabled;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
/*

View File

@@ -445,6 +445,9 @@ compile_test() {
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
#include <asm/pgtable_types.h>
#endif
#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif
#include <asm/set_memory.h>
#else
#include <asm/cacheflush.h>
@@ -467,6 +470,9 @@ compile_test() {
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
#include <asm/pgtable_types.h>
#endif
#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif
#include <asm/set_memory.h>
#else
#include <asm/cacheflush.h>
@@ -524,6 +530,9 @@ compile_test() {
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
#include <asm/pgtable_types.h>
#endif
#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif
#include <asm/set_memory.h>
#else
#include <asm/cacheflush.h>
@@ -551,6 +560,9 @@ compile_test() {
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
#include <asm/pgtable_types.h>
#endif
#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif
#include <asm/set_memory.h>
#else
#include <asm/cacheflush.h>
@@ -695,6 +707,50 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOREMAP_WC_PRESENT" "" "functions"
;;
ioremap_driver_hardened)
#
# Determine if the ioremap_driver_hardened() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_driver_hardened(void) {
ioremap_driver_hardened();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_PRESENT" "" "functions"
;;
ioremap_driver_hardened_wc)
#
# Determine if the ioremap_driver_hardened_wc() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_driver_hardened_wc(void) {
ioremap_driver_hardened_wc();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT" "" "functions"
;;
ioremap_cache_shared)
#
# Determine if the ioremap_cache_shared() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_cache_shared(void) {
ioremap_cache_shared();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_CACHE_SHARED_PRESENT" "" "functions"
;;
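These three probes follow the same pattern as the ioremap_wc test above: compile_check_conftest() (defined elsewhere in conftest.sh) compiles the snippet and, assuming the usual conftest output format, records the result as a preprocessor define that the nv-linux.h hunk earlier in this commit keys on, roughly:

    /* emitted into the generated conftest headers when the probe compiles;
     * #undef'd otherwise */
    #define NV_IOREMAP_CACHE_SHARED_PRESENT

    /* consumed in nv_ioremap_cache() (see the nv-linux.h hunk above) */
    #if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
        ptr = ioremap_cache_shared(phys, size);
    #endif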
dom0_kernel_present)
# Add config parameter if running on DOM0.
if [ -n "$VGX_BUILD" ]; then
@@ -4888,40 +4944,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_CHANNEL_STATE_PRESENT" "" "types"
;;
pgprot_decrypted)
cc_platform_has)
#
# Determine if the macro 'pgprot_decrypted()' is present.
# Determine if 'cc_platform_has()' is present.
#
# Added by commit 21729f81ce8a ("x86/mm: Provide general kernel
# support for memory encryption") in v4.14 (2017-07-18)
# Added by commit aa5a461171f9 ("x86/sev: Add an x86 version of
# cc_platform_has()") in v5.15.3 (2021-10-04)
CODE="
#include <asm/pgtable.h>
void conftest_pgprot_decrypted(void)
if(pgprot_decrypted()) {}
}"
compile_check_conftest "$CODE" "NV_PGPROT_DECRYPTED_PRESENT" "" "functions"
;;
cc_mkdec)
#
# Determine if cc_mkdec() is present.
#
# cc_mkdec() by commit b577f542f93c ("x86/coco: Add API to handle
# encryption mask) in v5.18-rc1 (2022-02-22).
#
CODE="
#if defined(NV_ASM_COCO_H_PRESENT)
#include <asm/coco.h>
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
#include <linux/cc_platform.h>
#endif
void conftest_cc_mkdec(void) {
cc_mkdec();
void conftest_cc_platform_has(void) {
cc_platform_has();
}"
compile_check_conftest "$CODE" "NV_CC_MKDEC_PRESENT" "" "functions"
compile_check_conftest "$CODE" "NV_CC_PLATFORM_PRESENT" "" "functions"
;;
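The probe above mirrors the other function-presence tests. A minimal sketch of how the renamed os_cc_enabled global (declared in os-interface.h in this commit) might then be populated at init time, where the helper name and the choice of CC_ATTR_GUEST_MEM_ENCRYPT are illustrative assumptions rather than the driver's actual code:

    #if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
    #include <linux/cc_platform.h>
    #endif

    /* declared 'extern' in os-interface.h (see hunk above); defined here
     * only for the sketch */
    NvBool os_cc_enabled = NV_FALSE;

    /* Hypothetical init-time helper, not the driver's implementation. */
    static void nv_detect_confidential_compute(void)
    {
    #if defined(NV_CC_PLATFORM_PRESENT)
        /* True inside any memory-encrypted guest (SEV, SEV-SNP, TDX, ...). */
        os_cc_enabled = cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);
    #endif
    }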
drm_prime_pages_to_sg_has_drm_device_arg)
@@ -6636,8 +6674,8 @@ case "$5" in
if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then
# On x86_64, vGPU requires MDEV framework to be present.
# On aarch64, vGPU requires vfio-pci-core framework to be present.
if ([ "$ARCH" = "arm64" ] && [ "$VFIO_PCI_CORE_PRESENT" != "0" ]) ||
# On aarch64, vGPU requires MDEV or vfio-pci-core framework to be present.
if ([ "$ARCH" = "arm64" ] && ([ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ])) ||
([ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" != "0" ];) then
exit 0
fi
@@ -6649,8 +6687,8 @@ case "$5" in
echo "CONFIG_VFIO_IOMMU_TYPE1";
fi
if [ "$ARCH" = "arm64" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
echo "CONFIG_VFIO_PCI_CORE";
if [ "$ARCH" = "arm64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
echo "either CONFIG_VFIO_MDEV or CONFIG_VFIO_PCI_CORE";
fi
if [ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ]; then

View File

@@ -1367,8 +1367,23 @@ static struct drm_driver nv_drm_driver = {
.ioctls = nv_drm_ioctls,
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
/*
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
* conversion") unexports drm_gem_prime_handle_to_fd() and
* drm_gem_prime_fd_to_handle().
*
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
* all drivers") made these helpers the default when .prime_handle_to_fd /
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
* them if the helpers aren't present.
*/
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
#endif
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_fd_to_handle
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
#endif
.gem_prime_import = nv_drm_gem_prime_import,
.gem_prime_import_sg_table = nv_drm_gem_prime_import_sg_table,

View File

@@ -54,6 +54,8 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group

View File

@@ -121,6 +121,8 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
return true;
if (uvm_channel_is_proxy(push->channel)) {
uvm_pushbuffer_t *pushbuffer;
if (dst.is_virtual) {
UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
return false;
@@ -142,7 +144,8 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
return false;
}
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
@@ -177,10 +180,13 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
// irrespective of the virtualization mode.
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
{
uvm_pushbuffer_t *pushbuffer;
if (!uvm_channel_is_proxy(push->channel))
return;
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
src->address -= uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
}
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,

View File

@@ -760,7 +760,7 @@ static NV_STATUS alloc_vidmem_protected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t
*mem = NULL;
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem_protected(size, gpu, mem));
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
TEST_NV_CHECK_GOTO(zero_vidmem(*mem), err);

View File

@@ -272,19 +272,26 @@ static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static void unlock_channel_for_push(uvm_channel_t *channel)
{
if (uvm_channel_is_secure(channel)) {
NvU32 index = uvm_channel_index_in_pool(channel);
NvU32 index;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(test_bit(index, channel->pool->push_locks));
__clear_bit(index, channel->pool->push_locks);
uvm_up_out_of_order(&channel->pool->push_sem);
}
if (!uvm_conf_computing_mode_enabled(gpu))
return;
index = uvm_channel_index_in_pool(channel);
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(test_bit(index, channel->pool->push_locks));
__clear_bit(index, channel->pool->push_locks);
uvm_up_out_of_order(&channel->pool->push_sem);
}
static bool is_channel_locked_for_push(uvm_channel_t *channel)
{
if (uvm_channel_is_secure(channel))
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_conf_computing_mode_enabled(gpu))
return test_bit(uvm_channel_index_in_pool(channel), channel->pool->push_locks);
// For CE and proxy channels, we always return that the channel is locked,
@@ -295,25 +302,25 @@ static bool is_channel_locked_for_push(uvm_channel_t *channel)
static void lock_channel_for_push(uvm_channel_t *channel)
{
if (uvm_channel_is_secure(channel)) {
NvU32 index = uvm_channel_index_in_pool(channel);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 index = uvm_channel_index_in_pool(channel);
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(!test_bit(index, channel->pool->push_locks));
UVM_ASSERT(!test_bit(index, channel->pool->push_locks));
__set_bit(index, channel->pool->push_locks);
}
__set_bit(index, channel->pool->push_locks);
}
static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 index = uvm_channel_index_in_pool(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
uvm_channel_pool_assert_locked(channel->pool);
if (uvm_channel_is_secure(channel) &&
!test_bit(index, channel->pool->push_locks) &&
try_claim_channel_locked(channel, num_gpfifo_entries)) {
if (!test_bit(index, channel->pool->push_locks) && try_claim_channel_locked(channel, num_gpfifo_entries)) {
lock_channel_for_push(channel);
return true;
}
@@ -321,57 +328,15 @@ static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo
return false;
}
// Reserve a channel in the specified CE pool
static NV_STATUS channel_reserve_in_ce_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
uvm_channel_t *channel;
uvm_spin_loop_t spin;
UVM_ASSERT(pool);
UVM_ASSERT(uvm_channel_pool_is_ce(pool));
uvm_for_each_channel_in_pool(channel, pool) {
// TODO: Bug 1764953: Prefer idle/less busy channels
if (try_claim_channel(channel, 1)) {
*channel_out = channel;
return NV_OK;
}
}
uvm_spin_loop_init(&spin);
while (1) {
uvm_for_each_channel_in_pool(channel, pool) {
NV_STATUS status;
uvm_channel_update_progress(channel);
if (try_claim_channel(channel, 1)) {
*channel_out = channel;
return NV_OK;
}
status = uvm_channel_check_errors(channel);
if (status != NV_OK)
return status;
UVM_SPIN_LOOP(&spin);
}
}
UVM_ASSERT_MSG(0, "Cannot get here?!\n");
return NV_ERR_GENERIC;
}
// Reserve a channel in the specified secure pool
static NV_STATUS channel_reserve_in_secure_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
// Reserve a channel in the specified pool. The channel is locked until the push
// ends
static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
uvm_channel_t *channel;
uvm_spin_loop_t spin;
NvU32 index;
UVM_ASSERT(pool);
UVM_ASSERT(pool->secure);
UVM_ASSERT(uvm_conf_computing_mode_enabled(pool->manager->gpu));
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
@@ -426,6 +391,51 @@ done:
return NV_OK;
}
// Reserve a channel in the specified pool
static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
uvm_channel_t *channel;
uvm_spin_loop_t spin;
UVM_ASSERT(pool);
if (uvm_conf_computing_mode_enabled(pool->manager->gpu))
return channel_reserve_and_lock_in_pool(pool, channel_out);
uvm_for_each_channel_in_pool(channel, pool) {
// TODO: Bug 1764953: Prefer idle/less busy channels
if (try_claim_channel(channel, 1)) {
*channel_out = channel;
return NV_OK;
}
}
uvm_spin_loop_init(&spin);
while (1) {
uvm_for_each_channel_in_pool(channel, pool) {
NV_STATUS status;
uvm_channel_update_progress(channel);
if (try_claim_channel(channel, 1)) {
*channel_out = channel;
return NV_OK;
}
status = uvm_channel_check_errors(channel);
if (status != NV_OK)
return status;
UVM_SPIN_LOOP(&spin);
}
}
UVM_ASSERT_MSG(0, "Cannot get here?!\n");
return NV_ERR_GENERIC;
}
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
{
uvm_channel_pool_t *pool = manager->pool_to_use.default_for_type[type];
@@ -433,10 +443,7 @@ NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_t
UVM_ASSERT(pool != NULL);
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
if (pool->secure)
return channel_reserve_in_secure_pool(pool, channel_out);
return channel_reserve_in_ce_pool(pool, channel_out);
return channel_reserve_in_pool(pool, channel_out);
}
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
@@ -452,10 +459,7 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
if (pool->secure)
return channel_reserve_in_secure_pool(pool, channel_out);
return channel_reserve_in_ce_pool(pool, channel_out);
return channel_reserve_in_pool(pool, channel_out);
}
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
@@ -491,7 +495,7 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
return push_info - channel->push_infos;
}
static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, uvm_channel_t *channel, NvU64 semaphore_va)
static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, NvU64 semaphore_va)
{
NvU32 iv_index;
uvm_gpu_address_t notifier_gpu_va;
@@ -499,12 +503,14 @@ static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, uvm_channel_
uvm_gpu_address_t semaphore_gpu_va;
uvm_gpu_address_t encrypted_payload_gpu_va;
uvm_gpu_t *gpu = push->gpu;
uvm_channel_t *channel = push->channel;
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
UvmCslIv *iv_cpu_addr = semaphore->conf_computing.ivs;
NvU32 payload_size = sizeof(*semaphore->payload);
NvU32 *last_pushed_notifier = &semaphore->conf_computing.last_pushed_notifier;
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
encrypted_payload_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.encrypted_payload, gpu, false);
notifier_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.notifier, gpu, false);
@@ -538,19 +544,21 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
{
NV_STATUS status;
uvm_channel_manager_t *manager;
uvm_gpu_t *gpu;
UVM_ASSERT(channel);
UVM_ASSERT(push);
manager = channel->pool->manager;
gpu = uvm_channel_get_gpu(channel);
// Only SEC2 and WLC with set up fixed schedule can use direct push
// submission. All other cases (including WLC pre-schedule) need to
// reserve a launch channel that will be used to submit this push
// indirectly.
if (uvm_conf_computing_mode_enabled(uvm_channel_get_gpu(channel)) &&
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager)) &&
!uvm_channel_is_sec2(channel)) {
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel) &&
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager))) {
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(manager) ?
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
@@ -559,9 +567,9 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
return status;
}
// For secure channels, channel's lock should have been acquired in
// uvm_channel_reserve() or channel_reserve_in_secure_pool() before
// reaching here.
// When the Confidential Computing feature is enabled, the channel's lock
// should have already been acquired in uvm_channel_reserve() or
// channel_reserve_and_lock_in_pool().
UVM_ASSERT(is_channel_locked_for_push(channel));
push->channel = channel;
@@ -586,9 +594,8 @@ static void internal_channel_submit_work(uvm_push_t *push, NvU32 push_size, NvU3
NvU64 *gpfifo_entry;
NvU64 pushbuffer_va;
uvm_channel_t *channel = push->channel;
uvm_channel_manager_t *channel_manager = channel->pool->manager;
uvm_pushbuffer_t *pushbuffer = channel_manager->pushbuffer;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
BUILD_BUG_ON(sizeof(*gpfifo_entry) != NVB06F_GP_ENTRY__SIZE);
UVM_ASSERT(!uvm_channel_is_proxy(channel));
@@ -644,12 +651,11 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
static void do_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, NvU32 new_payload)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_channel_is_ce(push->channel))
gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
else if (uvm_channel_is_sec2(push->channel))
gpu->parent->sec2_hal->semaphore_release(push, semaphore_va, new_payload);
else
UVM_ASSERT_MSG(0, "Semaphore release on an unsupported channel.\n");
gpu->parent->sec2_hal->semaphore_release(push, semaphore_va, new_payload);
}
static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, NvU32 new_payload)
@@ -668,8 +674,8 @@ static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semap
// needs to be scheduled to get an encrypted shadow copy in unprotected
// sysmem. This allows UVM to later decrypt it and observe the new
// semaphore value.
if (uvm_channel_is_secure_ce(push->channel))
channel_semaphore_gpu_encrypt_payload(push, push->channel, semaphore_va);
if (uvm_conf_computing_mode_enabled(push->gpu) && uvm_channel_is_ce(push->channel))
channel_semaphore_gpu_encrypt_payload(push, semaphore_va);
}
static uvm_channel_t *get_paired_channel(uvm_channel_t *channel)
@@ -746,15 +752,12 @@ static void internal_channel_submit_work_wlc(uvm_push_t *push)
wmb();
// Ring the WLC doorbell to start processing the above push
UVM_GPU_WRITE_ONCE(*wlc_channel->channel_info.workSubmissionOffset,
wlc_channel->channel_info.workSubmissionToken);
UVM_GPU_WRITE_ONCE(*wlc_channel->channel_info.workSubmissionOffset, wlc_channel->channel_info.workSubmissionToken);
}
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push, NvU32 old_cpu_put, NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(push->channel);
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
uvm_push_t indirect_push;
@@ -767,7 +770,7 @@ static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
uvm_gpu_address_t push_enc_auth_tag_gpu;
NvU64 gpfifo_gpu_va = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_ce(push->channel));
UVM_ASSERT(uvm_channel_is_wlc(push->launch_channel));
// WLC submissions are done under channel lock, so there should be no
@@ -848,8 +851,6 @@ static void update_gpput_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&gpput_auth_tag_gpu);
// Update GPPUT. The update needs 4B write to specific offset,
// however we can only do 16B aligned decrypt writes.
// A poison value is written to all other locations, this is ignored in
@@ -922,7 +923,7 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
gpfifo_scratchpad[0] = previous_gpfifo->control_value;
}
else {
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
NvU64 prev_pb_va = uvm_pushbuffer_get_gpu_va_base(pushbuffer) + previous_gpfifo->pushbuffer_offset;
// Reconstruct the previous gpfifo entry. UVM_GPFIFO_SYNC_WAIT is
@@ -951,11 +952,9 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
gpfifo_auth_tag_gpu.address);
}
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push, NvU32 old_cpu_put, NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(push->channel);
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
uvm_push_t indirect_push;
@@ -968,7 +967,7 @@ static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
uvm_gpu_address_t push_auth_tag_gpu;
uvm_spin_loop_t spin;
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_ce(push->channel));
UVM_ASSERT(uvm_channel_is_sec2(push->launch_channel));
// If the old_cpu_put is not equal to the last gpu put, other pushes are
@@ -1051,7 +1050,7 @@ static void encrypt_push(uvm_push_t *push)
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 push_size = uvm_push_get_size(push);
uvm_push_info_t *push_info = uvm_push_info_from_push(push);
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
unsigned auth_tag_offset = UVM_CONF_COMPUTING_AUTH_TAG_SIZE * push->push_info_index;
if (!uvm_conf_computing_mode_enabled(gpu))
@@ -1098,6 +1097,7 @@ void uvm_channel_end_push(uvm_push_t *push)
NvU32 push_size;
NvU32 cpu_put;
NvU32 new_cpu_put;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
bool needs_sec2_work_submit = false;
channel_pool_lock(channel->pool);
@@ -1112,7 +1112,7 @@ void uvm_channel_end_push(uvm_push_t *push)
if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
uvm_channel_t *paired_lcic = wlc_get_paired_lcic(channel);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
gpu->parent->ce_hal->semaphore_reduction_inc(push,
paired_lcic->channel_info.gpPutGpuVa,
paired_lcic->num_gpfifo_entries - 1);
@@ -1126,7 +1126,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// pushes. However, direct pushes to WLC can be smaller than this
// size. This is used e.g. by indirect submission of control
// gpfifo entries.
channel_manager->gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
}
}
@@ -1144,7 +1144,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// Indirect submission via SEC2/WLC needs pushes to be aligned for
// encryption/decryption. The pushbuffer_size of this push
// influences starting address of the next push.
if (uvm_conf_computing_mode_enabled(uvm_channel_get_gpu(channel)))
if (uvm_conf_computing_mode_enabled(gpu))
entry->pushbuffer_size = UVM_ALIGN_UP(push_size, UVM_CONF_COMPUTING_BUF_ALIGNMENT);
entry->push_info = &channel->push_infos[push->push_info_index];
entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;
@@ -1158,12 +1158,13 @@ void uvm_channel_end_push(uvm_push_t *push)
else if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
internal_channel_submit_work_wlc(push);
}
else if (uvm_conf_computing_mode_enabled(channel_manager->gpu) && !uvm_channel_is_sec2(channel)) {
else if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel)) {
if (uvm_channel_manager_is_wlc_ready(channel_manager)) {
internal_channel_submit_work_indirect_wlc(push, cpu_put, new_cpu_put);
}
else {
// submitting via SEC2 starts a push, postpone until this push is ended
// submitting via SEC2 starts a push, postpone until this push is
// ended
needs_sec2_work_submit = true;
}
}
@@ -1202,12 +1203,13 @@ void uvm_channel_end_push(uvm_push_t *push)
static void submit_ctrl_gpfifo(uvm_channel_t *channel, uvm_gpfifo_entry_t *entry, NvU32 new_cpu_put)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 cpu_put = channel->cpu_put;
NvU64 *gpfifo_entry;
UVM_ASSERT(entry == &channel->gpfifo_entries[cpu_put]);
if (uvm_conf_computing_mode_enabled(gpu) && !uvm_channel_is_sec2(channel))
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
return;
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
@@ -1234,7 +1236,7 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
UVM_ASSERT(!uvm_channel_is_sec2(channel));
UVM_ASSERT(uvm_channel_is_ce(channel));
// If the old_cpu_put is not equal to the last gpu put,
// Another push(es) is pending that needs to be submitted.
@@ -1290,6 +1292,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
NvU32 cpu_put;
NvU32 new_cpu_put;
bool needs_indirect_submit = false;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
channel_pool_lock(channel->pool);
@@ -1312,7 +1315,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
--channel->current_gpfifo_count;
submit_ctrl_gpfifo(channel, entry, new_cpu_put);
if (uvm_conf_computing_mode_enabled(channel->pool->manager->gpu) && !uvm_channel_is_sec2(channel))
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
needs_indirect_submit = true;
channel->cpu_put = new_cpu_put;
@@ -1385,16 +1388,15 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
return NV_OK;
}
static NV_STATUS uvm_channel_reserve_secure(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static NV_STATUS channel_reserve_and_lock(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
uvm_spin_loop_t spin;
NV_STATUS status = NV_OK;
uvm_channel_pool_t *pool = channel->pool;
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
// uvm_channel_end_push() routine. Note that different than in
// channel_reserve_in_secure_pool, we cannot pick an unlocked channel from
// the secure pool, even when there is one available and *channel is locked.
// channel_reserve_and_lock_in_pool, we cannot pick an unlocked channel from
// the pool, even when there is one available and *channel is locked.
// Not a concern given that uvm_channel_reserve() is not the common-case for
// channel reservation, and only used for channel initialization, GPFIFO
// control work submission, and testing.
@@ -1409,6 +1411,8 @@ static NV_STATUS uvm_channel_reserve_secure(uvm_channel_t *channel, NvU32 num_gp
uvm_spin_loop_init(&spin);
while (1) {
NV_STATUS status;
uvm_channel_update_progress(channel);
channel_pool_lock(pool);
@@ -1436,9 +1440,10 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
NV_STATUS status = NV_OK;
uvm_spin_loop_t spin;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_channel_is_secure(channel))
return uvm_channel_reserve_secure(channel, num_gpfifo_entries);
if (uvm_conf_computing_mode_enabled(gpu))
return channel_reserve_and_lock(channel, num_gpfifo_entries);
if (try_claim_channel(channel, num_gpfifo_entries))
return NV_OK;
@@ -1578,8 +1583,10 @@ NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
static NV_STATUS csl_init(uvm_channel_t *channel)
{
NV_STATUS status;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_secure(channel));
uvm_mutex_init(&channel->csl.ctx_lock, UVM_LOCK_ORDER_LEAF);
status = uvm_rm_locked_call(nvUvmInterfaceCslInitContext(&channel->csl.ctx, channel->handle));
@@ -1589,7 +1596,7 @@ static NV_STATUS csl_init(uvm_channel_t *channel)
else {
UVM_DBG_PRINT("nvUvmInterfaceCslInitContext() failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(channel->pool->manager->gpu));
uvm_gpu_name(gpu));
}
return status;
@@ -1609,7 +1616,10 @@ static void csl_destroy(uvm_channel_t *channel)
static void free_conf_computing_buffers(uvm_channel_t *channel)
{
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
uvm_rm_mem_free(channel->conf_computing.static_pb_protected_vidmem);
uvm_rm_mem_free(channel->conf_computing.static_pb_unprotected_sysmem);
@@ -1637,10 +1647,12 @@ static void free_conf_computing_buffers(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
{
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NV_STATUS status;
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
sizeof(semaphore->conf_computing.last_pushed_notifier),
@@ -1679,7 +1691,7 @@ static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
size_t aligned_wlc_push_size = UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
@@ -1723,7 +1735,7 @@ static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
const size_t notifier_size = sizeof(*channel->conf_computing.static_notifier_entry_unprotected_sysmem_cpu);
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
@@ -1758,8 +1770,10 @@ static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
{
NV_STATUS status;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
status = alloc_conf_computing_buffers_semaphore(channel);
if (status != NV_OK)
@@ -1772,7 +1786,6 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
status = alloc_conf_computing_buffers_lcic(channel);
}
else {
uvm_gpu_t *gpu = channel->pool->manager->gpu;
void *push_crypto_bundles = uvm_kvmalloc_zero(sizeof(*channel->conf_computing.push_crypto_bundles) *
channel->num_gpfifo_entries);
@@ -1793,6 +1806,8 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(pool->num_channels > 0);
if (channel->tracking_sem.queued_value > 0) {
@@ -1816,9 +1831,10 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
uvm_kvfree(channel->gpfifo_entries);
if (uvm_channel_is_secure(channel)) {
if (uvm_conf_computing_mode_enabled(gpu)) {
csl_destroy(channel);
if (uvm_channel_is_secure_ce(channel))
if (uvm_channel_is_ce(channel))
free_conf_computing_buffers(channel);
}
@@ -1905,8 +1921,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
channel_alloc_params.gpPutLoc = UVM_BUFFER_LOCATION_SYS;
}
channel_alloc_params.secure = channel->pool->secure;
status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(channel_get_tsg(channel),
&channel_alloc_params,
&channel->handle,
@@ -1928,8 +1942,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
channel_info->hwChannelId,
uvm_channel_is_sec2(channel) ? "SEC2" :
uvm_channel_is_wlc(channel) ? "WLC" :
uvm_channel_is_lcic(channel) ? "LCIC" :
uvm_channel_is_secure(channel) ? "CE (secure)" : "CE",
uvm_channel_is_lcic(channel) ? "LCIC" : "CE",
channel->pool->engine_index);
return NV_OK;
@@ -1981,7 +1994,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
channel->tools.pending_event_count = 0;
INIT_LIST_HEAD(&channel->tools.channel_list_node);
if (uvm_conf_computing_mode_enabled(gpu) && !uvm_channel_is_sec2(channel))
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
semaphore_pool = gpu->secure_semaphore_pool;
status = uvm_gpu_tracking_semaphore_alloc(semaphore_pool, &channel->tracking_sem);
@@ -2007,7 +2020,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
goto error;
}
if (uvm_channel_is_secure(channel)) {
if (uvm_conf_computing_mode_enabled(gpu)) {
status = csl_init(channel);
if (status != NV_OK)
goto error;
@@ -2075,7 +2088,7 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
if (uvm_gpu_has_pushbuffer_segments(gpu)) {
NvU64 gpfifo_entry;
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
NvU64 pb_base = uvm_pushbuffer_get_gpu_va_base(pushbuffer);
if (uvm_channel_is_sec2(channel))
@@ -2095,10 +2108,8 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
if (uvm_channel_is_ce(channel))
gpu->parent->ce_hal->init(&push);
else if (uvm_channel_is_sec2(channel))
gpu->parent->sec2_hal->init(&push);
else
UVM_ASSERT_MSG(0, "Unknown channel type!");
gpu->parent->sec2_hal->init(&push);
gpu->parent->host_hal->init(&push);
@@ -2153,11 +2164,6 @@ static unsigned channel_pool_type_num_tsgs(uvm_channel_pool_type_t pool_type)
return 1;
}
static bool pool_type_is_valid(uvm_channel_pool_type_t pool_type)
{
return(is_power_of_2(pool_type) && (pool_type < UVM_CHANNEL_POOL_TYPE_MASK));
}
static UVM_GPU_CHANNEL_ENGINE_TYPE pool_type_to_engine_type(uvm_channel_pool_type_t pool_type)
{
if (pool_type == UVM_CHANNEL_POOL_TYPE_SEC2)
@@ -2229,7 +2235,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
unsigned num_tsgs;
uvm_channel_pool_t *pool;
UVM_ASSERT(pool_type_is_valid(pool_type));
UVM_ASSERT(uvm_pool_type_is_valid(pool_type));
pool = channel_manager->channel_pools + channel_manager->num_channel_pools;
channel_manager->num_channel_pools++;
@@ -2260,10 +2266,10 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
num_channels = channel_pool_type_num_channels(pool_type);
UVM_ASSERT(num_channels <= UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
if (pool->secure) {
if (uvm_conf_computing_mode_enabled(channel_manager->gpu)) {
// Use different order lock for SEC2 and WLC channels.
// This allows reserving a SEC2 or WLC channel for indirect work
// submission while holding a reservation for a secure channel.
// submission while holding a reservation for a channel.
uvm_lock_order_t order = uvm_channel_pool_is_sec2(pool) ? UVM_LOCK_ORDER_CSL_SEC2_PUSH :
(uvm_channel_pool_is_wlc(pool) ? UVM_LOCK_ORDER_CSL_WLC_PUSH :
UVM_LOCK_ORDER_CSL_PUSH);
@@ -2297,23 +2303,6 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
return status;
}
static NV_STATUS channel_pool_add_secure(uvm_channel_manager_t *channel_manager,
uvm_channel_pool_type_t pool_type,
unsigned engine_index,
uvm_channel_pool_t **pool_out)
{
uvm_channel_pool_t *pool = channel_manager->channel_pools + channel_manager->num_channel_pools;
pool->secure = true;
return channel_pool_add(channel_manager, pool_type, engine_index, pool_out);
}
bool uvm_channel_type_requires_secure_pool(uvm_gpu_t *gpu, uvm_channel_type_t channel_type)
{
// For now, all channels are secure channels
return true;
}
static bool ce_usable_for_channel_type(uvm_channel_type_t type, const UvmGpuCopyEngineCaps *cap)
{
if (!cap->supported || cap->grce)
@@ -2461,13 +2450,6 @@ static NV_STATUS pick_ce_for_channel_type(uvm_channel_manager_t *manager,
if (!ce_usable_for_channel_type(type, cap))
continue;
if (uvm_conf_computing_mode_is_hcc(manager->gpu)) {
// All usable CEs are secure
UVM_ASSERT(cap->secure);
// Multi-PCE LCEs are disallowed
UVM_ASSERT(hweight32(cap->cePceMask) == 1);
}
__set_bit(i, manager->ce_mask);
if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
@@ -2523,7 +2505,7 @@ out:
return status;
}
// Return the non-secure pool corresponding to the given CE index
// Return the pool corresponding to the given CE index
//
// This function cannot be used to access the proxy pool in SR-IOV heavy.
static uvm_channel_pool_t *channel_manager_ce_pool(uvm_channel_manager_t *manager, NvU32 ce)
@@ -2734,24 +2716,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
{
unsigned num_channel_pools;
unsigned num_used_ce = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
// Create one CE channel pool per usable CE
num_channel_pools = num_used_ce;
num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
// CE proxy channel pool.
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
num_channel_pools++;
if (uvm_conf_computing_mode_enabled(manager->gpu)) {
// Create one CE secure channel pool per usable CE
if (uvm_conf_computing_mode_is_hcc(manager->gpu))
num_channel_pools += num_used_ce;
// SEC2 pool, WLC pool, LCIC pool
// SEC2 pool, WLC pool, LCIC pool
if (uvm_conf_computing_mode_enabled(manager->gpu))
num_channel_pools += 3;
}
return num_channel_pools;
}
@@ -2783,38 +2758,6 @@ static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager,
return NV_OK;
}
static NV_STATUS channel_manager_create_ce_secure_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
{
unsigned ce;
if (!uvm_conf_computing_mode_is_hcc(manager->gpu))
return NV_OK;
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
NV_STATUS status;
unsigned type;
uvm_channel_pool_t *pool = NULL;
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
if (status != NV_OK)
return status;
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
unsigned preferred = preferred_ce[type];
if (preferred != ce)
continue;
if (uvm_channel_type_requires_secure_pool(manager->gpu, type)) {
UVM_ASSERT(manager->pool_to_use.default_for_type[type] == NULL);
manager->pool_to_use.default_for_type[type] = pool;
}
}
}
return NV_OK;
}
static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(wlc);
@@ -3142,6 +3085,64 @@ static NV_STATUS channel_manager_setup_wlc_lcic(uvm_channel_pool_t *wlc_pool, uv
return NV_OK;
}
static NV_STATUS channel_manager_create_conf_computing_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
{
NV_STATUS status;
unsigned wlc_lcic_ce_index;
uvm_channel_pool_t *sec2_pool = NULL;
uvm_channel_pool_t *wlc_pool = NULL;
uvm_channel_pool_t *lcic_pool = NULL;
if (!uvm_conf_computing_mode_enabled(manager->gpu))
return NV_OK;
status = uvm_rm_mem_alloc(manager->gpu,
UVM_RM_MEM_TYPE_SYS,
sizeof(UvmCslIv),
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&manager->gpu->conf_computing.iv_rm_mem);
if (status != NV_OK)
return status;
// Create SEC2 pool. This needs to be done first, initialization of
// other channels needs SEC2.
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_SEC2, 0, &sec2_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = sec2_pool;
// Use the same CE as CPU TO GPU channels for WLC/LCIC
// Both need to use the same engine for the fixed schedule to work.
// TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
// work launch
// Find a metric to select the best CE to use
wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];
// Create WLC/LCIC pools. This should be done early, CE channels use
// them for secure launch. The WLC pool must be created before the LCIC.
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_WLC, wlc_lcic_ce_index, &wlc_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] = wlc_pool;
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_LCIC, wlc_lcic_ce_index, &lcic_pool);
if (status != NV_OK)
return status;
status = channel_manager_setup_wlc_lcic(wlc_pool, lcic_pool);
if (status != NV_OK)
return status;
// The LCIC pool must be assigned after the call to
// channel_manager_setup_wlc_lcic(). It determines WLC and LCIC channels
// are ready to be used for secure work submission.
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] = lcic_pool;
return NV_OK;
}
static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
{
NV_STATUS status;
@@ -3162,62 +3163,11 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
if (!manager->channel_pools)
return NV_ERR_NO_MEMORY;
if (uvm_conf_computing_mode_enabled(manager->gpu)) {
uvm_channel_pool_t *sec2_pool = NULL;
uvm_channel_pool_t *wlc_pool = NULL;
uvm_channel_pool_t *lcic_pool = NULL;
unsigned wlc_lcic_ce_index;
status = uvm_rm_mem_alloc(manager->gpu,
UVM_RM_MEM_TYPE_SYS,
sizeof(UvmCslIv),
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&manager->gpu->conf_computing.iv_rm_mem);
if (status != NV_OK)
return status;
// Create SEC2 pool. This needs to be done first, initialization of
// other channels needs SEC2.
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_SEC2, 0, &sec2_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = sec2_pool;
// Use the same CE as CPU TO GPU channels for WLC/LCIC
// Both need to use the same engine for the fixed schedule to work.
// TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
// work launch
// Find a metric to select the best CE to use
wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];
// Create WLC/LCIC pools. This should be done early, CE channels use
// them for secure launch. The WLC pool must be created before the LCIC.
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_WLC, wlc_lcic_ce_index, &wlc_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] = wlc_pool;
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_LCIC, wlc_lcic_ce_index, &lcic_pool);
if (status != NV_OK)
return status;
status = channel_manager_setup_wlc_lcic(wlc_pool, lcic_pool);
if (status != NV_OK)
return status;
// The LCIC pool must be assigned after the call to
// channel_manager_setup_wlc_lcic(). It determines WLC and LCIC channels
// are ready to be used for secure work submission.
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] = lcic_pool;
status = channel_manager_create_ce_secure_pools(manager, preferred_ce);
}
else {
status = channel_manager_create_ce_pools(manager, preferred_ce);
}
status = channel_manager_create_conf_computing_pools(manager, preferred_ce);
if (status != NV_OK)
return status;
status = channel_manager_create_ce_pools(manager, preferred_ce);
if (status != NV_OK)
return status;

View File

@@ -104,16 +104,14 @@ typedef enum
// ----------------------------------
// Channel type with fixed schedules
// Work Launch Channel (WLC) is a specialized channel
// for launching work on other channels when
// Confidential Computing is enabled.
// It is paired with LCIC (below)
// Work Launch Channel (WLC) is a specialized channel for launching work on
// other channels when the Confidential Computing feature is enabled. It is
// paired with LCIC (below)
UVM_CHANNEL_TYPE_WLC,
// Launch Confirmation Indicator Channel (LCIC) is a
// specialized channel with fixed schedule. It gets
// triggered by executing WLC work, and makes sure that
// WLC get/put pointers are up-to-date.
// Launch Confirmation Indicator Channel (LCIC) is a specialized channel
// with fixed schedule. It gets triggered by executing WLC work, and makes
// sure that WLC get/put pointers are up-to-date.
UVM_CHANNEL_TYPE_LCIC,
UVM_CHANNEL_TYPE_COUNT,
@@ -242,11 +240,9 @@ typedef struct
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
// Counting semaphore for available and unlocked channels, it must be
// acquired before submitting work to a secure channel.
// acquired before submitting work to a channel when the Confidential
// Computing feature is enabled.
uvm_semaphore_t push_sem;
// See uvm_channel_is_secure() documentation.
bool secure;
} uvm_channel_pool_t;
struct uvm_channel_struct
@@ -304,8 +300,9 @@ struct uvm_channel_struct
// its internal operation and each push may modify this state.
uvm_mutex_t push_lock;
// Every secure channel has cryptographic state in HW, which is
// mirrored here for CPU-side operations.
// When the Confidential Computing feature is enabled, every channel has
// cryptographic state in HW, which is mirrored here for CPU-side
// operations.
UvmCslContext ctx;
bool is_ctx_initialized;
@@ -459,46 +456,28 @@ struct uvm_channel_manager_struct
// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool);
// A channel is secure if it has HW encryption capabilities.
//
// Secure channels are treated differently in the UVM driver. Each secure
// channel has a unique CSL context associated with it, has relatively
// restrictive reservation policies (in comparison with non-secure channels),
// it is requested to be allocated differently by RM, etc.
static bool uvm_channel_pool_is_secure(uvm_channel_pool_t *pool)
static bool uvm_pool_type_is_valid(uvm_channel_pool_type_t pool_type)
{
return pool->secure;
}
static bool uvm_channel_is_secure(uvm_channel_t *channel)
{
return uvm_channel_pool_is_secure(channel->pool);
return (is_power_of_2(pool_type) && (pool_type < UVM_CHANNEL_POOL_TYPE_MASK));
}
static bool uvm_channel_pool_is_sec2(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_SEC2);
}
static bool uvm_channel_pool_is_secure_ce(uvm_channel_pool_t *pool)
{
return uvm_channel_pool_is_secure(pool) && uvm_channel_pool_is_ce(pool);
}
static bool uvm_channel_pool_is_wlc(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_WLC);
}
static bool uvm_channel_pool_is_lcic(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_LCIC);
}
@@ -508,11 +487,6 @@ static bool uvm_channel_is_sec2(uvm_channel_t *channel)
return uvm_channel_pool_is_sec2(channel->pool);
}
static bool uvm_channel_is_secure_ce(uvm_channel_t *channel)
{
return uvm_channel_pool_is_secure_ce(channel->pool);
}
static bool uvm_channel_is_wlc(uvm_channel_t *channel)
{
return uvm_channel_pool_is_wlc(channel->pool);
@@ -523,12 +497,9 @@ static bool uvm_channel_is_lcic(uvm_channel_t *channel)
return uvm_channel_pool_is_lcic(channel->pool);
}
bool uvm_channel_type_requires_secure_pool(uvm_gpu_t *gpu, uvm_channel_type_t channel_type);
NV_STATUS uvm_channel_secure_init(uvm_gpu_t *gpu, uvm_channel_t *channel);
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
}
@@ -540,11 +511,7 @@ static bool uvm_channel_is_proxy(uvm_channel_t *channel)
static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
if (uvm_channel_pool_is_wlc(pool) || uvm_channel_pool_is_lcic(pool))
return true;
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_pool_is_proxy(pool);
return !uvm_channel_pool_is_sec2(pool);
}
static bool uvm_channel_is_ce(uvm_channel_t *channel)
@@ -686,6 +653,11 @@ static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
return channel->pool->manager->gpu;
}
static uvm_pushbuffer_t *uvm_channel_get_pushbuffer(uvm_channel_t *channel)
{
return channel->pool->manager->pushbuffer;
}
// Index of a channel within the owning pool
static unsigned uvm_channel_index_in_pool(const uvm_channel_t *channel)
{

View File

@@ -681,9 +681,10 @@ done:
}
// The following test is inspired by uvm_push_test.c:test_concurrent_pushes.
// This test verifies that concurrent pushes using the same secure channel pool
// select different channels.
NV_STATUS test_secure_channel_selection(uvm_va_space_t *va_space)
// This test verifies that concurrent pushes using the same channel pool
// select different channels, when the Confidential Computing feature is
// enabled.
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_channel_pool_t *pool;
@@ -703,9 +704,6 @@ NV_STATUS test_secure_channel_selection(uvm_va_space_t *va_space)
uvm_channel_type_t channel_type;
for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
if (!uvm_channel_type_requires_secure_pool(gpu, channel_type))
continue;
pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
TEST_CHECK_RET(pool != NULL);
@@ -997,7 +995,7 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
if (status != NV_OK)
goto done;
status = test_secure_channel_selection(va_space);
status = test_conf_computing_channel_selection(va_space);
if (status != NV_OK)
goto done;

View File

@@ -579,8 +579,10 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);

View File

@@ -91,9 +91,9 @@ struct uvm_gpu_tracking_semaphore_struct
// Create a semaphore pool for a GPU.
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);
// When the Confidential Computing feature is enabled, pools associated with
// secure CE channels are allocated in the CPR of vidmem and as such have
// all the associated access restrictions. Because of this, they're called
// When the Confidential Computing feature is enabled, semaphore pools
// associated with CE channels are allocated in the CPR of vidmem and as such
// have all the associated access restrictions. Because of this, they're called
// secure pools and secure semaphores are allocated out of said secure pools.
NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);

View File

@@ -61,7 +61,11 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
// GH180.
parent_gpu->ce_phys_vidmem_write_supported = !uvm_gpu_is_coherent(parent_gpu);
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
// TODO: Bug 4174553: [HGX-SkinnyJoe][GH180] channel errors discussion/debug
// portion for the uvm tests became nonresponsive after
// some time and then failed even after reboot
parent_gpu->peer_copy_mode = uvm_gpu_is_coherent(parent_gpu) ?
UVM_GPU_PEER_COPY_MODE_VIRTUAL : g_uvm_global.peer_copy_mode;
// All GR context buffers may be mapped to 57b wide VAs. All "compute" units
// accessing GR context buffers support the 57-bit VA range.


@@ -491,7 +491,6 @@ void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
UVM_ASSERT(uvm_push_is_fake(push) || uvm_channel_is_secure(push->channel));
UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
if (!src.is_virtual)
@@ -540,7 +539,6 @@ void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
UVM_ASSERT(!push->channel || uvm_channel_is_secure(push->channel));
UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
// The addressing mode (and aperture, if applicable) of the source and


@@ -279,13 +279,14 @@
// Operations not allowed while holding the lock:
// - GPU memory allocation which can evict memory (would require nesting
// block locks)
//
// - GPU DMA Allocation pool lock (gpu->conf_computing.dma_buffer_pool.lock)
// Order: UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL
// Condition: The Confidential Computing feature is enabled
// Exclusive lock (mutex)
//
// Protects:
// - Protect the state of the uvm_conf_computing_dma_buffer_pool_t
// when the Confidential Computing feature is enabled on the system.
//
// - Chunk mapping lock (gpu->root_chunk_mappings.bitlocks and
// gpu->sysmem_mappings.bitlock)
@@ -321,22 +322,25 @@
// Operations not allowed while holding this lock
// - GPU memory allocation which can evict
//
// - Secure channel CSL channel pool semaphore
// - CE channel CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_CSL_PUSH
// Semaphore per SEC2 channel pool
// Condition: The Confidential Computing feature is enabled
// Semaphore per CE channel pool
//
// The semaphore controls concurrent pushes to secure channels. Secure work
// submission depends on channel availability in GPFIFO entries (as in any
// other channel type) but also on channel locking. Each secure channel has a
// lock to enforce ordering of pushes. The channel's CSL lock is taken on
// channel reservation until uvm_push_end. Secure channels are stateful
// channels and the CSL lock protects their CSL state/context.
// The semaphore controls concurrent pushes to CE channels that are not WCL
// channels. Secure work submission depends on channel availability in
// GPFIFO entries (as in any other channel type) but also on channel
// locking. Each channel has a lock to enforce ordering of pushes. The
// channel's CSL lock is taken on channel reservation until uvm_push_end.
// When the Confidential Computing feature is enabled, channels are
// stateful, and the CSL lock protects their CSL state/context.
//
// Operations allowed while holding this lock
// - Pushing work to CE secure channels
// - Pushing work to CE channels (except for WLC channels)
//
// - WLC CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
// Condition: The Confidential Computing feature is enabled
// Semaphore per WLC channel pool
//
// The semaphore controls concurrent pushes to WLC channels. WLC work
@@ -346,8 +350,8 @@
// channel reservation until uvm_push_end. SEC2 channels are stateful
// channels and the CSL lock protects their CSL state/context.
//
// This lock ORDER is different and sits below generic secure channel CSL
// lock and above SEC2 CSL lock. This reflects the dual nature of WLC
// This lock ORDER is different and sits below the generic channel CSL
// lock and above the SEC2 CSL lock. This reflects the dual nature of WLC
// channels; they use SEC2 indirect work launch during initialization,
// and after their schedule is initialized they provide indirect launch
// functionality to other CE channels.
@@ -357,6 +361,7 @@
//
// - SEC2 CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_SEC2_CSL_PUSH
// Condition: The Confidential Computing feature is enabled
// Semaphore per SEC2 channel pool
//
// The semaphore controls concurrent pushes to SEC2 channels. SEC2 work
@@ -366,9 +371,9 @@
// channel reservation until uvm_push_end. SEC2 channels are stateful
// channels and the CSL lock protects their CSL state/context.
//
// This lock ORDER is different and lower than the generic secure channel
// lock to allow secure work submission to use a SEC2 channel to submit
// work before releasing the CSL lock of the originating secure channel.
// This lock ORDER is different and lower than UVM_LOCK_ORDER_CSL_PUSH
// to allow secure work submission to use a SEC2 channel to submit
// work before releasing the CSL lock of the originating channel.
//
// Operations allowed while holding this lock
// - Pushing work to SEC2 channels
@@ -408,16 +413,18 @@
//
// - WLC Channel lock
// Order: UVM_LOCK_ORDER_WLC_CHANNEL
// Condition: The Confidential Computing feature is enabled
// Spinlock (uvm_spinlock_t)
//
// Lock protecting the state of WLC channels in a channel pool. This lock
// is separate from the above generic channel lock to allow for indirect
// worklaunch pushes while holding the main channel lock.
// (WLC pushes don't need any of the pushbuffer locks described above)
// is separate from the generic channel lock (UVM_LOCK_ORDER_CHANNEL)
// to allow for indirect worklaunch pushes while holding the main channel
// lock (WLC pushes don't need any of the pushbuffer locks described
// above)
//
// - Tools global VA space list lock (g_tools_va_space_list_lock)
// Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
// Reader/writer lock (rw_sempahore)
// Reader/writer lock (rw_semaphore)
//
// This lock protects the list of VA spaces used when broadcasting
// UVM profiling events.
@@ -437,9 +444,10 @@
//
// - Tracking semaphores
// Order: UVM_LOCK_ORDER_SECURE_SEMAPHORE
// When the Confidential Computing feature is enabled, CE semaphores are
// encrypted, and require to take the CSL lock (UVM_LOCK_ORDER_LEAF) to
// decrypt the payload.
// Condition: The Confidential Computing feature is enabled
//
// CE semaphore payloads are encrypted, and require taking the CSL lock
// (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
//
// - Leaf locks
// Order: UVM_LOCK_ORDER_LEAF
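// A minimal sketch (illustrative, not part of this change) of the nesting
// rule described above: the per-pool CE CSL push semaphore
// (UVM_LOCK_ORDER_CSL_PUSH) may be held while the lower-order SEC2 pool
// semaphore (UVM_LOCK_ORDER_SEC2_CSL_PUSH) is taken for indirect work launch,
// never the reverse. Plain kernel semaphores stand in for the UVM wrappers;
// the function and parameter names are made up.
#include <linux/semaphore.h>

static void example_nested_csl_push(struct semaphore *ce_csl_sem,
                                    struct semaphore *sec2_csl_sem)
{
    down(ce_csl_sem);    // UVM_LOCK_ORDER_CSL_PUSH: taken first (higher order)
    down(sec2_csl_sem);  // UVM_LOCK_ORDER_SEC2_CSL_PUSH: nested underneath

    /* ... sign and submit the work through the SEC2 channel ... */

    up(sec2_csl_sem);
    up(ce_csl_sem);
}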


@@ -392,12 +392,6 @@ static NV_STATUS uvm_mem_alloc_vidmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **me
return uvm_mem_alloc(&params, mem_out);
}
// Helper for allocating protected vidmem with the default page size
static NV_STATUS uvm_mem_alloc_vidmem_protected(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **mem_out)
{
return uvm_mem_alloc_vidmem(size, gpu, mem_out);
}
// Helper for allocating sysmem and mapping it on the CPU
static NV_STATUS uvm_mem_alloc_sysmem_and_map_cpu_kernel(NvU64 size, struct mm_struct *mm, uvm_mem_t **mem_out)
{


@@ -134,6 +134,22 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
// first map operation
uvm_page_mask_complement(&va_block_context->caller_page_mask, &va_block->maybe_mapped_pages);
if (uvm_va_block_is_hmm(va_block) && !UVM_ID_IS_CPU(dest_id)) {
// Do not map pages that are already resident on the CPU. This is in
// order to avoid breaking system-wide atomic operations on HMM. HMM's
// implementation of system-side atomic operations involves restricting
// mappings to one processor (CPU or a GPU) at a time. If we were to
// grant a GPU a mapping to system memory, this gets into trouble
// because, on the CPU side, Linux can silently upgrade PTE permissions
// (move from read-only, to read-write, without any MMU notifiers
// firing), thus breaking the model by allowing simultaneous read-write
// access from two separate processors. To avoid that, just don't map
// such pages at all, when migrating.
uvm_page_mask_andnot(&va_block_context->caller_page_mask,
&va_block_context->caller_page_mask,
uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU));
}
// Only map those pages that are not mapped anywhere else (likely due
// to a first touch or a migration). We pass
// UvmEventMapRemoteCauseInvalid since the destination processor of a
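// A self-contained model (illustrative, not driver code) of the page-mask
// arithmetic above, using a plain 64-bit bitmask in place of uvm_page_mask_t:
// start from the complement of the maybe-mapped pages, then, when migrating
// an HMM block to a GPU, drop the pages already resident on the CPU so the
// one-owner rule for system-wide atomics is preserved.
#include <stdint.h>

static uint64_t example_pages_to_map(uint64_t maybe_mapped_pages,
                                     uint64_t cpu_resident_pages,
                                     int hmm_block_migrating_to_gpu)
{
    uint64_t mask = ~maybe_mapped_pages;      // complement, as in the hunk

    if (hmm_block_migrating_to_gpu)
        mask &= ~cpu_resident_pages;          // andnot, as in the hunk

    return mask;
}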


@@ -391,11 +391,13 @@ uvm_gpu_address_t uvm_push_inline_data_end(uvm_push_inline_data_t *data)
inline_data_address = (NvU64) (uintptr_t)(push->next + 1);
}
else {
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
// Offset of the inlined data within the push.
inline_data_address = (push->next - push->begin + 1) * UVM_METHOD_SIZE;
// Add GPU VA of the push begin
inline_data_address += uvm_pushbuffer_get_gpu_va_for_push(channel->pool->manager->pushbuffer, push);
inline_data_address += uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
}
// This will place a noop right before the inline data that was written.
@@ -438,10 +440,8 @@ NvU64 *uvm_push_timestamp(uvm_push_t *push)
if (uvm_channel_is_ce(push->channel))
gpu->parent->ce_hal->semaphore_timestamp(push, address.address);
else if (uvm_channel_is_sec2(push->channel))
gpu->parent->sec2_hal->semaphore_timestamp(push, address.address);
else
UVM_ASSERT_MSG(0, "Semaphore release timestamp on an unsupported channel.\n");
gpu->parent->sec2_hal->semaphore_timestamp(push, address.address);
return timestamp;
}


@@ -377,11 +377,6 @@ static bool uvm_push_has_space(uvm_push_t *push, NvU32 free_space)
NV_STATUS uvm_push_begin_fake(uvm_gpu_t *gpu, uvm_push_t *push);
void uvm_push_end_fake(uvm_push_t *push);
static bool uvm_push_is_fake(uvm_push_t *push)
{
return !push->channel;
}
// Begin an inline data fragment in the push
//
// The inline data will be ignored by the GPU, but can be referenced from


@@ -40,10 +40,9 @@
static NvU32 get_push_begin_size(uvm_channel_t *channel)
{
if (uvm_channel_is_sec2(channel)) {
// SEC2 channels allocate CSL signature buffer at the beginning.
// SEC2 channels allocate CSL signature buffer at the beginning.
if (uvm_channel_is_sec2(channel))
return UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE + UVM_METHOD_SIZE;
}
return 0;
}
@@ -51,10 +50,14 @@ static NvU32 get_push_begin_size(uvm_channel_t *channel)
// This is the storage required by a semaphore release.
static NvU32 get_push_end_min_size(uvm_channel_t *channel)
{
if (uvm_channel_is_ce(channel)) {
if (uvm_channel_is_wlc(channel)) {
// Space (in bytes) used by uvm_push_end() on a Secure CE channel.
// Note that Secure CE semaphore release pushes two memset and one
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_conf_computing_mode_enabled(gpu)) {
if (uvm_channel_is_ce(channel)) {
// Space (in bytes) used by uvm_push_end() on a CE channel when
// the Confidential Computing feature is enabled.
//
// Note that CE semaphore release pushes two memset and one
// encryption method on top of the regular release.
// Memset size
// -------------
@@ -75,43 +78,44 @@ static NvU32 get_push_end_min_size(uvm_channel_t *channel)
//
// TOTAL : 144 Bytes
// Same as CE + LCIC GPPut update + LCIC doorbell
return 24 + 144 + 24 + 24;
}
else if (uvm_channel_is_secure_ce(channel)) {
if (uvm_channel_is_wlc(channel)) {
// Same as CE + LCIC GPPut update + LCIC doorbell
return 24 + 144 + 24 + 24;
}
return 24 + 144;
}
// Space (in bytes) used by uvm_push_end() on a CE channel.
return 24;
}
else if (uvm_channel_is_sec2(channel)) {
UVM_ASSERT(uvm_channel_is_sec2(channel));
// A perfectly aligned inline buffer in SEC2 semaphore release.
// We add UVM_METHOD_SIZE because of the NOP method to reserve
// UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES (the inline buffer.)
return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_METHOD_SIZE;
}
return 0;
UVM_ASSERT(uvm_channel_is_ce(channel));
// Space (in bytes) used by uvm_push_end() on a CE channel.
return 24;
}
static NvU32 get_push_end_max_size(uvm_channel_t *channel)
{
if (uvm_channel_is_ce(channel)) {
if (uvm_channel_is_wlc(channel)) {
// WLC pushes are always padded to UVM_MAX_WLC_PUSH_SIZE
return UVM_MAX_WLC_PUSH_SIZE;
}
// Space (in bytes) used by uvm_push_end() on a CE channel.
return get_push_end_min_size(channel);
}
else if (uvm_channel_is_sec2(channel)) {
// Space (in bytes) used by uvm_push_end() on a SEC2 channel.
// Note that SEC2 semaphore release uses an inline buffer with alignment
// requirements. This is the "worst" case semaphore_release storage.
return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT;
}
// WLC pushes are always padded to UVM_MAX_WLC_PUSH_SIZE
if (uvm_channel_is_wlc(channel))
return UVM_MAX_WLC_PUSH_SIZE;
return 0;
// Space (in bytes) used by uvm_push_end() on a SEC2 channel.
// Note that SEC2 semaphore release uses an inline buffer with alignment
// requirements. This is the "worst" case semaphore_release storage.
if (uvm_channel_is_sec2(channel))
return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT;
UVM_ASSERT(uvm_channel_is_ce(channel));
// Space (in bytes) used by uvm_push_end() on a CE channel.
return get_push_end_min_size(channel);
}
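// A worked example (illustrative only) of the byte counts quoted in the
// comments above: 24 bytes for a plain CE semaphore release, 144 bytes of
// Confidential Computing overhead, and 24 bytes each for the LCIC GPPut
// update and doorbell ring. The enum names are made up.
enum {
    EXAMPLE_CE_RELEASE_BYTES     = 24,
    EXAMPLE_CC_RELEASE_OVERHEAD  = 144,
    EXAMPLE_LCIC_GPPUT_UPDATE    = 24,
    EXAMPLE_LCIC_DOORBELL_RING   = 24,

    // CE channel with Confidential Computing enabled: 24 + 144 = 168 bytes
    EXAMPLE_CE_PUSH_END_CC_BYTES = EXAMPLE_CE_RELEASE_BYTES +
                                   EXAMPLE_CC_RELEASE_OVERHEAD,

    // WLC channel: CE-with-CC size plus the two LCIC updates = 216 bytes
    EXAMPLE_WLC_PUSH_END_BYTES   = EXAMPLE_CE_PUSH_END_CC_BYTES +
                                   EXAMPLE_LCIC_GPPUT_UPDATE +
                                   EXAMPLE_LCIC_DOORBELL_RING,
};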
static NV_STATUS test_push_end_size(uvm_va_space_t *va_space)
@@ -294,10 +298,19 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu;
NvU32 i;
uvm_push_t *pushes;
uvm_tracker_t tracker = UVM_TRACKER_INIT();
uvm_channel_type_t channel_type = UVM_CHANNEL_TYPE_GPU_INTERNAL;
uvm_tracker_t tracker;
// When the Confidential Computing feature is enabled, a channel reserved at
// the start of a push cannot be reserved again until that push ends. The
// test is waived, because the number of pushes it starts per pool exceeds
// the number of channels in the pool, so it would block indefinitely.
gpu = uvm_va_space_find_first_gpu(va_space);
if ((gpu != NULL) && uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
uvm_tracker_init(&tracker);
// As noted above, this test does unsafe things that would be detected by
// lock tracking, opt-out.
@@ -310,16 +323,11 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
}
for_each_va_space_gpu(gpu, va_space) {
NvU32 i;
// A secure channel reserved at the start of a push cannot be reserved
// again until that push ends. The test would block indefinitely
// if secure pools are not skipped, because the number of pushes started
// per pool exceeds the number of channels in the pool.
if (uvm_channel_type_requires_secure_pool(gpu, channel_type))
goto done;
for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {
uvm_push_t *push = &pushes[i];
status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, push, "concurrent push %u", i);
TEST_CHECK_GOTO(status == NV_OK, done);
}
for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {


@@ -458,7 +458,7 @@ static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
void *push_unprotected_cpu_va;
NvU32 pushbuffer_offset = gpfifo->pushbuffer_offset;
NvU32 push_info_index = gpfifo->push_info - channel->push_infos;
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
uvm_push_crypto_bundle_t *crypto_bundle = channel->conf_computing.push_crypto_bundles + push_info_index;
if (channel->conf_computing.push_crypto_bundles == NULL)
@@ -499,7 +499,7 @@ void uvm_pushbuffer_mark_completed(uvm_channel_t *channel, uvm_gpfifo_entry_t *g
uvm_pushbuffer_chunk_t *chunk;
bool need_to_update_chunk = false;
uvm_push_info_t *push_info = gpfifo->push_info;
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);


@@ -270,7 +270,7 @@ static NV_STATUS alloc_and_init_mem(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size
*mem = NULL;
if (type == MEM_ALLOC_TYPE_VIDMEM_PROTECTED) {
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem_protected(size, gpu, mem));
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
TEST_NV_CHECK_GOTO(ce_memset_gpu(gpu, *mem, size, 0xdead), err);
}
@@ -348,9 +348,9 @@ static NV_STATUS cpu_decrypt(uvm_channel_t *channel,
return NV_OK;
}
// gpu_encrypt uses a secure CE for encryption (instead of SEC2). SEC2 does not
// support encryption. The following function is copied from uvm_ce_test.c and
// adapted to SEC2 tests.
// gpu_encrypt uses the Copy Engine for encryption, instead of SEC2. SEC2 does
// not support encryption. The following function is copied from uvm_ce_test.c
// and adapted to SEC2 tests.
static void gpu_encrypt(uvm_push_t *push,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,


@@ -229,6 +229,24 @@ static void unmap_user_pages(struct page **pages, void *addr, NvU64 size)
uvm_kvfree(pages);
}
// This must be called with the mmap_lock held in read mode or better.
static NV_STATUS check_vmas(struct mm_struct *mm, NvU64 start_va, NvU64 size)
{
struct vm_area_struct *vma;
NvU64 addr = start_va;
NvU64 region_end = start_va + size;
do {
vma = find_vma(mm, addr);
if (!vma || !(addr >= vma->vm_start) || uvm_file_is_nvidia_uvm(vma->vm_file))
return NV_ERR_INVALID_ARGUMENT;
addr = vma->vm_end;
} while (addr < region_end);
return NV_OK;
}
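// The rejection test above relies on find_vma() semantics: it returns the
// lowest VMA whose vm_end is greater than addr, so a VMA that starts beyond
// addr means the bytes in between are unmapped. A tiny self-contained model
// (illustrative, not driver code) of that per-iteration coverage check:
#include <stdbool.h>

static bool example_addr_covered_by_vma(unsigned long addr,
                                        unsigned long vma_start,
                                        unsigned long vma_end)
{
    // find_vma() already guarantees addr < vma_end; coverage additionally
    // requires that the VMA does not start past addr.
    return addr >= vma_start && addr < vma_end;
}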
// Map virtual memory of data from [user_va, user_va + size) of current process into kernel.
// Sets *addr to kernel mapping and *pages to the array of struct pages that contain the memory.
static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages)
@@ -237,7 +255,6 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
long ret = 0;
long num_pages;
long i;
struct vm_area_struct **vmas = NULL;
*addr = NULL;
*pages = NULL;
@@ -254,22 +271,30 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
goto fail;
}
vmas = uvm_kvmalloc(sizeof(struct vm_area_struct *) * num_pages);
if (vmas == NULL) {
status = NV_ERR_NO_MEMORY;
// Although uvm_down_read_mmap_lock() is preferable due to its participation
// in the UVM lock dependency tracker, it cannot be used here. That's
// because pin_user_pages() may fault in HMM pages which are GPU-resident.
// When that happens, the UVM page fault handler would record another
// mmap_read_lock() on the same thread as this one, leading to a false
// positive lock dependency report.
//
// Therefore, use the lower level nv_mmap_read_lock() here.
nv_mmap_read_lock(current->mm);
status = check_vmas(current->mm, user_va, size);
if (status != NV_OK) {
nv_mmap_read_unlock(current->mm);
goto fail;
}
nv_mmap_read_lock(current->mm);
ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, vmas);
ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, NULL);
nv_mmap_read_unlock(current->mm);
if (ret != num_pages) {
status = NV_ERR_INVALID_ARGUMENT;
goto fail;
}
for (i = 0; i < num_pages; i++) {
if (page_count((*pages)[i]) > MAX_PAGE_COUNT || uvm_file_is_nvidia_uvm(vmas[i]->vm_file)) {
if (page_count((*pages)[i]) > MAX_PAGE_COUNT) {
status = NV_ERR_INVALID_ARGUMENT;
goto fail;
}
@@ -279,15 +304,12 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
if (*addr == NULL)
goto fail;
uvm_kvfree(vmas);
return NV_OK;
fail:
if (*pages == NULL)
return status;
uvm_kvfree(vmas);
if (ret > 0)
uvm_put_user_pages_dirty(*pages, ret);
else if (ret < 0)


@@ -3055,7 +3055,7 @@ static NV_STATUS conf_computing_copy_pages_finish(uvm_va_block_t *block,
void *auth_tag_buffer_base = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
void *staging_buffer_base = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
UVM_ASSERT(uvm_channel_is_secure(push->channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(push->gpu));
if (UVM_ID_IS_GPU(copy_state->dst.id))
return NV_OK;
@@ -3106,7 +3106,7 @@ static void block_copy_push(uvm_va_block_t *block,
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
if (uvm_channel_is_secure(push->channel)) {
if (uvm_conf_computing_mode_enabled(gpu)) {
if (UVM_ID_IS_CPU(copy_state->src.id))
conf_computing_block_copy_push_cpu_to_gpu(block, copy_state, region, push);
else
@@ -3134,19 +3134,18 @@ static NV_STATUS block_copy_end_push(uvm_va_block_t *block,
// at that point.
uvm_push_end(push);
if ((push_status == NV_OK) && uvm_channel_is_secure(push->channel))
if ((push_status == NV_OK) && uvm_conf_computing_mode_enabled(push->gpu))
push_status = conf_computing_copy_pages_finish(block, copy_state, push);
tracker_status = uvm_tracker_add_push_safe(copy_tracker, push);
if (push_status == NV_OK)
push_status = tracker_status;
if (uvm_channel_is_secure(push->channel)) {
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_conf_computing_mode_enabled(push->gpu)) {
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
uvm_tracker_overwrite_with_push(&local_tracker, push);
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool,
uvm_conf_computing_dma_buffer_free(&push->gpu->conf_computing.dma_buffer_pool,
copy_state->dma_buffer,
&local_tracker);
copy_state->dma_buffer = NULL;
@@ -9612,15 +9611,9 @@ static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
if (uvm_processor_mask_empty(&revoke_processors))
new_prot = UVM_PROT_READ_WRITE;
}
if (logical_prot == UVM_PROT_READ_WRITE_ATOMIC) {
// HMM allocations with logical read/write/atomic permission can be
// upgraded without notifying the driver so assume read/write/atomic
// even if the fault is only for reading.
if (new_prot == UVM_PROT_READ_WRITE ||
(UVM_ID_IS_CPU(fault_processor_id) && uvm_va_block_is_hmm(va_block))) {
if (uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(new_residency)], fault_processor_id))
new_prot = UVM_PROT_READ_WRITE_ATOMIC;
}
if (logical_prot == UVM_PROT_READ_WRITE_ATOMIC && new_prot == UVM_PROT_READ_WRITE) {
if (uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(new_residency)], fault_processor_id))
new_prot = UVM_PROT_READ_WRITE_ATOMIC;
}
return new_prot;
@@ -9857,8 +9850,6 @@ out:
return status == NV_OK ? tracker_status : status;
}
// TODO: Bug 1750144: check logical permissions from HMM to know what's the
// maximum allowed.
uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index)
@@ -9935,14 +9926,18 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
// Exclude the processor for which the mapping protections are being computed
uvm_processor_mask_clear(&write_mappings, processor_id);
// At this point, any processor with atomic mappings either has native atomics support to the
// processor with the resident copy or has disabled system-wide atomics. If the requesting
// processor has disabled system-wide atomics or has native atomics to that processor, we can
// map with ATOMIC privileges. Likewise, if there are no other processors with WRITE or ATOMIC
// mappings, we can map with ATOMIC privileges.
// At this point, any processor with atomic mappings either has native
// atomics support to the processor with the resident copy or has
// disabled system-wide atomics. If the requesting processor has
// disabled system-wide atomics or has native atomics to that processor,
// we can map with ATOMIC privileges. Likewise, if there are no other
// processors with WRITE or ATOMIC mappings, we can map with ATOMIC
// privileges. For HMM, don't allow GPU atomic access to remote mapped
// system memory even if there are no write mappings since CPU access
// can be upgraded without notification.
if (!uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, processor_id) ||
uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(residency)], processor_id) ||
uvm_processor_mask_empty(&write_mappings)) {
(uvm_processor_mask_empty(&write_mappings) && !uvm_va_block_is_hmm(va_block))) {
return UVM_PROT_READ_WRITE_ATOMIC;
}
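// A condensed, self-contained model (illustrative, not the driver code) of
// the mapping-protection rule spelled out above; all names are made up.
#include <stdbool.h>

typedef enum {
    EXAMPLE_PROT_READ_WRITE,
    EXAMPLE_PROT_READ_WRITE_ATOMIC
} example_prot_t;

static example_prot_t example_highest_prot(bool system_wide_atomics_enabled,
                                           bool has_native_atomics_to_residency,
                                           bool other_write_or_atomic_mappings,
                                           bool is_hmm_block)
{
    // Atomic access is safe when system-wide atomics are disabled for this
    // processor, or it has native atomics to the resident copy, or nothing
    // else can write -- except for HMM blocks, where CPU PTEs may be upgraded
    // to read-write without notification.
    if (!system_wide_atomics_enabled ||
        has_native_atomics_to_residency ||
        (!other_write_or_atomic_mappings && !is_hmm_block))
        return EXAMPLE_PROT_READ_WRITE_ATOMIC;

    return EXAMPLE_PROT_READ_WRITE;
}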


@@ -86,6 +86,14 @@
#include <linux/ioport.h>
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
#include <linux/cc_platform.h>
#endif
#if defined(NV_ASM_CPUFEATURE_H_PRESENT)
#include <asm/cpufeature.h>
#endif
#include "conftest/patches.h"
#define RM_THRESHOLD_TOTAL_IRQ_COUNT 100000
@@ -139,8 +147,6 @@ struct semaphore nv_linux_devices_lock;
static NvTristate nv_chipset_is_io_coherent = NV_TRISTATE_INDETERMINATE;
NvU64 nv_shared_gpa_boundary = 0;
// True if all the successfully probed devices support ATS
// Assigned at device probe (module init) time
NvBool nv_ats_supported = NVCPU_IS_PPC64LE
@@ -234,77 +240,23 @@ struct dev_pm_ops nv_pm_ops = {
*** STATIC functions
***/
#if defined(NVCPU_X86_64)
#define NV_AMD_SEV_BIT BIT(1)
#define NV_GENMASK_ULL(h, l) \
(((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
static
void get_shared_gpa_boundary(
void nv_detect_conf_compute_platform(
void
)
{
NvU32 priv_high = cpuid_ebx(0x40000003);
if (priv_high & BIT(22))
#if defined(NV_CC_PLATFORM_PRESENT)
os_cc_enabled = cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);
#if defined(X86_FEATURE_TDX_GUEST)
if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
{
NvU32 isolation_config_b = cpuid_ebx(0x4000000C);
nv_shared_gpa_boundary = ((NvU64)1) << ((isolation_config_b & NV_GENMASK_ULL(11, 6)) >> 6);
os_cc_tdx_enabled = NV_TRUE;
}
}
static
NvBool nv_is_sev_supported(
void
)
{
unsigned int eax, ebx, ecx, edx;
/* Check for the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
if (eax < 0x8000001f)
return NV_FALSE;
/* By design, a VM using vTOM doesn't see the SEV setting */
get_shared_gpa_boundary();
if (nv_shared_gpa_boundary != 0)
return NV_TRUE;
eax = 0x8000001f;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
/* Check whether SEV is supported */
if (!(eax & NV_AMD_SEV_BIT))
return NV_FALSE;
return NV_TRUE;
}
#endif
static
void nv_sev_init(
void
)
{
#if defined(MSR_AMD64_SEV) && defined(NVCPU_X86_64)
NvU32 lo_val, hi_val;
if (!nv_is_sev_supported())
return;
rdmsr(MSR_AMD64_SEV, lo_val, hi_val);
os_sev_status = lo_val;
#if defined(MSR_AMD64_SEV_ENABLED)
os_sev_enabled = (os_sev_status & MSR_AMD64_SEV_ENABLED);
#endif
/* By design, a VM using vTOM doesn't see the SEV setting */
if (nv_shared_gpa_boundary != 0)
os_sev_enabled = NV_TRUE;
#else
os_cc_enabled = NV_FALSE;
os_cc_tdx_enabled = NV_FALSE;
#endif
}
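// A minimal sketch (illustrative, not part of this change) of the replacement
// detection model above: the removed CPUID/MSR probing is folded into the
// kernel's cc_platform_has() and cpu_feature_enabled() helpers, leaving only
// two booleans. Assumes an x86 kernel that provides linux/cc_platform.h and
// asm/cpufeature.h; the example_ names are made up.
#include <linux/types.h>
#include <linux/cc_platform.h>
#include <asm/cpufeature.h>

static bool example_cc_enabled;
static bool example_cc_tdx_enabled;

static void example_detect_conf_compute(void)
{
    // True on SEV/SEV-ES/SEV-SNP and TDX guests alike.
    example_cc_enabled = cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);

#if defined(X86_FEATURE_TDX_GUEST)
    // Set only on Intel TDX guests, for TDX-specific code paths.
    example_cc_tdx_enabled = cpu_feature_enabled(X86_FEATURE_TDX_GUEST);
#endif
}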
@@ -710,7 +662,7 @@ nv_module_init(nv_stack_t **sp)
}
nv_init_rsync_info();
nv_sev_init();
nv_detect_conf_compute_platform();
if (!rm_init_rm(*sp))
{
@@ -4570,19 +4522,19 @@ NvU64 NV_API_CALL nv_get_dma_start_address(
* as the starting address for all DMA mappings.
*/
saved_dma_mask = pci_dev->dma_mask;
if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64)) != 0)
if (dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)) != 0)
{
goto done;
}
dma_addr = pci_map_single(pci_dev, NULL, 1, DMA_BIDIRECTIONAL);
if (pci_dma_mapping_error(pci_dev, dma_addr))
dma_addr = dma_map_single(&pci_dev->dev, NULL, 1, DMA_BIDIRECTIONAL);
if (dma_mapping_error(&pci_dev->dev, dma_addr))
{
pci_set_dma_mask(pci_dev, saved_dma_mask);
dma_set_mask(&pci_dev->dev, saved_dma_mask);
goto done;
}
pci_unmap_single(pci_dev, dma_addr, 1, DMA_BIDIRECTIONAL);
dma_unmap_single(&pci_dev->dev, dma_addr, 1, DMA_BIDIRECTIONAL);
/*
* From IBM: "For IODA2, native DMA bypass or KVM TCE-based implementation
@@ -4614,7 +4566,7 @@ NvU64 NV_API_CALL nv_get_dma_start_address(
*/
nv_printf(NV_DBG_WARNINGS,
"NVRM: DMA window limited by platform\n");
pci_set_dma_mask(pci_dev, saved_dma_mask);
dma_set_mask(&pci_dev->dev, saved_dma_mask);
goto done;
}
else if ((dma_addr & saved_dma_mask) != 0)
@@ -4633,7 +4585,7 @@ NvU64 NV_API_CALL nv_get_dma_start_address(
*/
nv_printf(NV_DBG_WARNINGS,
"NVRM: DMA window limited by memory size\n");
pci_set_dma_mask(pci_dev, saved_dma_mask);
dma_set_mask(&pci_dev->dev, saved_dma_mask);
goto done;
}
}
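// A generic sketch (illustrative, not driver code) of the struct-device DMA
// API this hunk migrates to, replacing the removed pci_* wrappers. The
// function, 'dev', and 'buf' are placeholders; the driver's actual probe maps
// a dummy address rather than a real buffer.
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int example_dma_roundtrip(struct device *dev, void *buf, size_t len)
{
    dma_addr_t handle;

    if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
        return -EIO;

    handle = dma_map_single(dev, buf, len, DMA_BIDIRECTIONAL);
    if (dma_mapping_error(dev, handle))
        return -EIO;

    /* ... program the device with 'handle' ... */

    dma_unmap_single(dev, handle, len, DMA_BIDIRECTIONAL);
    return 0;
}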


@@ -209,7 +209,7 @@ NV_STATUS nvUvmInterfaceSessionCreate(uvmGpuSessionHandle *session,
memset(platformInfo, 0, sizeof(*platformInfo));
platformInfo->atsSupported = nv_ats_supported;
platformInfo->sevEnabled = os_sev_enabled;
platformInfo->sevEnabled = os_cc_enabled;
status = rm_gpu_ops_create_session(sp, (gpuSessionHandle *)session);


@@ -120,6 +120,9 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_array_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_array_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_cache
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_wc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_driver_hardened
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_driver_hardened_wc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_cache_shared
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_get_domain_bus_and_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_num_physpages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
@@ -156,8 +159,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += full_name_hash
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_enable_atomic_ops_to_root
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vga_tryget
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pgprot_decrypted
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_mkdec
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_platform_has
NV_CONFTEST_FUNCTION_COMPILE_TESTS += seq_read_iter
NV_CONFTEST_FUNCTION_COMPILE_TESTS += unsafe_follow_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
@@ -263,4 +265,4 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += vfio_pci_core_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += mdev_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += cmd_uphy_display_port_init
NV_CONFTEST_GENERIC_COMPILE_TESTS += cmd_uphy_display_port_off
NV_CONFTEST_GENERIC_COMPILE_TESTS += memory_failure_mf_sw_simulated_defined
NV_CONFTEST_GENERIC_COMPILE_TESTS += memory_failure_mf_sw_simulated_defined


@@ -41,8 +41,8 @@ extern nv_kthread_q_t nv_kthread_q;
NvU32 os_page_size = PAGE_SIZE;
NvU64 os_page_mask = NV_PAGE_MASK;
NvU8 os_page_shift = PAGE_SHIFT;
NvU32 os_sev_status = 0;
NvBool os_sev_enabled = 0;
NvBool os_cc_enabled = 0;
NvBool os_cc_tdx_enabled = 0;
#if defined(CONFIG_DMA_SHARED_BUFFER)
NvBool os_dma_buf_enabled = NV_TRUE;