Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
535.98
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.86.10\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.98\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -275,6 +275,7 @@ NV_HEADER_PRESENCE_TESTS = \
asm/opal-api.h \
sound/hdaudio.h \
asm/pgtable_types.h \
asm/page.h \
linux/stringhash.h \
linux/dma-map-ops.h \
rdma/peer_mem.h \
@@ -300,7 +301,9 @@ NV_HEADER_PRESENCE_TESTS = \
linux/vfio_pci_core.h \
linux/mdev.h \
soc/tegra/bpmp-abi.h \
soc/tegra/bpmp.h
soc/tegra/bpmp.h \
linux/cc_platform.h \
asm/cpufeature.h

# Filename to store the define for the header in $(1); this is only consumed by
# the rule below that concatenates all of these together.

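Each entry in NV_HEADER_PRESENCE_TESTS above causes conftest to emit an NV_<HEADER>_H_PRESENT define, which driver code then uses to guard the corresponding #include. A minimal sketch of that consumption pattern, matching the guards that appear verbatim in the conftest hunks later in this commit:

/* Sketch: pull in <asm/page.h> only when the presence test reported it. */
#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif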
@@ -511,7 +511,11 @@ static inline void nv_vfree(void *ptr, NvU64 size)

static inline void *nv_ioremap(NvU64 phys, NvU64 size)
{
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
void *ptr = ioremap_driver_hardened(phys, size);
#else
void *ptr = ioremap(phys, size);
#endif
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
@@ -524,11 +528,11 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)

static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
{
#if defined(NV_IOREMAP_CACHE_PRESENT)
void *ptr = ioremap_cache(phys, size);
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
ptr = ioremap_cache_shared(phys, size);
#elif defined(NV_IOREMAP_CACHE_PRESENT)
ptr = ioremap_cache(phys, size);
#elif defined(NVCPU_PPC64LE)
//
// ioremap_cache() has been only implemented correctly for ppc64le with
@@ -543,25 +547,32 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
// (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
// support on power.
//
void *ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
#else
return nv_ioremap(phys, size);
#endif

if (ptr)
NV_MEMDBG_ADD(ptr, size);

return ptr;
}

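As a usage sketch only (not part of the commit): the helpers above hand back either a hardened/shared mapping or a plain ioremap() result, so callers treat them uniformly. The function below and its size value are hypothetical; nv_iounmap(), shown a few lines further down, takes the same size so the NV_MEMDBG accounting stays symmetric.

/* Hypothetical caller: map a register window, poke it, unmap it. */
static int example_map_regs(NvU64 bar_phys)
{
    NvU64 size = 0x1000;                       /* made-up window size */
    void *regs = nv_ioremap_nocache(bar_phys, size);

    if (regs == NULL)
        return -1;                             /* mapping (or hardened variant) failed */

    /* ... readl()/writel() style MMIO accesses would go here ... */

    nv_iounmap(regs, size);                    /* same size as the mapping */
    return 0;
}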
static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
|
||||
{
|
||||
#if defined(NV_IOREMAP_WC_PRESENT)
|
||||
void *ptr = ioremap_wc(phys, size);
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
return ptr;
|
||||
void *ptr = NULL;
|
||||
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
|
||||
ptr = ioremap_driver_hardened_wc(phys, size);
|
||||
#elif defined(NV_IOREMAP_WC_PRESENT)
|
||||
ptr = ioremap_wc(phys, size);
|
||||
#else
|
||||
return nv_ioremap_nocache(phys, size);
|
||||
#endif
|
||||
|
||||
if (ptr)
|
||||
NV_MEMDBG_ADD(ptr, size);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void nv_iounmap(void *ptr, NvU64 size)
|
||||
@@ -634,37 +645,24 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
free_pages(ptr, order); \
|
||||
}
|
||||
|
||||
extern NvU64 nv_shared_gpa_boundary;
|
||||
static inline pgprot_t nv_sme_clr(pgprot_t prot)
|
||||
{
|
||||
#if defined(__sme_clr)
|
||||
return __pgprot(__sme_clr(pgprot_val(prot)));
|
||||
#else
|
||||
return prot;
|
||||
#endif // __sme_clr
|
||||
}
|
||||
|
||||
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
/*
* When AMD memory encryption is enabled, device memory mappings with the
* C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
*
* If cc_mkdec() is present, then pgprot_decrypted() can't be used.
*/
#if defined(NV_CC_MKDEC_PRESENT)
if (nv_shared_gpa_boundary != 0)
{
/*
* By design, a VM using vTOM doesn't see the SEV setting and
* for AMD with vTOM, *set* means decrypted.
*/
prot = __pgprot(nv_shared_gpa_boundary | (pgprot_val(vm_prot)));
}
else
{
prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
}
#else
prot = pgprot_decrypted(prot);
#endif
#endif

return prot;
#if defined(pgprot_decrypted)
return pgprot_decrypted(prot);
#else
return nv_sme_clr(prot);
#endif // pgprot_decrypted
}

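A usage sketch, not taken from this commit: nv_adjust_pgprot() is the sort of helper an mmap path calls right before handing pages to user space, so the decrypted/vTOM-adjusted protection actually reaches the PTEs. The handler name, pfn, and the extra attribute bits below are placeholders; remap_pfn_range() and vm_page_prot are the standard kernel interfaces.

/* Hypothetical mmap path: adjust the protection, then remap device pages. */
static int example_mmap_device(struct vm_area_struct *vma, unsigned long pfn, NvU32 extra)
{
    /* Clear the C-bit (or apply the vTOM boundary) for this user mapping. */
    vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, extra);

    return remap_pfn_range(vma,
                           vma->vm_start,
                           pfn,
                           vma->vm_end - vma->vm_start,
                           vma->vm_page_prot);
}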
#if defined(PAGE_KERNEL_NOENC)
@@ -1324,7 +1322,7 @@ nv_dma_maps_swiotlb(struct device *dev)
* SEV memory encryption") forces SWIOTLB to be enabled when AMD SEV
* is active in all cases.
*/
if (os_sev_enabled)
if (os_cc_enabled)
swiotlb_in_use = NV_TRUE;
#endif


@@ -321,10 +321,6 @@ typedef struct UvmGpuChannelAllocParams_tag
|
||||
// The next two fields store UVM_BUFFER_LOCATION values
|
||||
NvU32 gpFifoLoc;
|
||||
NvU32 gpPutLoc;
|
||||
|
||||
// Allocate the channel as secure. This flag should only be set when
|
||||
// Confidential Compute is enabled.
|
||||
NvBool secure;
|
||||
} UvmGpuChannelAllocParams;
|
||||
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag
|
||||
@@ -368,9 +364,6 @@ typedef struct
|
||||
// True if the CE can be used for P2P transactions
|
||||
NvBool p2p:1;
|
||||
|
||||
// True if the CE supports encryption
|
||||
NvBool secure:1;
|
||||
|
||||
// Mask of physical CEs assigned to this LCE
|
||||
//
|
||||
// The value returned by RM for this field may change when a GPU is
|
||||
|
||||
@@ -214,8 +214,8 @@ NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
|
||||
extern NvU32 os_page_size;
|
||||
extern NvU64 os_page_mask;
|
||||
extern NvU8 os_page_shift;
|
||||
extern NvU32 os_sev_status;
|
||||
extern NvBool os_sev_enabled;
|
||||
extern NvBool os_cc_enabled;
|
||||
extern NvBool os_cc_tdx_enabled;
|
||||
extern NvBool os_dma_buf_enabled;
|
||||
|
||||
/*
|
||||
|
||||
@@ -445,6 +445,9 @@ compile_test() {
|
||||
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
|
||||
#include <asm/pgtable_types.h>
|
||||
#endif
|
||||
#if defined(NV_ASM_PAGE_H_PRESENT)
|
||||
#include <asm/page.h>
|
||||
#endif
|
||||
#include <asm/set_memory.h>
|
||||
#else
|
||||
#include <asm/cacheflush.h>
|
||||
@@ -467,6 +470,9 @@ compile_test() {
|
||||
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
|
||||
#include <asm/pgtable_types.h>
|
||||
#endif
|
||||
#if defined(NV_ASM_PAGE_H_PRESENT)
|
||||
#include <asm/page.h>
|
||||
#endif
|
||||
#include <asm/set_memory.h>
|
||||
#else
|
||||
#include <asm/cacheflush.h>
|
||||
@@ -524,6 +530,9 @@ compile_test() {
|
||||
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
|
||||
#include <asm/pgtable_types.h>
|
||||
#endif
|
||||
#if defined(NV_ASM_PAGE_H_PRESENT)
|
||||
#include <asm/page.h>
|
||||
#endif
|
||||
#include <asm/set_memory.h>
|
||||
#else
|
||||
#include <asm/cacheflush.h>
|
||||
@@ -551,6 +560,9 @@ compile_test() {
|
||||
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
|
||||
#include <asm/pgtable_types.h>
|
||||
#endif
|
||||
#if defined(NV_ASM_PAGE_H_PRESENT)
|
||||
#include <asm/page.h>
|
||||
#endif
|
||||
#include <asm/set_memory.h>
|
||||
#else
|
||||
#include <asm/cacheflush.h>
|
||||
@@ -695,6 +707,50 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_IOREMAP_WC_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
ioremap_driver_hardened)
|
||||
#
|
||||
# Determine if the ioremap_driver_hardened() function is present.
|
||||
# It does not exist on all architectures.
|
||||
# TODO: Update the commit ID once the API is upstreamed.
|
||||
#
|
||||
CODE="
|
||||
#include <asm/io.h>
|
||||
void conftest_ioremap_driver_hardened(void) {
|
||||
ioremap_driver_hardened();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
ioremap_driver_hardened_wc)
|
||||
#
|
||||
# Determine if the ioremap_driver_hardened_wc() function is present.
|
||||
# It does not exist on all architectures.
|
||||
# TODO: Update the commit ID once the API is upstreamed.
|
||||
#
|
||||
CODE="
|
||||
#include <asm/io.h>
|
||||
void conftest_ioremap_driver_hardened_wc(void) {
|
||||
ioremap_driver_hardened_wc();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
ioremap_cache_shared)
|
||||
#
|
||||
# Determine if the ioremap_cache_shared() function is present.
|
||||
# It does not exist on all architectures.
|
||||
# TODO: Update the commit ID once the API is upstreamed.
|
||||
#
|
||||
CODE="
|
||||
#include <asm/io.h>
|
||||
void conftest_ioremap_cache_shared(void) {
|
||||
ioremap_cache_shared();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_IOREMAP_CACHE_SHARED_PRESENT" "" "functions"
|
||||
;;
|
||||
dom0_kernel_present)
|
||||
# Add config parameter if running on DOM0.
|
||||
if [ -n "$VGX_BUILD" ]; then
|
||||
@@ -4888,40 +4944,22 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PCI_CHANNEL_STATE_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
pgprot_decrypted)
|
||||
cc_platform_has)
|
||||
#
|
||||
# Determine if the macro 'pgprot_decrypted()' is present.
|
||||
# Determine if 'cc_platform_has()' is present.
|
||||
#
|
||||
# Added by commit 21729f81ce8a ("x86/mm: Provide general kernel
|
||||
# support for memory encryption") in v4.14 (2017-07-18)
|
||||
# Added by commit aa5a461171f9 ("x86/sev: Add an x86 version of
|
||||
# cc_platform_has()") in v5.15.3 (2021-10-04)
|
||||
CODE="
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
void conftest_pgprot_decrypted(void)
|
||||
if(pgprot_decrypted()) {}
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PGPROT_DECRYPTED_PRESENT" "" "functions"
|
||||
|
||||
;;
|
||||
|
||||
cc_mkdec)
|
||||
#
|
||||
# Determine if cc_mkdec() is present.
|
||||
#
|
||||
# cc_mkdec() by commit b577f542f93c ("x86/coco: Add API to handle
|
||||
# encryption mask) in v5.18-rc1 (2022-02-22).
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_ASM_COCO_H_PRESENT)
|
||||
#include <asm/coco.h>
|
||||
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
|
||||
#include <linux/cc_platform.h>
|
||||
#endif
|
||||
|
||||
void conftest_cc_mkdec(void) {
|
||||
cc_mkdec();
|
||||
void conftest_cc_platfrom_has(void) {
|
||||
cc_platform_has();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CC_MKDEC_PRESENT" "" "functions"
|
||||
compile_check_conftest "$CODE" "NV_CC_PLATFORM_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_prime_pages_to_sg_has_drm_device_arg)
|
||||
@@ -6636,8 +6674,8 @@ case "$5" in
|
||||
if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then
|
||||
|
||||
# On x86_64, vGPU requires MDEV framework to be present.
|
||||
# On aarch64, vGPU requires vfio-pci-core framework to be present.
|
||||
if ([ "$ARCH" = "arm64" ] && [ "$VFIO_PCI_CORE_PRESENT" != "0" ]) ||
|
||||
# On aarch64, vGPU requires MDEV or vfio-pci-core framework to be present.
|
||||
if ([ "$ARCH" = "arm64" ] && ([ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ])) ||
|
||||
([ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" != "0" ];) then
|
||||
exit 0
|
||||
fi
|
||||
@@ -6649,8 +6687,8 @@ case "$5" in
|
||||
echo "CONFIG_VFIO_IOMMU_TYPE1";
|
||||
fi
|
||||
|
||||
if [ "$ARCH" = "arm64" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
|
||||
echo "CONFIG_VFIO_PCI_CORE";
|
||||
if [ "$ARCH" = "arm64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
|
||||
echo "either CONFIG_VFIO_MDEV or CONFIG_VFIO_PCI_CORE";
|
||||
fi
|
||||
|
||||
if [ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ]; then
|
||||
|
||||
@@ -1367,8 +1367,23 @@ static struct drm_driver nv_drm_driver = {
|
||||
.ioctls = nv_drm_ioctls,
|
||||
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
|
||||
|
||||
/*
|
||||
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
|
||||
* conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* drm_gem_prime_fd_to_handle().
|
||||
*
|
||||
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
|
||||
* all drivers") made these helpers the default when .prime_handle_to_fd /
|
||||
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
|
||||
* them if the helpers aren't present.
|
||||
*/
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
|
||||
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
|
||||
#endif
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_fd_to_handle
|
||||
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
|
||||
#endif
|
||||
|
||||
.gem_prime_import = nv_drm_gem_prime_import,
|
||||
.gem_prime_import_sg_table = nv_drm_gem_prime_import_sg_table,
|
||||
|
||||
|
||||
@@ -54,6 +54,8 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
|
||||
|
||||
@@ -121,6 +121,8 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
return true;
|
||||
|
||||
if (uvm_channel_is_proxy(push->channel)) {
|
||||
uvm_pushbuffer_t *pushbuffer;
|
||||
|
||||
if (dst.is_virtual) {
|
||||
UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
|
||||
return false;
|
||||
@@ -142,7 +144,8 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
return false;
|
||||
}
|
||||
|
||||
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||||
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
|
||||
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
|
||||
|
||||
if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
|
||||
UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
|
||||
@@ -177,10 +180,13 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
// irrespective of the virtualization mode.
|
||||
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
|
||||
{
|
||||
uvm_pushbuffer_t *pushbuffer;
|
||||
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return;
|
||||
|
||||
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||||
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
|
||||
src->address -= uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
|
||||
|
||||
@@ -760,7 +760,7 @@ static NV_STATUS alloc_vidmem_protected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t
|
||||
|
||||
*mem = NULL;
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem_protected(size, gpu, mem));
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
|
||||
TEST_NV_CHECK_GOTO(zero_vidmem(*mem), err);
|
||||
|
||||
|
||||
@@ -272,19 +272,26 @@ static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
|
||||
static void unlock_channel_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
if (uvm_channel_is_secure(channel)) {
|
||||
NvU32 index = uvm_channel_index_in_pool(channel);
|
||||
NvU32 index;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
UVM_ASSERT(test_bit(index, channel->pool->push_locks));
|
||||
__clear_bit(index, channel->pool->push_locks);
|
||||
uvm_up_out_of_order(&channel->pool->push_sem);
|
||||
}
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
return;
|
||||
|
||||
index = uvm_channel_index_in_pool(channel);
|
||||
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
UVM_ASSERT(test_bit(index, channel->pool->push_locks));
|
||||
|
||||
__clear_bit(index, channel->pool->push_locks);
|
||||
uvm_up_out_of_order(&channel->pool->push_sem);
|
||||
}
|
||||
|
||||
static bool is_channel_locked_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
if (uvm_channel_is_secure(channel))
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return test_bit(uvm_channel_index_in_pool(channel), channel->pool->push_locks);
|
||||
|
||||
// For CE and proxy channels, we always return that the channel is locked,
|
||||
@@ -295,25 +302,25 @@ static bool is_channel_locked_for_push(uvm_channel_t *channel)
|
||||
|
||||
static void lock_channel_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
if (uvm_channel_is_secure(channel)) {
|
||||
NvU32 index = uvm_channel_index_in_pool(channel);
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NvU32 index = uvm_channel_index_in_pool(channel);
|
||||
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
UVM_ASSERT(!test_bit(index, channel->pool->push_locks));
|
||||
|
||||
UVM_ASSERT(!test_bit(index, channel->pool->push_locks));
|
||||
__set_bit(index, channel->pool->push_locks);
|
||||
}
|
||||
__set_bit(index, channel->pool->push_locks);
|
||||
}
|
||||
|
||||
static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NvU32 index = uvm_channel_index_in_pool(channel);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
|
||||
if (uvm_channel_is_secure(channel) &&
|
||||
!test_bit(index, channel->pool->push_locks) &&
|
||||
try_claim_channel_locked(channel, num_gpfifo_entries)) {
|
||||
if (!test_bit(index, channel->pool->push_locks) && try_claim_channel_locked(channel, num_gpfifo_entries)) {
|
||||
lock_channel_for_push(channel);
|
||||
return true;
|
||||
}
|
||||
@@ -321,57 +328,15 @@ static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reserve a channel in the specified CE pool
|
||||
static NV_STATUS channel_reserve_in_ce_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
|
||||
{
|
||||
uvm_channel_t *channel;
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
UVM_ASSERT(pool);
|
||||
UVM_ASSERT(uvm_channel_pool_is_ce(pool));
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
// TODO: Bug 1764953: Prefer idle/less busy channels
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
*channel_out = channel;
|
||||
return NV_OK;
|
||||
}
|
||||
}
|
||||
|
||||
uvm_spin_loop_init(&spin);
|
||||
while (1) {
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
*channel_out = channel;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
status = uvm_channel_check_errors(channel);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
UVM_SPIN_LOOP(&spin);
|
||||
}
|
||||
}
|
||||
|
||||
UVM_ASSERT_MSG(0, "Cannot get here?!\n");
|
||||
return NV_ERR_GENERIC;
|
||||
}
|
||||
|
||||
// Reserve a channel in the specified secure pool
|
||||
static NV_STATUS channel_reserve_in_secure_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
|
||||
// Reserve a channel in the specified pool. The channel is locked until the push
|
||||
// ends
|
||||
static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
|
||||
{
|
||||
uvm_channel_t *channel;
|
||||
uvm_spin_loop_t spin;
|
||||
NvU32 index;
|
||||
|
||||
UVM_ASSERT(pool);
|
||||
UVM_ASSERT(pool->secure);
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(pool->manager->gpu));
|
||||
|
||||
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
|
||||
@@ -426,6 +391,51 @@ done:
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Reserve a channel in the specified pool
|
||||
static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
|
||||
{
|
||||
uvm_channel_t *channel;
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
UVM_ASSERT(pool);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(pool->manager->gpu))
|
||||
return channel_reserve_and_lock_in_pool(pool, channel_out);
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
// TODO: Bug 1764953: Prefer idle/less busy channels
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
*channel_out = channel;
|
||||
return NV_OK;
|
||||
}
|
||||
}
|
||||
|
||||
uvm_spin_loop_init(&spin);
|
||||
while (1) {
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
*channel_out = channel;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
status = uvm_channel_check_errors(channel);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
UVM_SPIN_LOOP(&spin);
|
||||
}
|
||||
}
|
||||
|
||||
UVM_ASSERT_MSG(0, "Cannot get here?!\n");
|
||||
|
||||
return NV_ERR_GENERIC;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
|
||||
{
|
||||
uvm_channel_pool_t *pool = manager->pool_to_use.default_for_type[type];
|
||||
@@ -433,10 +443,7 @@ NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_t
|
||||
UVM_ASSERT(pool != NULL);
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
|
||||
if (pool->secure)
|
||||
return channel_reserve_in_secure_pool(pool, channel_out);
|
||||
|
||||
return channel_reserve_in_ce_pool(pool, channel_out);
|
||||
return channel_reserve_in_pool(pool, channel_out);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
|
||||
@@ -452,10 +459,7 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
|
||||
|
||||
UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
if (pool->secure)
|
||||
return channel_reserve_in_secure_pool(pool, channel_out);
|
||||
|
||||
return channel_reserve_in_ce_pool(pool, channel_out);
|
||||
return channel_reserve_in_pool(pool, channel_out);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
|
||||
@@ -491,7 +495,7 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
|
||||
return push_info - channel->push_infos;
|
||||
}
|
||||
|
||||
static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, uvm_channel_t *channel, NvU64 semaphore_va)
|
||||
static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, NvU64 semaphore_va)
|
||||
{
|
||||
NvU32 iv_index;
|
||||
uvm_gpu_address_t notifier_gpu_va;
|
||||
@@ -499,12 +503,14 @@ static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, uvm_channel_
|
||||
uvm_gpu_address_t semaphore_gpu_va;
|
||||
uvm_gpu_address_t encrypted_payload_gpu_va;
|
||||
uvm_gpu_t *gpu = push->gpu;
|
||||
uvm_channel_t *channel = push->channel;
|
||||
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
|
||||
UvmCslIv *iv_cpu_addr = semaphore->conf_computing.ivs;
|
||||
NvU32 payload_size = sizeof(*semaphore->payload);
|
||||
NvU32 *last_pushed_notifier = &semaphore->conf_computing.last_pushed_notifier;
|
||||
|
||||
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
encrypted_payload_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.encrypted_payload, gpu, false);
|
||||
notifier_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.notifier, gpu, false);
|
||||
@@ -538,19 +544,21 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_channel_manager_t *manager;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
UVM_ASSERT(channel);
|
||||
UVM_ASSERT(push);
|
||||
|
||||
manager = channel->pool->manager;
|
||||
|
||||
gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
// Only SEC2 and WLC with set up fixed schedule can use direct push
|
||||
// submission. All other cases (including WLC pre-schedule) need to
|
||||
// reserve a launch channel that will be used to submit this push
|
||||
// indirectly.
|
||||
if (uvm_conf_computing_mode_enabled(uvm_channel_get_gpu(channel)) &&
|
||||
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager)) &&
|
||||
!uvm_channel_is_sec2(channel)) {
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel) &&
|
||||
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager))) {
|
||||
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(manager) ?
|
||||
UVM_CHANNEL_TYPE_WLC :
|
||||
UVM_CHANNEL_TYPE_SEC2;
|
||||
@@ -559,9 +567,9 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
|
||||
return status;
|
||||
}
|
||||
|
||||
// For secure channels, channel's lock should have been acquired in
|
||||
// uvm_channel_reserve() or channel_reserve_in_secure_pool() before
|
||||
// reaching here.
|
||||
// When the Confidential Computing feature is enabled, the channel's lock
|
||||
// should have already been acquired in uvm_channel_reserve() or
|
||||
// channel_reserve_and_lock_in_pool().
|
||||
UVM_ASSERT(is_channel_locked_for_push(channel));
|
||||
|
||||
push->channel = channel;
|
||||
@@ -586,9 +594,8 @@ static void internal_channel_submit_work(uvm_push_t *push, NvU32 push_size, NvU3
|
||||
NvU64 *gpfifo_entry;
|
||||
NvU64 pushbuffer_va;
|
||||
uvm_channel_t *channel = push->channel;
|
||||
uvm_channel_manager_t *channel_manager = channel->pool->manager;
|
||||
uvm_pushbuffer_t *pushbuffer = channel_manager->pushbuffer;
|
||||
uvm_gpu_t *gpu = channel_manager->gpu;
|
||||
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
BUILD_BUG_ON(sizeof(*gpfifo_entry) != NVB06F_GP_ENTRY__SIZE);
|
||||
UVM_ASSERT(!uvm_channel_is_proxy(channel));
|
||||
@@ -644,12 +651,11 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
|
||||
static void do_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, NvU32 new_payload)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
if (uvm_channel_is_ce(push->channel))
|
||||
gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
|
||||
else if (uvm_channel_is_sec2(push->channel))
|
||||
gpu->parent->sec2_hal->semaphore_release(push, semaphore_va, new_payload);
|
||||
else
|
||||
UVM_ASSERT_MSG(0, "Semaphore release on an unsupported channel.\n");
|
||||
gpu->parent->sec2_hal->semaphore_release(push, semaphore_va, new_payload);
|
||||
}
|
||||
|
||||
static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, NvU32 new_payload)
|
||||
@@ -668,8 +674,8 @@ static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semap
|
||||
// needs to be scheduled to get an encrypted shadow copy in unprotected
|
||||
// sysmem. This allows UVM to later decrypt it and observe the new
|
||||
// semaphore value.
|
||||
if (uvm_channel_is_secure_ce(push->channel))
|
||||
channel_semaphore_gpu_encrypt_payload(push, push->channel, semaphore_va);
|
||||
if (uvm_conf_computing_mode_enabled(push->gpu) && uvm_channel_is_ce(push->channel))
|
||||
channel_semaphore_gpu_encrypt_payload(push, semaphore_va);
|
||||
}
|
||||
|
||||
static uvm_channel_t *get_paired_channel(uvm_channel_t *channel)
|
||||
@@ -746,15 +752,12 @@ static void internal_channel_submit_work_wlc(uvm_push_t *push)
|
||||
wmb();
|
||||
|
||||
// Ring the WLC doorbell to start processing the above push
|
||||
UVM_GPU_WRITE_ONCE(*wlc_channel->channel_info.workSubmissionOffset,
|
||||
wlc_channel->channel_info.workSubmissionToken);
|
||||
UVM_GPU_WRITE_ONCE(*wlc_channel->channel_info.workSubmissionOffset, wlc_channel->channel_info.workSubmissionToken);
|
||||
}
|
||||
|
||||
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
|
||||
NvU32 old_cpu_put,
|
||||
NvU32 new_gpu_put)
|
||||
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push, NvU32 old_cpu_put, NvU32 new_gpu_put)
|
||||
{
|
||||
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
|
||||
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(push->channel);
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
uvm_push_t indirect_push;
|
||||
@@ -767,7 +770,7 @@ static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
|
||||
uvm_gpu_address_t push_enc_auth_tag_gpu;
|
||||
NvU64 gpfifo_gpu_va = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
|
||||
UVM_ASSERT(uvm_channel_is_ce(push->channel));
|
||||
UVM_ASSERT(uvm_channel_is_wlc(push->launch_channel));
|
||||
|
||||
// WLC submissions are done under channel lock, so there should be no
|
||||
@@ -848,8 +851,6 @@ static void update_gpput_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel,
|
||||
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
|
||||
&gpput_auth_tag_gpu);
|
||||
|
||||
|
||||
|
||||
// Update GPPUT. The update needs 4B write to specific offset,
|
||||
// however we can only do 16B aligned decrypt writes.
|
||||
// A poison value is written to all other locations, this is ignored in
|
||||
@@ -922,7 +923,7 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
|
||||
gpfifo_scratchpad[0] = previous_gpfifo->control_value;
|
||||
}
|
||||
else {
|
||||
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
|
||||
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
|
||||
NvU64 prev_pb_va = uvm_pushbuffer_get_gpu_va_base(pushbuffer) + previous_gpfifo->pushbuffer_offset;
|
||||
|
||||
// Reconstruct the previous gpfifo entry. UVM_GPFIFO_SYNC_WAIT is
|
||||
@@ -951,11 +952,9 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
|
||||
gpfifo_auth_tag_gpu.address);
|
||||
}
|
||||
|
||||
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
|
||||
NvU32 old_cpu_put,
|
||||
NvU32 new_gpu_put)
|
||||
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push, NvU32 old_cpu_put, NvU32 new_gpu_put)
|
||||
{
|
||||
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
|
||||
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(push->channel);
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
uvm_push_t indirect_push;
|
||||
@@ -968,7 +967,7 @@ static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
|
||||
uvm_gpu_address_t push_auth_tag_gpu;
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
|
||||
UVM_ASSERT(uvm_channel_is_ce(push->channel));
|
||||
UVM_ASSERT(uvm_channel_is_sec2(push->launch_channel));
|
||||
|
||||
// If the old_cpu_put is not equal to the last gpu put, other pushes are
|
||||
@@ -1051,7 +1050,7 @@ static void encrypt_push(uvm_push_t *push)
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
NvU32 push_size = uvm_push_get_size(push);
|
||||
uvm_push_info_t *push_info = uvm_push_info_from_push(push);
|
||||
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
|
||||
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
|
||||
unsigned auth_tag_offset = UVM_CONF_COMPUTING_AUTH_TAG_SIZE * push->push_info_index;
|
||||
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
@@ -1098,6 +1097,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
NvU32 push_size;
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
bool needs_sec2_work_submit = false;
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
@@ -1112,7 +1112,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
|
||||
if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
|
||||
uvm_channel_t *paired_lcic = wlc_get_paired_lcic(channel);
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
gpu->parent->ce_hal->semaphore_reduction_inc(push,
|
||||
paired_lcic->channel_info.gpPutGpuVa,
|
||||
paired_lcic->num_gpfifo_entries - 1);
|
||||
@@ -1126,7 +1126,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
// pushes. However, direct pushes to WLC can be smaller than this
|
||||
// size. This is used e.g. by indirect submission of control
|
||||
// gpfifo entries.
|
||||
channel_manager->gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
|
||||
gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1144,7 +1144,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// Indirect submission via SEC2/WLC needs pushes to be aligned for
// encryption/decryption. The pushbuffer_size of this push
// influences starting address of the next push.
if (uvm_conf_computing_mode_enabled(uvm_channel_get_gpu(channel)))
if (uvm_conf_computing_mode_enabled(gpu))
entry->pushbuffer_size = UVM_ALIGN_UP(push_size, UVM_CONF_COMPUTING_BUF_ALIGNMENT);
entry->push_info = &channel->push_infos[push->push_info_index];
entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;
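For context, UVM_ALIGN_UP behaves like the usual power-of-two round-up, so reserving the aligned size is what keeps the next push's start address on an encryption-friendly boundary. A minimal sketch of the arithmetic (macro name and values are illustrative, not from this diff):

/* Illustrative: round x up to the next multiple of a power-of-two alignment a. */
#define EXAMPLE_ALIGN_UP(x, a)  (((x) + ((a) - 1)) & ~((a) - 1))

/* e.g. a 100-byte push with 16-byte alignment reserves EXAMPLE_ALIGN_UP(100, 16) == 112 bytes. */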
@@ -1158,12 +1158,13 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
else if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
|
||||
internal_channel_submit_work_wlc(push);
|
||||
}
|
||||
else if (uvm_conf_computing_mode_enabled(channel_manager->gpu) && !uvm_channel_is_sec2(channel)) {
|
||||
else if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel)) {
|
||||
if (uvm_channel_manager_is_wlc_ready(channel_manager)) {
|
||||
internal_channel_submit_work_indirect_wlc(push, cpu_put, new_cpu_put);
|
||||
}
|
||||
else {
|
||||
// submitting via SEC2 starts a push, postpone until this push is ended
|
||||
// submitting via SEC2 starts a push, postpone until this push is
|
||||
// ended
|
||||
needs_sec2_work_submit = true;
|
||||
}
|
||||
}
|
||||
@@ -1202,12 +1203,13 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
|
||||
static void submit_ctrl_gpfifo(uvm_channel_t *channel, uvm_gpfifo_entry_t *entry, NvU32 new_cpu_put)
|
||||
{
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NvU32 cpu_put = channel->cpu_put;
|
||||
NvU64 *gpfifo_entry;
|
||||
|
||||
UVM_ASSERT(entry == &channel->gpfifo_entries[cpu_put]);
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && !uvm_channel_is_sec2(channel))
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
|
||||
return;
|
||||
|
||||
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
|
||||
@@ -1234,7 +1236,7 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
|
||||
UVM_CHANNEL_TYPE_WLC :
|
||||
UVM_CHANNEL_TYPE_SEC2;
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_sec2(channel));
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
// If the old_cpu_put is not equal to the last gpu put,
|
||||
// Another push(es) is pending that needs to be submitted.
|
||||
@@ -1290,6 +1292,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
bool needs_indirect_submit = false;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
@@ -1312,7 +1315,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
--channel->current_gpfifo_count;
|
||||
|
||||
submit_ctrl_gpfifo(channel, entry, new_cpu_put);
|
||||
if (uvm_conf_computing_mode_enabled(channel->pool->manager->gpu) && !uvm_channel_is_sec2(channel))
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
|
||||
needs_indirect_submit = true;
|
||||
|
||||
channel->cpu_put = new_cpu_put;
|
||||
@@ -1385,16 +1388,15 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_channel_reserve_secure(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
static NV_STATUS channel_reserve_and_lock(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
uvm_spin_loop_t spin;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_channel_pool_t *pool = channel->pool;
|
||||
|
||||
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
|
||||
// uvm_channel_end_push() routine. Note that different than in
|
||||
// channel_reserve_in_secure_pool, we cannot pick an unlocked channel from
|
||||
// the secure pool, even when there is one available and *channel is locked.
|
||||
// channel_reserve_and_lock_in_pool, we cannot pick an unlocked channel from
|
||||
// the pool, even when there is one available and *channel is locked.
|
||||
// Not a concern given that uvm_channel_reserve() is not the common-case for
|
||||
// channel reservation, and only used for channel initialization, GPFIFO
|
||||
// control work submission, and testing.
|
||||
@@ -1409,6 +1411,8 @@ static NV_STATUS uvm_channel_reserve_secure(uvm_channel_t *channel, NvU32 num_gp
|
||||
|
||||
uvm_spin_loop_init(&spin);
|
||||
while (1) {
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
channel_pool_lock(pool);
|
||||
@@ -1436,9 +1440,10 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_spin_loop_t spin;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
if (uvm_channel_is_secure(channel))
|
||||
return uvm_channel_reserve_secure(channel, num_gpfifo_entries);
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return channel_reserve_and_lock(channel, num_gpfifo_entries);
|
||||
|
||||
if (try_claim_channel(channel, num_gpfifo_entries))
|
||||
return NV_OK;
|
||||
@@ -1578,8 +1583,10 @@ NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
|
||||
static NV_STATUS csl_init(uvm_channel_t *channel)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
|
||||
UVM_ASSERT(uvm_channel_is_secure(channel));
|
||||
uvm_mutex_init(&channel->csl.ctx_lock, UVM_LOCK_ORDER_LEAF);
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceCslInitContext(&channel->csl.ctx, channel->handle));
|
||||
@@ -1589,7 +1596,7 @@ static NV_STATUS csl_init(uvm_channel_t *channel)
|
||||
else {
|
||||
UVM_DBG_PRINT("nvUvmInterfaceCslInitContext() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(channel->pool->manager->gpu));
|
||||
uvm_gpu_name(gpu));
|
||||
}
|
||||
|
||||
return status;
|
||||
@@ -1609,7 +1616,10 @@ static void csl_destroy(uvm_channel_t *channel)
|
||||
|
||||
static void free_conf_computing_buffers(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
uvm_rm_mem_free(channel->conf_computing.static_pb_protected_vidmem);
|
||||
uvm_rm_mem_free(channel->conf_computing.static_pb_unprotected_sysmem);
|
||||
@@ -1637,10 +1647,12 @@ static void free_conf_computing_buffers(uvm_channel_t *channel)
|
||||
static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
sizeof(semaphore->conf_computing.last_pushed_notifier),
|
||||
@@ -1679,7 +1691,7 @@ static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
|
||||
|
||||
static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
size_t aligned_wlc_push_size = UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
|
||||
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
@@ -1723,7 +1735,7 @@ static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
|
||||
|
||||
static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
const size_t notifier_size = sizeof(*channel->conf_computing.static_notifier_entry_unprotected_sysmem_cpu);
|
||||
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
@@ -1758,8 +1770,10 @@ static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
|
||||
static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
status = alloc_conf_computing_buffers_semaphore(channel);
|
||||
if (status != NV_OK)
|
||||
@@ -1772,7 +1786,6 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
|
||||
status = alloc_conf_computing_buffers_lcic(channel);
|
||||
}
|
||||
else {
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
void *push_crypto_bundles = uvm_kvmalloc_zero(sizeof(*channel->conf_computing.push_crypto_bundles) *
|
||||
channel->num_gpfifo_entries);
|
||||
|
||||
@@ -1793,6 +1806,8 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
|
||||
|
||||
static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_ASSERT(pool->num_channels > 0);
|
||||
|
||||
if (channel->tracking_sem.queued_value > 0) {
|
||||
@@ -1816,9 +1831,10 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
|
||||
uvm_kvfree(channel->gpfifo_entries);
|
||||
|
||||
if (uvm_channel_is_secure(channel)) {
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
csl_destroy(channel);
|
||||
if (uvm_channel_is_secure_ce(channel))
|
||||
|
||||
if (uvm_channel_is_ce(channel))
|
||||
free_conf_computing_buffers(channel);
|
||||
}
|
||||
|
||||
@@ -1905,8 +1921,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
|
||||
channel_alloc_params.gpPutLoc = UVM_BUFFER_LOCATION_SYS;
|
||||
}
|
||||
|
||||
channel_alloc_params.secure = channel->pool->secure;
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(channel_get_tsg(channel),
|
||||
&channel_alloc_params,
|
||||
&channel->handle,
|
||||
@@ -1928,8 +1942,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
|
||||
channel_info->hwChannelId,
|
||||
uvm_channel_is_sec2(channel) ? "SEC2" :
|
||||
uvm_channel_is_wlc(channel) ? "WLC" :
|
||||
uvm_channel_is_lcic(channel) ? "LCIC" :
|
||||
uvm_channel_is_secure(channel) ? "CE (secure)" : "CE",
|
||||
uvm_channel_is_lcic(channel) ? "LCIC" : "CE",
|
||||
channel->pool->engine_index);
|
||||
|
||||
return NV_OK;
|
||||
@@ -1981,7 +1994,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
|
||||
channel->tools.pending_event_count = 0;
|
||||
INIT_LIST_HEAD(&channel->tools.channel_list_node);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && !uvm_channel_is_sec2(channel))
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
|
||||
semaphore_pool = gpu->secure_semaphore_pool;
|
||||
|
||||
status = uvm_gpu_tracking_semaphore_alloc(semaphore_pool, &channel->tracking_sem);
|
||||
@@ -2007,7 +2020,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (uvm_channel_is_secure(channel)) {
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
status = csl_init(channel);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
@@ -2075,7 +2088,7 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
|
||||
|
||||
if (uvm_gpu_has_pushbuffer_segments(gpu)) {
|
||||
NvU64 gpfifo_entry;
|
||||
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
|
||||
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
|
||||
NvU64 pb_base = uvm_pushbuffer_get_gpu_va_base(pushbuffer);
|
||||
|
||||
if (uvm_channel_is_sec2(channel))
|
||||
@@ -2095,10 +2108,8 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
|
||||
|
||||
if (uvm_channel_is_ce(channel))
|
||||
gpu->parent->ce_hal->init(&push);
|
||||
else if (uvm_channel_is_sec2(channel))
|
||||
gpu->parent->sec2_hal->init(&push);
|
||||
else
|
||||
UVM_ASSERT_MSG(0, "Unknown channel type!");
|
||||
gpu->parent->sec2_hal->init(&push);
|
||||
|
||||
gpu->parent->host_hal->init(&push);
|
||||
|
||||
@@ -2153,11 +2164,6 @@ static unsigned channel_pool_type_num_tsgs(uvm_channel_pool_type_t pool_type)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static bool pool_type_is_valid(uvm_channel_pool_type_t pool_type)
|
||||
{
|
||||
return(is_power_of_2(pool_type) && (pool_type < UVM_CHANNEL_POOL_TYPE_MASK));
|
||||
}
|
||||
|
||||
static UVM_GPU_CHANNEL_ENGINE_TYPE pool_type_to_engine_type(uvm_channel_pool_type_t pool_type)
|
||||
{
|
||||
if (pool_type == UVM_CHANNEL_POOL_TYPE_SEC2)
|
||||
@@ -2229,7 +2235,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
unsigned num_tsgs;
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
UVM_ASSERT(pool_type_is_valid(pool_type));
|
||||
UVM_ASSERT(uvm_pool_type_is_valid(pool_type));
|
||||
|
||||
pool = channel_manager->channel_pools + channel_manager->num_channel_pools;
|
||||
channel_manager->num_channel_pools++;
|
||||
@@ -2260,10 +2266,10 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
num_channels = channel_pool_type_num_channels(pool_type);
|
||||
UVM_ASSERT(num_channels <= UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
|
||||
if (pool->secure) {
|
||||
if (uvm_conf_computing_mode_enabled(channel_manager->gpu)) {
|
||||
// Use different order lock for SEC2 and WLC channels.
|
||||
// This allows reserving a SEC2 or WLC channel for indirect work
|
||||
// submission while holding a reservation for a secure channel.
|
||||
// submission while holding a reservation for a channel.
|
||||
uvm_lock_order_t order = uvm_channel_pool_is_sec2(pool) ? UVM_LOCK_ORDER_CSL_SEC2_PUSH :
|
||||
(uvm_channel_pool_is_wlc(pool) ? UVM_LOCK_ORDER_CSL_WLC_PUSH :
|
||||
UVM_LOCK_ORDER_CSL_PUSH);
|
||||
@@ -2297,23 +2303,6 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_pool_add_secure(uvm_channel_manager_t *channel_manager,
|
||||
uvm_channel_pool_type_t pool_type,
|
||||
unsigned engine_index,
|
||||
uvm_channel_pool_t **pool_out)
|
||||
{
|
||||
uvm_channel_pool_t *pool = channel_manager->channel_pools + channel_manager->num_channel_pools;
|
||||
|
||||
pool->secure = true;
|
||||
return channel_pool_add(channel_manager, pool_type, engine_index, pool_out);
|
||||
}
|
||||
|
||||
bool uvm_channel_type_requires_secure_pool(uvm_gpu_t *gpu, uvm_channel_type_t channel_type)
|
||||
{
|
||||
// For now, all channels are secure channels
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ce_usable_for_channel_type(uvm_channel_type_t type, const UvmGpuCopyEngineCaps *cap)
|
||||
{
|
||||
if (!cap->supported || cap->grce)
|
||||
@@ -2461,13 +2450,6 @@ static NV_STATUS pick_ce_for_channel_type(uvm_channel_manager_t *manager,
|
||||
if (!ce_usable_for_channel_type(type, cap))
|
||||
continue;
|
||||
|
||||
if (uvm_conf_computing_mode_is_hcc(manager->gpu)) {
|
||||
// All usable CEs are secure
|
||||
UVM_ASSERT(cap->secure);
|
||||
|
||||
// Multi-PCE LCEs are disallowed
|
||||
UVM_ASSERT(hweight32(cap->cePceMask) == 1);
|
||||
}
|
||||
__set_bit(i, manager->ce_mask);
|
||||
|
||||
if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
@@ -2523,7 +2505,7 @@ out:
|
||||
return status;
|
||||
}
|
||||
|
||||
// Return the non-secure pool corresponding to the given CE index
|
||||
// Return the pool corresponding to the given CE index
|
||||
//
|
||||
// This function cannot be used to access the proxy pool in SR-IOV heavy.
|
||||
static uvm_channel_pool_t *channel_manager_ce_pool(uvm_channel_manager_t *manager, NvU32 ce)
|
||||
@@ -2734,24 +2716,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
|
||||
{
|
||||
unsigned num_channel_pools;
|
||||
unsigned num_used_ce = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
// Create one CE channel pool per usable CE
|
||||
num_channel_pools = num_used_ce;
|
||||
num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
// CE proxy channel pool.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
|
||||
num_channel_pools++;
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(manager->gpu)) {
|
||||
|
||||
// Create one CE secure channel pool per usable CE
|
||||
if (uvm_conf_computing_mode_is_hcc(manager->gpu))
|
||||
num_channel_pools += num_used_ce;
|
||||
|
||||
// SEC2 pool, WLC pool, LCIC pool
|
||||
// SEC2 pool, WLC pool, LCIC pool
|
||||
if (uvm_conf_computing_mode_enabled(manager->gpu))
|
||||
num_channel_pools += 3;
|
||||
}
|
||||
|
||||
return num_channel_pools;
|
||||
}
|
||||
@@ -2783,38 +2758,6 @@ static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_create_ce_secure_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
|
||||
{
|
||||
unsigned ce;
|
||||
|
||||
if (!uvm_conf_computing_mode_is_hcc(manager->gpu))
|
||||
return NV_OK;
|
||||
|
||||
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
NV_STATUS status;
|
||||
unsigned type;
|
||||
uvm_channel_pool_t *pool = NULL;
|
||||
|
||||
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
|
||||
unsigned preferred = preferred_ce[type];
|
||||
|
||||
if (preferred != ce)
|
||||
continue;
|
||||
|
||||
if (uvm_channel_type_requires_secure_pool(manager->gpu, type)) {
|
||||
UVM_ASSERT(manager->pool_to_use.default_for_type[type] == NULL);
|
||||
manager->pool_to_use.default_for_type[type] = pool;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(wlc);
|
||||
@@ -3142,6 +3085,64 @@ static NV_STATUS channel_manager_setup_wlc_lcic(uvm_channel_pool_t *wlc_pool, uv
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_create_conf_computing_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
|
||||
{
|
||||
NV_STATUS status;
|
||||
unsigned wlc_lcic_ce_index;
|
||||
uvm_channel_pool_t *sec2_pool = NULL;
|
||||
uvm_channel_pool_t *wlc_pool = NULL;
|
||||
uvm_channel_pool_t *lcic_pool = NULL;
|
||||
|
||||
if (!uvm_conf_computing_mode_enabled(manager->gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc(manager->gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
sizeof(UvmCslIv),
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&manager->gpu->conf_computing.iv_rm_mem);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Create SEC2 pool. This needs to be done first, initialization of
|
||||
// other channels needs SEC2.
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_SEC2, 0, &sec2_pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = sec2_pool;
|
||||
|
||||
// Use the same CE as CPU TO GPU channels for WLC/LCIC
|
||||
// Both need to use the same engine for the fixed schedule to work.
|
||||
// TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
|
||||
// work launch
|
||||
// Find a metric to select the best CE to use
|
||||
wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];
|
||||
|
||||
// Create WLC/LCIC pools. This should be done early, CE channels use
|
||||
// them for secure launch. The WLC pool must be created before the LCIC.
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_WLC, wlc_lcic_ce_index, &wlc_pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] = wlc_pool;
|
||||
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_LCIC, wlc_lcic_ce_index, &lcic_pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = channel_manager_setup_wlc_lcic(wlc_pool, lcic_pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// The LCIC pool must be assigned after the call to
|
||||
// channel_manager_setup_wlc_lcic(). It determines WLC and LCIC channels
|
||||
// are ready to be used for secure work submission.
|
||||
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] = lcic_pool;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
{
|
||||
NV_STATUS status;
|
||||

@@ -3162,62 +3163,11 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
    if (!manager->channel_pools)
        return NV_ERR_NO_MEMORY;

    if (uvm_conf_computing_mode_enabled(manager->gpu)) {
        uvm_channel_pool_t *sec2_pool = NULL;
        uvm_channel_pool_t *wlc_pool = NULL;
        uvm_channel_pool_t *lcic_pool = NULL;
        unsigned wlc_lcic_ce_index;

        status = uvm_rm_mem_alloc(manager->gpu,
                                  UVM_RM_MEM_TYPE_SYS,
                                  sizeof(UvmCslIv),
                                  UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                                  &manager->gpu->conf_computing.iv_rm_mem);

        if (status != NV_OK)
            return status;

        // Create SEC2 pool. This needs to be done first, initialization of
        // other channels needs SEC2.
        status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_SEC2, 0, &sec2_pool);
        if (status != NV_OK)
            return status;

        manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = sec2_pool;

        // Use the same CE as CPU TO GPU channels for WLC/LCIC
        // Both need to use the same engine for the fixed schedule to work.
        // TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
        // work launch
        // Find a metric to select the best CE to use
        wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];

        // Create WLC/LCIC pools. This should be done early, CE channels use
        // them for secure launch. The WLC pool must be created before the LCIC.
        status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_WLC, wlc_lcic_ce_index, &wlc_pool);
        if (status != NV_OK)
            return status;

        manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] = wlc_pool;

        status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_LCIC, wlc_lcic_ce_index, &lcic_pool);
        if (status != NV_OK)
            return status;

        status = channel_manager_setup_wlc_lcic(wlc_pool, lcic_pool);
        if (status != NV_OK)
            return status;

        // The LCIC pool must be assigned after the call to
        // channel_manager_setup_wlc_lcic(). It determines WLC and LCIC channels
        // are ready to be used for secure work submission.
        manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] = lcic_pool;
        status = channel_manager_create_ce_secure_pools(manager, preferred_ce);
    }
    else {
        status = channel_manager_create_ce_pools(manager, preferred_ce);
    }
    status = channel_manager_create_conf_computing_pools(manager, preferred_ce);
    if (status != NV_OK)
        return status;

    status = channel_manager_create_ce_pools(manager, preferred_ce);
    if (status != NV_OK)
        return status;

@@ -104,16 +104,14 @@ typedef enum
    // ----------------------------------
    // Channel type with fixed schedules

    // Work Launch Channel (WLC) is a specialized channel
    // for launching work on other channels when
    // Confidential Computing is enabled.
    // It is paired with LCIC (below)
    // Work Launch Channel (WLC) is a specialized channel for launching work on
    // other channels when the Confidential Computing feature is enabled. It is
    // paired with LCIC (below)
    UVM_CHANNEL_TYPE_WLC,

    // Launch Confirmation Indicator Channel (LCIC) is a
    // specialized channel with fixed schedule. It gets
    // triggered by executing WLC work, and makes sure that
    // WLC get/put pointers are up-to-date.
    // Launch Confirmation Indicator Channel (LCIC) is a specialized channel
    // with fixed schedule. It gets triggered by executing WLC work, and makes
    // sure that WLC get/put pointers are up-to-date.
    UVM_CHANNEL_TYPE_LCIC,

    UVM_CHANNEL_TYPE_COUNT,

@@ -242,11 +240,9 @@ typedef struct
    DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);

    // Counting semaphore for available and unlocked channels, it must be
    // acquired before submitting work to a secure channel.
    // acquired before submitting work to a channel when the Confidential
    // Computing feature is enabled.
    uvm_semaphore_t push_sem;

    // See uvm_channel_is_secure() documentation.
    bool secure;
} uvm_channel_pool_t;

struct uvm_channel_struct

@@ -304,8 +300,9 @@ struct uvm_channel_struct
    // its internal operation and each push may modify this state.
    uvm_mutex_t push_lock;

    // Every secure channel has cryptographic state in HW, which is
    // mirrored here for CPU-side operations.
    // When the Confidential Computing feature is enabled, every channel has
    // cryptographic state in HW, which is mirrored here for CPU-side
    // operations.
    UvmCslContext ctx;
    bool is_ctx_initialized;

@@ -459,46 +456,28 @@ struct uvm_channel_manager_struct
// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);

static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool);

// A channel is secure if it has HW encryption capabilities.
//
// Secure channels are treated differently in the UVM driver. Each secure
// channel has a unique CSL context associated with it, has relatively
// restrictive reservation policies (in comparison with non-secure channels),
// it is requested to be allocated differently by RM, etc.
static bool uvm_channel_pool_is_secure(uvm_channel_pool_t *pool)
static bool uvm_pool_type_is_valid(uvm_channel_pool_type_t pool_type)
{
    return pool->secure;
}

static bool uvm_channel_is_secure(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_secure(channel->pool);
    return (is_power_of_2(pool_type) && (pool_type < UVM_CHANNEL_POOL_TYPE_MASK));
}
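
// Pool types are assumed here to be single-bit (one-hot) values below
// UVM_CHANNEL_POOL_TYPE_MASK, which is what the power-of-two check above
// relies on: a valid pool type identifies exactly one kind of pool.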

static bool uvm_channel_pool_is_sec2(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));

    return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_SEC2);
}

static bool uvm_channel_pool_is_secure_ce(uvm_channel_pool_t *pool)
{
    return uvm_channel_pool_is_secure(pool) && uvm_channel_pool_is_ce(pool);
}

static bool uvm_channel_pool_is_wlc(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));

    return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_WLC);
}

static bool uvm_channel_pool_is_lcic(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));

    return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_LCIC);
}

@@ -508,11 +487,6 @@ static bool uvm_channel_is_sec2(uvm_channel_t *channel)
    return uvm_channel_pool_is_sec2(channel->pool);
}

static bool uvm_channel_is_secure_ce(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_secure_ce(channel->pool);
}

static bool uvm_channel_is_wlc(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_wlc(channel->pool);

@@ -523,12 +497,9 @@ static bool uvm_channel_is_lcic(uvm_channel_t *channel)
    return uvm_channel_pool_is_lcic(channel->pool);
}

bool uvm_channel_type_requires_secure_pool(uvm_gpu_t *gpu, uvm_channel_type_t channel_type);
NV_STATUS uvm_channel_secure_init(uvm_gpu_t *gpu, uvm_channel_t *channel);

static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));

    return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
}

@@ -540,11 +511,7 @@ static bool uvm_channel_is_proxy(uvm_channel_t *channel)

static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
    if (uvm_channel_pool_is_wlc(pool) || uvm_channel_pool_is_lcic(pool))
        return true;

    return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_pool_is_proxy(pool);
    return !uvm_channel_pool_is_sec2(pool);
}
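
// In other words, any pool that is not a SEC2 pool is reported as a CE pool;
// that covers the CE, CE proxy, WLC and LCIC pool types that the check above
// used to enumerate explicitly.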

static bool uvm_channel_is_ce(uvm_channel_t *channel)

@@ -686,6 +653,11 @@ static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
    return channel->pool->manager->gpu;
}

static uvm_pushbuffer_t *uvm_channel_get_pushbuffer(uvm_channel_t *channel)
{
    return channel->pool->manager->pushbuffer;
}

// Index of a channel within the owning pool
static unsigned uvm_channel_index_in_pool(const uvm_channel_t *channel)
{

@@ -681,9 +681,10 @@ done:
}

// The following test is inspired by uvm_push_test.c:test_concurrent_pushes.
// This test verifies that concurrent pushes using the same secure channel pool
// select different channels.
NV_STATUS test_secure_channel_selection(uvm_va_space_t *va_space)
// This test verifies that concurrent pushes using the same channel pool
// select different channels, when the Confidential Computing feature is
// enabled.
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_channel_pool_t *pool;

@@ -703,9 +704,6 @@ NV_STATUS test_secure_channel_selection(uvm_va_space_t *va_space)
    uvm_channel_type_t channel_type;

    for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
        if (!uvm_channel_type_requires_secure_pool(gpu, channel_type))
            continue;

        pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
        TEST_CHECK_RET(pool != NULL);

@@ -997,7 +995,7 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
    if (status != NV_OK)
        goto done;

    status = test_secure_channel_selection(va_space);
    status = test_conf_computing_channel_selection(va_space);
    if (status != NV_OK)
        goto done;

@@ -579,8 +579,10 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
    void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
    NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
    NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
    uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);

    UVM_ASSERT(uvm_channel_is_secure_ce(channel));
    UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
    UVM_ASSERT(uvm_channel_is_ce(channel));

    last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
    gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);

@@ -91,9 +91,9 @@ struct uvm_gpu_tracking_semaphore_struct
// Create a semaphore pool for a GPU.
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);

// When the Confidential Computing feature is enabled, pools associated with
// secure CE channels are allocated in the CPR of vidmem and as such have
// all the associated access restrictions. Because of this, they're called
// When the Confidential Computing feature is enabled, semaphore pools
// associated with CE channels are allocated in the CPR of vidmem and as such
// have all the associated access restrictions. Because of this, they're called
// secure pools and secure semaphores are allocated out of said secure pools.
NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);

@@ -61,7 +61,11 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    // GH180.
    parent_gpu->ce_phys_vidmem_write_supported = !uvm_gpu_is_coherent(parent_gpu);

    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
    // TODO: Bug 4174553: [HGX-SkinnyJoe][GH180] channel errors discussion/debug
    // portion for the uvm tests became nonresponsive after
    // some time and then failed even after reboot
    parent_gpu->peer_copy_mode = uvm_gpu_is_coherent(parent_gpu) ?
        UVM_GPU_PEER_COPY_MODE_VIRTUAL : g_uvm_global.peer_copy_mode;

    // All GR context buffers may be mapped to 57b wide VAs. All "compute" units
    // accessing GR context buffers support the 57-bit VA range.

@@ -491,7 +491,6 @@ void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
    UVM_ASSERT(uvm_push_is_fake(push) || uvm_channel_is_secure(push->channel));
    UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));

    if (!src.is_virtual)

@@ -540,7 +539,6 @@ void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
    UVM_ASSERT(!push->channel || uvm_channel_is_secure(push->channel));
    UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));

    // The addressing mode (and aperture, if applicable) of the source and

@@ -279,13 +279,14 @@
//       Operations not allowed while holding the lock:
//        - GPU memory allocation which can evict memory (would require nesting
//          block locks)
//
// - GPU DMA Allocation pool lock (gpu->conf_computing.dma_buffer_pool.lock)
//      Order: UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL
//      Condition: The Confidential Computing feature is enabled
//      Exclusive lock (mutex)
//
//      Protects:
//      - Protect the state of the uvm_conf_computing_dma_buffer_pool_t
//        when the Confidential Computing feature is enabled on the system.
//
// - Chunk mapping lock (gpu->root_chunk_mappings.bitlocks and
//   gpu->sysmem_mappings.bitlock)

@@ -321,22 +322,25 @@
//      Operations not allowed while holding this lock
//       - GPU memory allocation which can evict
//
// - Secure channel CSL channel pool semaphore
// - CE channel CSL channel pool semaphore
//      Order: UVM_LOCK_ORDER_CSL_PUSH
//      Semaphore per SEC2 channel pool
//      Condition: The Confidential Computing feature is enabled
//      Semaphore per CE channel pool
//
//      The semaphore controls concurrent pushes to secure channels. Secure work
//      submission depends on channel availability in GPFIFO entries (as in any
//      other channel type) but also on channel locking. Each secure channel has a
//      lock to enforce ordering of pushes. The channel's CSL lock is taken on
//      channel reservation until uvm_push_end. Secure channels are stateful
//      channels and the CSL lock protects their CSL state/context.
//      The semaphore controls concurrent pushes to CE channels that are not WLC
//      channels. Secure work submission depends on channel availability in
//      GPFIFO entries (as in any other channel type) but also on channel
//      locking. Each channel has a lock to enforce ordering of pushes. The
//      channel's CSL lock is taken on channel reservation until uvm_push_end.
//      When the Confidential Computing feature is enabled, channels are
//      stateful, and the CSL lock protects their CSL state/context.
//
//      Operations allowed while holding this lock
//       - Pushing work to CE secure channels
//       - Pushing work to CE channels (except for WLC channels)
//
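//      A minimal usage sketch, assuming the uvm_push_begin()/uvm_push_end()
//      calls that the channel tests in this change set already use (error
//      handling omitted); the semaphore and the channel's CSL lock are held
//      for the lifetime of a push:
//
//          uvm_push_t push;
//
//          // Reserves a channel; with the Confidential Computing feature
//          // enabled this also acquires the pool's push semaphore and the
//          // channel's CSL lock.
//          status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "example");
//
//          // ... push methods ...
//
//          // Ends the push, releasing the CSL lock and the semaphore.
//          uvm_push_end(&push);
//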
// - WLC CSL channel pool semaphore
//      Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
//      Condition: The Confidential Computing feature is enabled
//      Semaphore per WLC channel pool
//
//      The semaphore controls concurrent pushes to WLC channels. WLC work

@@ -346,8 +350,8 @@
//      channel reservation until uvm_push_end. SEC2 channels are stateful
//      channels and the CSL lock protects their CSL state/context.
//
//      This lock ORDER is different and sits below generic secure channel CSL
//      lock and above SEC2 CSL lock. This reflects the dual nature of WLC
//      This lock ORDER is different and sits below the generic channel CSL
//      lock and above the SEC2 CSL lock. This reflects the dual nature of WLC
//      channels; they use SEC2 indirect work launch during initialization,
//      and after their schedule is initialized they provide indirect launch
//      functionality to other CE channels.

@@ -357,6 +361,7 @@
//
// - SEC2 CSL channel pool semaphore
//      Order: UVM_LOCK_ORDER_SEC2_CSL_PUSH
//      Condition: The Confidential Computing feature is enabled
//      Semaphore per SEC2 channel pool
//
//      The semaphore controls concurrent pushes to SEC2 channels. SEC2 work

@@ -366,9 +371,9 @@
//      channel reservation until uvm_push_end. SEC2 channels are stateful
//      channels and the CSL lock protects their CSL state/context.
//
//      This lock ORDER is different and lower than the generic secure channel
//      lock to allow secure work submission to use a SEC2 channel to submit
//      work before releasing the CSL lock of the originating secure channel.
//      This lock ORDER is different and lower than UVM_LOCK_ORDER_CSL_PUSH
//      to allow secure work submission to use a SEC2 channel to submit
//      work before releasing the CSL lock of the originating channel.
//
//      Operations allowed while holding this lock
//       - Pushing work to SEC2 channels

@@ -408,16 +413,18 @@
//
// - WLC Channel lock
//      Order: UVM_LOCK_ORDER_WLC_CHANNEL
//      Condition: The Confidential Computing feature is enabled
//      Spinlock (uvm_spinlock_t)
//
//      Lock protecting the state of WLC channels in a channel pool. This lock
//      is separate from the above generic channel lock to allow for indirect
//      worklaunch pushes while holding the main channel lock.
//      (WLC pushes don't need any of the pushbuffer locks described above)
//      is separate from the generic channel lock (UVM_LOCK_ORDER_CHANNEL)
//      to allow for indirect worklaunch pushes while holding the main channel
//      lock (WLC pushes don't need any of the pushbuffer locks described
//      above)
//
// - Tools global VA space list lock (g_tools_va_space_list_lock)
//      Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
//      Reader/writer lock (rw_sempahore)
//      Reader/writer lock (rw_semaphore)
//
//      This lock protects the list of VA spaces used when broadcasting
//      UVM profiling events.

@@ -437,9 +444,10 @@
//
// - Tracking semaphores
//      Order: UVM_LOCK_ORDER_SECURE_SEMAPHORE
//      When the Confidential Computing feature is enabled, CE semaphores are
//      encrypted, and require to take the CSL lock (UVM_LOCK_ORDER_LEAF) to
//      decrypt the payload.
//      Condition: The Confidential Computing feature is enabled
//
//      CE semaphore payloads are encrypted, and require to take the CSL lock
//      (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
//
// - Leaf locks
//      Order: UVM_LOCK_ORDER_LEAF

@@ -392,12 +392,6 @@ static NV_STATUS uvm_mem_alloc_vidmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **me
    return uvm_mem_alloc(&params, mem_out);
}

// Helper for allocating protected vidmem with the default page size
static NV_STATUS uvm_mem_alloc_vidmem_protected(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **mem_out)
{
    return uvm_mem_alloc_vidmem(size, gpu, mem_out);
}

// Helper for allocating sysmem and mapping it on the CPU
static NV_STATUS uvm_mem_alloc_sysmem_and_map_cpu_kernel(NvU64 size, struct mm_struct *mm, uvm_mem_t **mem_out)
{

@@ -134,6 +134,22 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
    // first map operation
    uvm_page_mask_complement(&va_block_context->caller_page_mask, &va_block->maybe_mapped_pages);

    if (uvm_va_block_is_hmm(va_block) && !UVM_ID_IS_CPU(dest_id)) {
        // Do not map pages that are already resident on the CPU. This is in
        // order to avoid breaking system-wide atomic operations on HMM. HMM's
        // implementation of system-side atomic operations involves restricting
        // mappings to one processor (CPU or a GPU) at a time. If we were to
        // grant a GPU a mapping to system memory, this gets into trouble
        // because, on the CPU side, Linux can silently upgrade PTE permissions
        // (move from read-only, to read-write, without any MMU notifiers
        // firing), thus breaking the model by allowing simultaneous read-write
        // access from two separate processors. To avoid that, just don't map
        // such pages at all, when migrating.
        uvm_page_mask_andnot(&va_block_context->caller_page_mask,
                             &va_block_context->caller_page_mask,
                             uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU));
    }

    // Only map those pages that are not mapped anywhere else (likely due
    // to a first touch or a migration). We pass
    // UvmEventMapRemoteCauseInvalid since the destination processor of a

@@ -391,11 +391,13 @@ uvm_gpu_address_t uvm_push_inline_data_end(uvm_push_inline_data_t *data)
        inline_data_address = (NvU64) (uintptr_t)(push->next + 1);
    }
    else {
        uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);

        // Offset of the inlined data within the push.
        inline_data_address = (push->next - push->begin + 1) * UVM_METHOD_SIZE;

        // Add GPU VA of the push begin
        inline_data_address += uvm_pushbuffer_get_gpu_va_for_push(channel->pool->manager->pushbuffer, push);
        inline_data_address += uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
    }

    // This will place a noop right before the inline data that was written.

@@ -438,10 +440,8 @@ NvU64 *uvm_push_timestamp(uvm_push_t *push)

    if (uvm_channel_is_ce(push->channel))
        gpu->parent->ce_hal->semaphore_timestamp(push, address.address);
    else if (uvm_channel_is_sec2(push->channel))
        gpu->parent->sec2_hal->semaphore_timestamp(push, address.address);
    else
        UVM_ASSERT_MSG(0, "Semaphore release timestamp on an unsupported channel.\n");
        gpu->parent->sec2_hal->semaphore_timestamp(push, address.address);

    return timestamp;
}

@@ -377,11 +377,6 @@ static bool uvm_push_has_space(uvm_push_t *push, NvU32 free_space)
NV_STATUS uvm_push_begin_fake(uvm_gpu_t *gpu, uvm_push_t *push);
void uvm_push_end_fake(uvm_push_t *push);

static bool uvm_push_is_fake(uvm_push_t *push)
{
    return !push->channel;
}

// Begin an inline data fragment in the push
//
// The inline data will be ignored by the GPU, but can be referenced from

@@ -40,10 +40,9 @@

static NvU32 get_push_begin_size(uvm_channel_t *channel)
{
    if (uvm_channel_is_sec2(channel)) {
        // SEC2 channels allocate CSL signature buffer at the beginning.
    // SEC2 channels allocate CSL signature buffer at the beginning.
    if (uvm_channel_is_sec2(channel))
        return UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE + UVM_METHOD_SIZE;
    }

    return 0;
}

@@ -51,10 +50,14 @@ static NvU32 get_push_begin_size(uvm_channel_t *channel)
// This is the storage required by a semaphore release.
static NvU32 get_push_end_min_size(uvm_channel_t *channel)
{
    if (uvm_channel_is_ce(channel)) {
        if (uvm_channel_is_wlc(channel)) {
            // Space (in bytes) used by uvm_push_end() on a Secure CE channel.
            // Note that Secure CE semaphore release pushes two memset and one
    uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);

    if (uvm_conf_computing_mode_enabled(gpu)) {
        if (uvm_channel_is_ce(channel)) {
            // Space (in bytes) used by uvm_push_end() on a CE channel when
            // the Confidential Computing feature is enabled.
            //
            // Note that CE semaphore release pushes two memset and one
            // encryption method on top of the regular release.
            // Memset size
            // -------------

@@ -75,43 +78,44 @@ static NvU32 get_push_end_min_size(uvm_channel_t *channel)
            //
            // TOTAL : 144 Bytes

            // Same as CE + LCIC GPPut update + LCIC doorbell
            return 24 + 144 + 24 + 24;
        }
        else if (uvm_channel_is_secure_ce(channel)) {
            if (uvm_channel_is_wlc(channel)) {
                // Same as CE + LCIC GPPut update + LCIC doorbell
                return 24 + 144 + 24 + 24;
            }

            return 24 + 144;
        }
        // Space (in bytes) used by uvm_push_end() on a CE channel.
        return 24;
    }
    else if (uvm_channel_is_sec2(channel)) {

        UVM_ASSERT(uvm_channel_is_sec2(channel));

        // A perfectly aligned inline buffer in SEC2 semaphore release.
        // We add UVM_METHOD_SIZE because of the NOP method to reserve
        // UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES (the inline buffer.)
        return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_METHOD_SIZE;
    }

    return 0;
    UVM_ASSERT(uvm_channel_is_ce(channel));

    // Space (in bytes) used by uvm_push_end() on a CE channel.
    return 24;
}

static NvU32 get_push_end_max_size(uvm_channel_t *channel)
{
    if (uvm_channel_is_ce(channel)) {
        if (uvm_channel_is_wlc(channel)) {
            // WLC pushes are always padded to UVM_MAX_WLC_PUSH_SIZE
            return UVM_MAX_WLC_PUSH_SIZE;
        }
        // Space (in bytes) used by uvm_push_end() on a CE channel.
        return get_push_end_min_size(channel);
    }
    else if (uvm_channel_is_sec2(channel)) {
        // Space (in bytes) used by uvm_push_end() on a SEC2 channel.
        // Note that SEC2 semaphore release uses an inline buffer with alignment
        // requirements. This is the "worst" case semaphore_release storage.
        return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT;
    }
    // WLC pushes are always padded to UVM_MAX_WLC_PUSH_SIZE
    if (uvm_channel_is_wlc(channel))
        return UVM_MAX_WLC_PUSH_SIZE;

    return 0;
    // Space (in bytes) used by uvm_push_end() on a SEC2 channel.
    // Note that SEC2 semaphore release uses an inline buffer with alignment
    // requirements. This is the "worst" case semaphore_release storage.
    if (uvm_channel_is_sec2(channel))
        return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT;

    UVM_ASSERT(uvm_channel_is_ce(channel));

    // Space (in bytes) used by uvm_push_end() on a CE channel.
    return get_push_end_min_size(channel);
}

static NV_STATUS test_push_end_size(uvm_va_space_t *va_space)

@@ -294,10 +298,19 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu;
    NvU32 i;
    uvm_push_t *pushes;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    uvm_channel_type_t channel_type = UVM_CHANNEL_TYPE_GPU_INTERNAL;
    uvm_tracker_t tracker;

    // When the Confidential Computing feature is enabled, a channel reserved at
    // the start of a push cannot be reserved again until that push ends. The
    // test is waived, because the number of pushes it starts per pool exceeds
    // the number of channels in the pool, so it would block indefinitely.
    gpu = uvm_va_space_find_first_gpu(va_space);

    if ((gpu != NULL) && uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    uvm_tracker_init(&tracker);

    // As noted above, this test does unsafe things that would be detected by
    // lock tracking, opt-out.

@@ -310,16 +323,11 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
    }

    for_each_va_space_gpu(gpu, va_space) {
        NvU32 i;

        // A secure channels reserved at the start of a push cannot be reserved
        // again until that push ends. The test would block indefinitely
        // if secure pools are not skipped, because the number of pushes started
        // per pool exceeds the number of channels in the pool.
        if (uvm_channel_type_requires_secure_pool(gpu, channel_type))
            goto done;
        for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {
            uvm_push_t *push = &pushes[i];
            status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
            status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, push, "concurrent push %u", i);
            TEST_CHECK_GOTO(status == NV_OK, done);
        }
        for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {

@@ -458,7 +458,7 @@ static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
    void *push_unprotected_cpu_va;
    NvU32 pushbuffer_offset = gpfifo->pushbuffer_offset;
    NvU32 push_info_index = gpfifo->push_info - channel->push_infos;
    uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
    uvm_push_crypto_bundle_t *crypto_bundle = channel->conf_computing.push_crypto_bundles + push_info_index;

    if (channel->conf_computing.push_crypto_bundles == NULL)

@@ -499,7 +499,7 @@ void uvm_pushbuffer_mark_completed(uvm_channel_t *channel, uvm_gpfifo_entry_t *g
    uvm_pushbuffer_chunk_t *chunk;
    bool need_to_update_chunk = false;
    uvm_push_info_t *push_info = gpfifo->push_info;
    uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);

    UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);

@@ -270,7 +270,7 @@ static NV_STATUS alloc_and_init_mem(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size
    *mem = NULL;

    if (type == MEM_ALLOC_TYPE_VIDMEM_PROTECTED) {
        TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem_protected(size, gpu, mem));
        TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
        TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
        TEST_NV_CHECK_GOTO(ce_memset_gpu(gpu, *mem, size, 0xdead), err);
    }

@@ -348,9 +348,9 @@ static NV_STATUS cpu_decrypt(uvm_channel_t *channel,
    return NV_OK;
}

// gpu_encrypt uses a secure CE for encryption (instead of SEC2). SEC2 does not
// support encryption. The following function is copied from uvm_ce_test.c and
// adapted to SEC2 tests.
// gpu_encrypt uses the Copy Engine for encryption, instead of SEC2. SEC2 does
// not support encryption. The following function is copied from uvm_ce_test.c
// and adapted to SEC2 tests.
static void gpu_encrypt(uvm_push_t *push,
                        uvm_mem_t *dst_mem,
                        uvm_mem_t *src_mem,

@@ -229,6 +229,24 @@ static void unmap_user_pages(struct page **pages, void *addr, NvU64 size)
    uvm_kvfree(pages);
}

// This must be called with the mmap_lock held in read mode or better.
static NV_STATUS check_vmas(struct mm_struct *mm, NvU64 start_va, NvU64 size)
{
    struct vm_area_struct *vma;
    NvU64 addr = start_va;
    NvU64 region_end = start_va + size;

    do {
        vma = find_vma(mm, addr);
        if (!vma || !(addr >= vma->vm_start) || uvm_file_is_nvidia_uvm(vma->vm_file))
            return NV_ERR_INVALID_ARGUMENT;

        addr = vma->vm_end;
    } while (addr < region_end);

    return NV_OK;
}
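
// A usage sketch (it simply mirrors the caller in map_user_pages() below;
// the mm must stay read-locked across the call and the subsequent pinning):
//
//     nv_mmap_read_lock(current->mm);
//     status = check_vmas(current->mm, user_va, size);
//     ...
//     nv_mmap_read_unlock(current->mm);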

// Map virtual memory of data from [user_va, user_va + size) of current process into kernel.
// Sets *addr to kernel mapping and *pages to the array of struct pages that contain the memory.
static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages)

@@ -237,7 +255,6 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
    long ret = 0;
    long num_pages;
    long i;
    struct vm_area_struct **vmas = NULL;

    *addr = NULL;
    *pages = NULL;

@@ -254,22 +271,30 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
        goto fail;
    }

    vmas = uvm_kvmalloc(sizeof(struct vm_area_struct *) * num_pages);
    if (vmas == NULL) {
        status = NV_ERR_NO_MEMORY;
    // Although uvm_down_read_mmap_lock() is preferable due to its participation
    // in the UVM lock dependency tracker, it cannot be used here. That's
    // because pin_user_pages() may fault in HMM pages which are GPU-resident.
    // When that happens, the UVM page fault handler would record another
    // mmap_read_lock() on the same thread as this one, leading to a false
    // positive lock dependency report.
    //
    // Therefore, use the lower level nv_mmap_read_lock() here.
    nv_mmap_read_lock(current->mm);
    status = check_vmas(current->mm, user_va, size);
    if (status != NV_OK) {
        nv_mmap_read_unlock(current->mm);
        goto fail;
    }

    nv_mmap_read_lock(current->mm);
    ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, vmas);
    ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, NULL);
    nv_mmap_read_unlock(current->mm);

    if (ret != num_pages) {
        status = NV_ERR_INVALID_ARGUMENT;
        goto fail;
    }

    for (i = 0; i < num_pages; i++) {
        if (page_count((*pages)[i]) > MAX_PAGE_COUNT || uvm_file_is_nvidia_uvm(vmas[i]->vm_file)) {
        if (page_count((*pages)[i]) > MAX_PAGE_COUNT) {
            status = NV_ERR_INVALID_ARGUMENT;
            goto fail;
        }

@@ -279,15 +304,12 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
    if (*addr == NULL)
        goto fail;

    uvm_kvfree(vmas);
    return NV_OK;

fail:
    if (*pages == NULL)
        return status;

    uvm_kvfree(vmas);

    if (ret > 0)
        uvm_put_user_pages_dirty(*pages, ret);
    else if (ret < 0)

@@ -3055,7 +3055,7 @@ static NV_STATUS conf_computing_copy_pages_finish(uvm_va_block_t *block,
    void *auth_tag_buffer_base = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    void *staging_buffer_base = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);

    UVM_ASSERT(uvm_channel_is_secure(push->channel));
    UVM_ASSERT(uvm_conf_computing_mode_enabled(push->gpu));

    if (UVM_ID_IS_GPU(copy_state->dst.id))
        return NV_OK;

@@ -3106,7 +3106,7 @@ static void block_copy_push(uvm_va_block_t *block,

    uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);

    if (uvm_channel_is_secure(push->channel)) {
    if (uvm_conf_computing_mode_enabled(gpu)) {
        if (UVM_ID_IS_CPU(copy_state->src.id))
            conf_computing_block_copy_push_cpu_to_gpu(block, copy_state, region, push);
        else

@@ -3134,19 +3134,18 @@ static NV_STATUS block_copy_end_push(uvm_va_block_t *block,
    // at that point.
    uvm_push_end(push);

    if ((push_status == NV_OK) && uvm_channel_is_secure(push->channel))
    if ((push_status == NV_OK) && uvm_conf_computing_mode_enabled(push->gpu))
        push_status = conf_computing_copy_pages_finish(block, copy_state, push);

    tracker_status = uvm_tracker_add_push_safe(copy_tracker, push);
    if (push_status == NV_OK)
        push_status = tracker_status;

    if (uvm_channel_is_secure(push->channel)) {
        uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    if (uvm_conf_computing_mode_enabled(push->gpu)) {
        uvm_tracker_t local_tracker = UVM_TRACKER_INIT();

        uvm_tracker_overwrite_with_push(&local_tracker, push);
        uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool,
        uvm_conf_computing_dma_buffer_free(&push->gpu->conf_computing.dma_buffer_pool,
                                           copy_state->dma_buffer,
                                           &local_tracker);
        copy_state->dma_buffer = NULL;

@@ -9612,15 +9611,9 @@ static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
        if (uvm_processor_mask_empty(&revoke_processors))
            new_prot = UVM_PROT_READ_WRITE;
    }
    if (logical_prot == UVM_PROT_READ_WRITE_ATOMIC) {
        // HMM allocations with logical read/write/atomic permission can be
        // upgraded without notifying the driver so assume read/write/atomic
        // even if the fault is only for reading.
        if (new_prot == UVM_PROT_READ_WRITE ||
            (UVM_ID_IS_CPU(fault_processor_id) && uvm_va_block_is_hmm(va_block))) {
            if (uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(new_residency)], fault_processor_id))
                new_prot = UVM_PROT_READ_WRITE_ATOMIC;
        }
    if (logical_prot == UVM_PROT_READ_WRITE_ATOMIC && new_prot == UVM_PROT_READ_WRITE) {
        if (uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(new_residency)], fault_processor_id))
            new_prot = UVM_PROT_READ_WRITE_ATOMIC;
    }

    return new_prot;

@@ -9857,8 +9850,6 @@ out:
    return status == NV_OK ? tracker_status : status;
}

// TODO: Bug 1750144: check logical permissions from HMM to know what's the
// maximum allowed.
uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
                                                        uvm_processor_id_t processor_id,
                                                        uvm_page_index_t page_index)

@@ -9935,14 +9926,18 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
    // Exclude the processor for which the mapping protections are being computed
    uvm_processor_mask_clear(&write_mappings, processor_id);

    // At this point, any processor with atomic mappings either has native atomics support to the
    // processor with the resident copy or has disabled system-wide atomics. If the requesting
    // processor has disabled system-wide atomics or has native atomics to that processor, we can
    // map with ATOMIC privileges. Likewise, if there are no other processors with WRITE or ATOMIC
    // mappings, we can map with ATOMIC privileges.
    // At this point, any processor with atomic mappings either has native
    // atomics support to the processor with the resident copy or has
    // disabled system-wide atomics. If the requesting processor has
    // disabled system-wide atomics or has native atomics to that processor,
    // we can map with ATOMIC privileges. Likewise, if there are no other
    // processors with WRITE or ATOMIC mappings, we can map with ATOMIC
    // privileges. For HMM, don't allow GPU atomic access to remote mapped
    // system memory even if there are no write mappings since CPU access
    // can be upgraded without notification.
    if (!uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, processor_id) ||
        uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(residency)], processor_id) ||
        uvm_processor_mask_empty(&write_mappings)) {
        (uvm_processor_mask_empty(&write_mappings) && !uvm_va_block_is_hmm(va_block))) {
        return UVM_PROT_READ_WRITE_ATOMIC;
    }

@@ -86,6 +86,14 @@

#include <linux/ioport.h>

#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
#include <linux/cc_platform.h>
#endif

#if defined(NV_ASM_CPUFEATURE_H_PRESENT)
#include <asm/cpufeature.h>
#endif

#include "conftest/patches.h"

#define RM_THRESHOLD_TOTAL_IRQ_COUNT 100000

@@ -139,8 +147,6 @@ struct semaphore nv_linux_devices_lock;

static NvTristate nv_chipset_is_io_coherent = NV_TRISTATE_INDETERMINATE;

NvU64 nv_shared_gpa_boundary = 0;

// True if all the successfully probed devices support ATS
// Assigned at device probe (module init) time
NvBool nv_ats_supported = NVCPU_IS_PPC64LE

@@ -234,77 +240,23 @@ struct dev_pm_ops nv_pm_ops = {
*** STATIC functions
***/

#if defined(NVCPU_X86_64)
#define NV_AMD_SEV_BIT BIT(1)

#define NV_GENMASK_ULL(h, l) \
    (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))

static
void get_shared_gpa_boundary(
void nv_detect_conf_compute_platform(
    void
)
{
    NvU32 priv_high = cpuid_ebx(0x40000003);
    if (priv_high & BIT(22))
#if defined(NV_CC_PLATFORM_PRESENT)
    os_cc_enabled = cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);

#if defined(X86_FEATURE_TDX_GUEST)
    if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
    {
        NvU32 isolation_config_b = cpuid_ebx(0x4000000C);
        nv_shared_gpa_boundary = ((NvU64)1) << ((isolation_config_b & NV_GENMASK_ULL(11, 6)) >> 6);
        os_cc_tdx_enabled = NV_TRUE;
    }
}

static
NvBool nv_is_sev_supported(
    void
)
{
    unsigned int eax, ebx, ecx, edx;

    /* Check for the SME/SEV support leaf */
    eax = 0x80000000;
    ecx = 0;
    native_cpuid(&eax, &ebx, &ecx, &edx);
    if (eax < 0x8000001f)
        return NV_FALSE;

    /* By design, a VM using vTOM doesn't see the SEV setting */
    get_shared_gpa_boundary();
    if (nv_shared_gpa_boundary != 0)
        return NV_TRUE;

    eax = 0x8000001f;
    ecx = 0;
    native_cpuid(&eax, &ebx, &ecx, &edx);
    /* Check whether SEV is supported */
    if (!(eax & NV_AMD_SEV_BIT))
        return NV_FALSE;

    return NV_TRUE;
}
#endif

static
void nv_sev_init(
    void
)
{
#if defined(MSR_AMD64_SEV) && defined(NVCPU_X86_64)
    NvU32 lo_val, hi_val;

    if (!nv_is_sev_supported())
        return;

    rdmsr(MSR_AMD64_SEV, lo_val, hi_val);

    os_sev_status = lo_val;
#if defined(MSR_AMD64_SEV_ENABLED)
    os_sev_enabled = (os_sev_status & MSR_AMD64_SEV_ENABLED);
#endif

    /* By design, a VM using vTOM doesn't see the SEV setting */
    if (nv_shared_gpa_boundary != 0)
        os_sev_enabled = NV_TRUE;

#else
    os_cc_enabled = NV_FALSE;
    os_cc_tdx_enabled = NV_FALSE;
#endif
}
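
/*
 * nv_detect_conf_compute_platform() now relies on kernel-provided interfaces
 * rather than raw CPUID/MSR probing: os_cc_enabled reflects
 * cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT), and os_cc_tdx_enabled is set
 * when cpu_feature_enabled(X86_FEATURE_TDX_GUEST) reports a TDX guest. When
 * NV_CC_PLATFORM_PRESENT is not defined, both flags default to NV_FALSE.
 */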

@@ -710,7 +662,7 @@ nv_module_init(nv_stack_t **sp)
    }

    nv_init_rsync_info();
    nv_sev_init();
    nv_detect_conf_compute_platform();

    if (!rm_init_rm(*sp))
    {

@@ -4570,19 +4522,19 @@ NvU64 NV_API_CALL nv_get_dma_start_address(
     * as the starting address for all DMA mappings.
     */
    saved_dma_mask = pci_dev->dma_mask;
    if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64)) != 0)
    if (dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)) != 0)
    {
        goto done;
    }

    dma_addr = pci_map_single(pci_dev, NULL, 1, DMA_BIDIRECTIONAL);
    if (pci_dma_mapping_error(pci_dev, dma_addr))
    dma_addr = dma_map_single(&pci_dev->dev, NULL, 1, DMA_BIDIRECTIONAL);
    if (dma_mapping_error(&pci_dev->dev, dma_addr))
    {
        pci_set_dma_mask(pci_dev, saved_dma_mask);
        dma_set_mask(&pci_dev->dev, saved_dma_mask);
        goto done;
    }

    pci_unmap_single(pci_dev, dma_addr, 1, DMA_BIDIRECTIONAL);
    dma_unmap_single(&pci_dev->dev, dma_addr, 1, DMA_BIDIRECTIONAL);

    /*
     * From IBM: "For IODA2, native DMA bypass or KVM TCE-based implementation

@@ -4614,7 +4566,7 @@ NvU64 NV_API_CALL nv_get_dma_start_address(
         */
        nv_printf(NV_DBG_WARNINGS,
            "NVRM: DMA window limited by platform\n");
        pci_set_dma_mask(pci_dev, saved_dma_mask);
        dma_set_mask(&pci_dev->dev, saved_dma_mask);
        goto done;
    }
    else if ((dma_addr & saved_dma_mask) != 0)

@@ -4633,7 +4585,7 @@ NvU64 NV_API_CALL nv_get_dma_start_address(
         */
        nv_printf(NV_DBG_WARNINGS,
            "NVRM: DMA window limited by memory size\n");
        pci_set_dma_mask(pci_dev, saved_dma_mask);
        dma_set_mask(&pci_dev->dev, saved_dma_mask);
        goto done;
    }
}
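
/*
 * The nv_get_dma_start_address() changes above swap the legacy PCI DMA
 * wrappers for the generic DMA API on the device's struct device:
 *   pci_set_dma_mask(pdev, mask)      -> dma_set_mask(&pdev->dev, mask)
 *   pci_map_single(pdev, ...)         -> dma_map_single(&pdev->dev, ...)
 *   pci_dma_mapping_error(pdev, addr) -> dma_mapping_error(&pdev->dev, addr)
 *   pci_unmap_single(pdev, addr, ...) -> dma_unmap_single(&pdev->dev, addr, ...)
 */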

@@ -209,7 +209,7 @@ NV_STATUS nvUvmInterfaceSessionCreate(uvmGpuSessionHandle *session,
    memset(platformInfo, 0, sizeof(*platformInfo));
    platformInfo->atsSupported = nv_ats_supported;

    platformInfo->sevEnabled = os_sev_enabled;
    platformInfo->sevEnabled = os_cc_enabled;

    status = rm_gpu_ops_create_session(sp, (gpuSessionHandle *)session);

@@ -120,6 +120,9 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_array_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_array_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_cache
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_wc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_driver_hardened
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_driver_hardened_wc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_cache_shared
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_get_domain_bus_and_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_num_physpages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data

@@ -156,8 +159,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += full_name_hash
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_enable_atomic_ops_to_root
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vga_tryget
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pgprot_decrypted
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_mkdec
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_platform_has
NV_CONFTEST_FUNCTION_COMPILE_TESTS += seq_read_iter
NV_CONFTEST_FUNCTION_COMPILE_TESTS += unsafe_follow_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get

@@ -263,4 +265,4 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += vfio_pci_core_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += mdev_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += cmd_uphy_display_port_init
NV_CONFTEST_GENERIC_COMPILE_TESTS += cmd_uphy_display_port_off
NV_CONFTEST_GENERIC_COMPILE_TESTS += memory_failure_mf_sw_simulated_defined
NV_CONFTEST_GENERIC_COMPILE_TESTS += memory_failure_mf_sw_simulated_defined

@@ -41,8 +41,8 @@ extern nv_kthread_q_t nv_kthread_q;
NvU32 os_page_size = PAGE_SIZE;
NvU64 os_page_mask = NV_PAGE_MASK;
NvU8 os_page_shift = PAGE_SHIFT;
NvU32 os_sev_status = 0;
NvBool os_sev_enabled = 0;
NvBool os_cc_enabled = 0;
NvBool os_cc_tdx_enabled = 0;

#if defined(CONFIG_DMA_SHARED_BUFFER)
NvBool os_dma_buf_enabled = NV_TRUE;