mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
535.54.03
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.02\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.54.03\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

@@ -510,6 +510,12 @@ struct nv_file_private_t
nv_file_private_t *ctl_nvfp;
void *ctl_nvfp_priv;
NvU32 register_or_refcount;

//
// True if a client or an event was ever allocated on this fd.
// If false, RMAPI cleanup is skipped.
//
NvBool bCleanupRmapi;
};

// Forward define the gpu ops structures

@@ -959,6 +965,8 @@ NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU

void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);

void NV_API_CALL rm_request_dnotifier_state (nvidia_stack_t *, nv_state_t *);

void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);

@@ -1455,12 +1455,12 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
concurrently with the same UvmCslContext parameter in different threads. The caller must
guarantee this exclusion.

* nvUvmInterfaceCslLogDeviceEncryption
* nvUvmInterfaceCslRotateIv
* nvUvmInterfaceCslEncrypt
* nvUvmInterfaceCslDecrypt
* nvUvmInterfaceCslSign
* nvUvmInterfaceCslQueryMessagePool
* nvUvmInterfaceCslIncrementIv
*/

/*******************************************************************************
@@ -1495,62 +1495,17 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
*/
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);


/*******************************************************************************
nvUvmInterfaceCslLogDeviceEncryption

Returns an IV that can be later used in the nvUvmInterfaceCslEncrypt
method. The IV contains a "freshness bit" which value is set by this method
and subsequently dirtied by nvUvmInterfaceCslEncrypt to prevent
non-malicious reuse of the IV.

See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.

Arguments:
uvmCslContext[IN/OUT] - The CSL context.
encryptIv[OUT] - Parameter that is stored before a successful
device encryption. It is used as an input to
nvUvmInterfaceCslEncrypt.

Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - New IV would cause a counter to overflow.
*/
NV_STATUS nvUvmInterfaceCslAcquireEncryptionIv(UvmCslContext *uvmCslContext,
UvmCslIv *encryptIv);

/*******************************************************************************
nvUvmInterfaceCslLogDeviceEncryption

Logs and checks information about device encryption.

See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.

Arguments:
uvmCslContext[IN/OUT] - The CSL context.
decryptIv[OUT] - Parameter that is stored before a successful
device encryption. It is used as an input to
nvUvmInterfaceCslDecrypt.

Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
to overflow.
*/
NV_STATUS nvUvmInterfaceCslLogDeviceEncryption(UvmCslContext *uvmCslContext,
UvmCslIv *decryptIv);

/*******************************************************************************
nvUvmInterfaceCslRotateIv

Rotates the IV for a given channel and direction.
Rotates the IV for a given channel and operation.

This function will rotate the IV on both the CPU and the GPU.
Outstanding messages that have been encrypted by the GPU should first be
decrypted before calling this function with direction equal to
UVM_CSL_DIR_GPU_TO_CPU. Similiarly, outstanding messages that have been
decrypted before calling this function with operation equal to
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
encrypted by the CPU should first be decrypted before calling this function
with direction equal to UVM_CSL_DIR_CPU_TO_GPU. For a given direction
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
the channel must be idle before calling this function. This function can be
called regardless of the value of the IV's message counter.

@@ -1559,17 +1514,17 @@ NV_STATUS nvUvmInterfaceCslLogDeviceEncryption(UvmCslContext *uvmCslContext,

Arguments:
uvmCslContext[IN/OUT] - The CSL context.
direction[IN] - Either
- UVM_CSL_DIR_CPU_TO_GPU
- UVM_CSL_DIR_GPU_TO_CPU
operation[IN] - Either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT

Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The rotate operation would cause a counter
to overflow.
NV_ERR_INVALID_ARGUMENT - Invalid value for direction.
NV_ERR_INVALID_ARGUMENT - Invalid value for operation.
*/
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslDirection direction);
UvmCslOperation operation);

/*******************************************************************************
nvUvmInterfaceCslEncrypt
@@ -1580,7 +1535,7 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
this function produces undefined behavior. Performance is typically
maximized when the input and output buffers are 16-byte aligned. This is
natural alignment for AES block.
The encryptIV can be obtained from nvUvmInterfaceCslAcquireEncryptionIv.
The encryptIV can be obtained from nvUvmInterfaceCslIncrementIv.
However, it is optional. If it is NULL, the next IV in line will be used.

See "CSL Interface and Locking" for locking requirements.
@@ -1623,12 +1578,18 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,

Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[IN] - Size of the input and output buffers in
units of bytes. Value can range from 1 byte
to (2^32) - 1 bytes.
decryptIv[IN] - Parameter given by nvUvmInterfaceCslLogDeviceEncryption.
bufferSize[IN] - Size of the input and output buffers in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
internal counter is used.
inputBuffer[IN] - Address of ciphertext input buffer.
outputBuffer[OUT] - Address of plaintext output buffer.
addAuthData[IN] - Address of the plaintext additional authenticated data used to
calculate the authentication tag. Can be NULL.
addAuthDataSize[IN] - Size of the additional authenticated data in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
This parameter is ignored if addAuthData is NULL.
authTagBuffer[IN] - Address of authentication tag buffer.
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.

@@ -1643,6 +1604,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer);

/*******************************************************************************
@@ -1673,7 +1636,6 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
NvU8 const *inputBuffer,
NvU8 *authTagBuffer);


/*******************************************************************************
nvUvmInterfaceCslQueryMessagePool

@@ -1684,14 +1646,45 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,

Arguments:
uvmCslContext[IN/OUT] - The CSL context.
direction[IN] - Either UVM_CSL_DIR_CPU_TO_GPU or UVM_CSL_DIR_GPU_TO_CPU.
operation[IN] - Either UVM_CSL_OPERATION_ENCRYPT or UVM_CSL_OPERATION_DECRYPT.
messageNum[OUT] - Number of messages left before overflow.

Error codes:
NV_ERR_INVALID_ARGUMENT - The value of the direction parameter is illegal.
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
*/
NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
UvmCslDirection direction,
UvmCslOperation operation,
NvU64 *messageNum);

/*******************************************************************************
nvUvmInterfaceCslIncrementIv

Increments the message counter by the specified amount.

If iv is non-NULL then the incremented value is returned.
If operation is UVM_CSL_OPERATION_ENCRYPT then the returned IV's "freshness" bit is set and
can be used in nvUvmInterfaceCslEncrypt. If operation is UVM_CSL_OPERATION_DECRYPT then
the returned IV can be used in nvUvmInterfaceCslDecrypt.

See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.

Arguments:
uvmCslContext[IN/OUT] - The CSL context.
operation[IN] - Either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
increment[IN] - The amount by which the IV is incremented. Can be 0.
iv[out] - If non-NULL, a buffer to store the incremented IV.

Error codes:
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
NV_ERR_INSUFFICIENT_RESOURCES - Incrementing the message counter would result
in an overflow.
*/
NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU64 increment,
UvmCslIv *iv);

#endif // _NV_UVM_INTERFACE_H_
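The hunks above replace nvUvmInterfaceCslAcquireEncryptionIv and nvUvmInterfaceCslLogDeviceEncryption with nvUvmInterfaceCslIncrementIv, and extend nvUvmInterfaceCslDecrypt with additional authenticated data. A minimal caller sketch under those prototypes (the helper name, variables, and the assumption that bufferSize is the second decrypt parameter are illustrative, not part of the change):

// Hedged sketch only: acquire a decrypt IV through the new increment call and
// pass it to the extended decrypt signature. NULL/0 means no additional
// authenticated data.
static NV_STATUS example_csl_decrypt_one_message(UvmCslContext *ctx,
                                                 NvU32 size,
                                                 const NvU8 *cipherText,
                                                 NvU8 *plainText,
                                                 const NvU8 *authTag)
{
    UvmCslIv decryptIv;
    NV_STATUS status;

    // Advance the decrypt-side message counter by one and capture the IV.
    status = nvUvmInterfaceCslIncrementIv(ctx, UVM_CSL_OPERATION_DECRYPT, 1, &decryptIv);
    if (status != NV_OK)
        return status;

    return nvUvmInterfaceCslDecrypt(ctx, size, cipherText, &decryptIv, plainText,
                                    NULL, 0, authTag);
}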
@@ -286,6 +286,7 @@ typedef struct UvmGpuChannelInfo_tag
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
@@ -1060,10 +1061,10 @@ typedef struct UvmCslIv
NvU8 fresh;
} UvmCslIv;

typedef enum UvmCslDirection
typedef enum UvmCslOperation
{
UVM_CSL_DIR_CPU_TO_GPU,
UVM_CSL_DIR_GPU_TO_CPU
} UvmCslDirection;
UVM_CSL_OPERATION_ENCRYPT,
UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;

#endif // _NV_UVM_TYPES_H_
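The UvmCslDirection to UvmCslOperation rename above propagates through every CSL entry point. A hedged illustration of the mapping implied by the uvm_conf_computing.c changes later in this diff (the helper itself is illustrative only):

// Old direction-based call sites map onto operations roughly as:
//   CPU->GPU traffic (CPU encrypts)  -> UVM_CSL_OPERATION_ENCRYPT
//   GPU->CPU traffic (CPU decrypts)  -> UVM_CSL_OPERATION_DECRYPT
static NV_STATUS example_rotate_both_ivs(UvmCslContext *ctx)
{
    NV_STATUS status = nvUvmInterfaceCslRotateIv(ctx, UVM_CSL_OPERATION_ENCRYPT);

    if (status == NV_OK)
        status = nvUvmInterfaceCslRotateIv(ctx, UVM_CSL_OPERATION_DECRYPT);

    return status;
}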
@@ -103,13 +103,12 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, n

NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_acquire_encryption_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);

#endif
@@ -919,6 +919,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VFIO_MIGRATION_OPS_PRESENT" "" "types"
;;

vfio_precopy_info)
#
# Determine if vfio_precopy_info struct is present or not
#
# Added by commit 4db52602a6074 ("vfio: Extend the device migration
# protocol with PRE_COPY" in v6.2
#
CODE="
#include <linux/vfio.h>
struct vfio_precopy_info precopy_info;
"

compile_check_conftest "$CODE" "NV_VFIO_PRECOPY_INFO_PRESENT" "" "types"
;;

vfio_log_ops)
#
# Determine if vfio_log_ops struct is present or not

@@ -179,6 +179,7 @@ static inline int nv_drm_gem_handle_create(struct drm_file *filp,
return drm_gem_handle_create(filp, &nv_gem->base, handle);
}

#if defined(NV_DRM_FENCE_AVAILABLE)
static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem)
{
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
@@ -187,6 +188,7 @@ static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem
return nv_gem->base.dma_buf ? nv_gem->base.dma_buf->resv : &nv_gem->resv;
#endif
}
#endif

void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
struct nv_drm_gem_object *nv_gem,
@@ -338,11 +338,6 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
return NV_OK;
}

if (!gpu->parent->ce_hal->memcopy_is_valid(&push, dst, src)) {
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
return NV_OK;
}

// The input virtual addresses exist in UVM's internal address space, not
// the proxy address space
if (uvm_channel_is_proxy(push.channel)) {
@@ -401,7 +396,7 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
bool is_proxy_va_space;
bool is_proxy_va_space = false;
uvm_gpu_address_t gpu_verif_addr;
void *cpu_verif_addr;
uvm_mem_t *verif_mem = NULL;
@@ -437,6 +432,34 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
}
}

// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);

if (uvm_conf_computing_mode_enabled(gpu)) {
for (i = 0; i < iterations; ++i) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[0],
gpu_addresses[0],
size,
element_sizes[s],
gpu_verif_addr,
cpu_verif_addr,
i),
done);

}
}

// Because gpu_verif_addr is in sysmem, when the Confidential
// Computing feature is enabled, only the previous cases are valid.
// TODO: Bug 3839176: the test partially waived on Confidential
// Computing because it assumes that GPU can access system memory
// without using encryption.
goto done;
}

// Using a page size equal to the allocation size ensures that the UVM
// memories about to be allocated are physically contiguous. And since the
// size is a valid GPU page size, the memories can be virtually mapped on
@@ -448,37 +471,22 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
// Physical address in sysmem
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
gpu_addresses[0] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);

// Physical address in vidmem
mem_params.backing_gpu = gpu;
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);

// Virtual address (in UVM's internal address space) backed by vidmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
is_proxy_va_space = false;
gpu_addresses[2] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);

// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);

for (i = 0; i < iterations; ++i) {
for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
// Because gpu_verif_addr is in sysmem, when the Confidential
// Computing feature is enabled, only the following cases are
// valid.
//
// TODO: Bug 3839176: the test partially waived on
// Confidential Computing because it assumes that GPU can
// access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu) &&
!(gpu_addresses[k].is_unprotected && gpu_addresses[j].is_unprotected)) {
continue;
}
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[k],
gpu_addresses[j],
@@ -750,9 +750,9 @@ static void internal_channel_submit_work_wlc(uvm_push_t *push)
wlc_channel->channel_info.workSubmissionToken);
}

static NV_STATUS internal_channel_submit_work_indirect(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
@@ -765,10 +765,211 @@ static NV_STATUS internal_channel_submit_work_indirect(uvm_push_t *push,
NvU64 push_enc_gpu = uvm_pushbuffer_get_unprotected_gpu_va_for_push(pushbuffer, push);
void *push_enc_auth_tag;
uvm_gpu_address_t push_enc_auth_tag_gpu;
NvU64 gpfifo_gpu = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);
NvU64 gpfifo_gpu_va = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);

UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_wlc(push->launch_channel));

// WLC submissions are done under channel lock, so there should be no
// contention to get the right submission order.
UVM_ASSERT(push->channel->conf_computing.gpu_put == old_cpu_put);

// This can never stall or return error. WLC launch after WLC channels are
// initialized uses private static pb space and it neither needs the general
// PB space, nor it counts towards max concurrent pushes.
status = uvm_push_begin_on_reserved_channel(push->launch_channel,
&indirect_push,
"Worklaunch to '%s' via '%s'",
push->channel->name,
push->launch_channel->name);
UVM_ASSERT(status == NV_OK);


// Move over the pushbuffer data
// WLC channels use a static preallocated space for launch auth tags
push_enc_auth_tag = indirect_push.channel->conf_computing.launch_auth_tag_cpu;
push_enc_auth_tag_gpu = uvm_gpu_address_virtual(indirect_push.channel->conf_computing.launch_auth_tag_gpu_va);

uvm_conf_computing_cpu_encrypt(indirect_push.channel,
push_enc_cpu,
push->begin,
NULL,
uvm_push_get_size(push),
push_enc_auth_tag);

uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);

gpu->parent->ce_hal->decrypt(&indirect_push,
uvm_gpu_address_virtual(uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push)),
uvm_gpu_address_virtual(push_enc_gpu),
uvm_push_get_size(push),
push_enc_auth_tag_gpu);

gpu->parent->host_hal->set_gpfifo_entry(&gpfifo_entry,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
uvm_push_get_size(push),
UVM_GPFIFO_SYNC_PROCEED);

gpu->parent->ce_hal->memset_8(&indirect_push,
uvm_gpu_address_virtual(gpfifo_gpu_va),
gpfifo_entry,
sizeof(gpfifo_entry));

uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, push->channel->channel_info.gpPutGpuVa, new_gpu_put);

uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push,
push->channel->channel_info.workSubmissionOffsetGpuVa,
push->channel->channel_info.workSubmissionToken);

// Ignore return value of push_wait. It can only fail with channel error
// which will be detected when waiting for the primary push.
(void)uvm_push_end_and_wait(&indirect_push);

push->channel->conf_computing.gpu_put = new_gpu_put;
}

static void update_gpput_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, NvU32 new_gpu_put)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(sec2_push);
void *gpput_auth_tag_cpu, *gpput_enc_cpu;
uvm_gpu_address_t gpput_auth_tag_gpu, gpput_enc_gpu;
NvU32 gpput_scratchpad[UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT/sizeof(new_gpu_put)];

UVM_ASSERT(uvm_channel_is_sec2(sec2_push->channel));

gpput_enc_cpu = uvm_push_get_single_inline_buffer(sec2_push,
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT,
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT,
&gpput_enc_gpu);
gpput_auth_tag_cpu = uvm_push_get_single_inline_buffer(sec2_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&gpput_auth_tag_gpu);

// Update GPPUT. The update needs 4B write to specific offset,
// however we can only do 16B aligned decrypt writes.
// A poison value is written to all other locations, this is ignored in
// most locations and overwritten by HW for GPGET location
memset(gpput_scratchpad, 0, sizeof(gpput_scratchpad));
UVM_ASSERT(sizeof(*gpput_scratchpad) == sizeof(new_gpu_put));
gpput_scratchpad[(channel->channel_info.gpPutGpuVa % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT) /
sizeof(*gpput_scratchpad)] = new_gpu_put;

// Set value of GPGET to be the same as GPPUT. It will be overwritten by
// HW next time GET value changes. UVM never reads GPGET.
// However, RM does read it when freeing a channel. When this function
// is called from 'channel_manager_stop_wlc' we set the value of GPGET
// to the same value as GPPUT. Mismatch between these two values makes
// RM wait for any "pending" tasks, leading to significant delays in the
// channel teardown sequence.
UVM_ASSERT(channel->channel_info.gpPutGpuVa / UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT ==
channel->channel_info.gpGetGpuVa / UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
gpput_scratchpad[(channel->channel_info.gpGetGpuVa % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT) /
sizeof(*gpput_scratchpad)] = new_gpu_put;

uvm_conf_computing_cpu_encrypt(sec2_push->channel,
gpput_enc_cpu,
gpput_scratchpad,
NULL,
sizeof(gpput_scratchpad),
gpput_auth_tag_cpu);
gpu->parent->sec2_hal->decrypt(sec2_push,
UVM_ALIGN_DOWN(channel->channel_info.gpPutGpuVa,
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT),
gpput_enc_gpu.address,
sizeof(gpput_scratchpad),
gpput_auth_tag_gpu.address);
}
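// Hedged worked example (not part of the change): the scratchpad indexing in
// update_gpput_via_sec2() places a 4-byte GPPUT value inside a 16-byte SEC2
// decrypt window. Assuming UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT is 16, a
// gpPutGpuVa ending in 0x8 selects slot (0x8 % 16) / sizeof(NvU32) == 2 of
// gpput_scratchpad; the remaining slots hold the poison/GPGET values described
// in the comments above.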
static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, NvU32 put, NvU64 value)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(sec2_push);
void *gpfifo_auth_tag_cpu, *gpfifo_enc_cpu;
uvm_gpu_address_t gpfifo_auth_tag_gpu, gpfifo_enc_gpu;
NvU64 gpfifo_gpu = channel->channel_info.gpFifoGpuVa + put * sizeof(value);
NvU64 gpfifo_scratchpad[2];

UVM_ASSERT(uvm_channel_is_sec2(sec2_push->channel));

gpfifo_enc_cpu = uvm_push_get_single_inline_buffer(sec2_push,
sizeof(gpfifo_scratchpad),
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT,
&gpfifo_enc_gpu);
gpfifo_auth_tag_cpu = uvm_push_get_single_inline_buffer(sec2_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&gpfifo_auth_tag_gpu);

if (IS_ALIGNED(gpfifo_gpu, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT)) {
gpfifo_scratchpad[0] = value;

// Set the value of the odd entry to noop.
// It will be overwritten when the next entry is submitted.
gpu->parent->host_hal->set_gpfifo_noop(&gpfifo_scratchpad[1]);
}
else {
uvm_gpfifo_entry_t *previous_gpfifo;

UVM_ASSERT(put > 0);

previous_gpfifo = &channel->gpfifo_entries[put - 1];

if (previous_gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_CONTROL) {
gpfifo_scratchpad[0] = previous_gpfifo->control_value;
}
else {
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
NvU64 prev_pb_va = uvm_pushbuffer_get_gpu_va_base(pushbuffer) + previous_gpfifo->pushbuffer_offset;

// Reconstruct the previous gpfifo entry. UVM_GPFIFO_SYNC_WAIT is
// used only in static WLC schedule.
// Overwriting the previous entry with the same value doesn't hurt,
// whether the previous entry has been processed or not
gpu->parent->host_hal->set_gpfifo_entry(&gpfifo_scratchpad[0],
prev_pb_va,
previous_gpfifo->pushbuffer_size,
UVM_GPFIFO_SYNC_PROCEED);
}

gpfifo_scratchpad[1] = value;
}

uvm_conf_computing_cpu_encrypt(sec2_push->channel,
gpfifo_enc_cpu,
gpfifo_scratchpad,
NULL,
sizeof(gpfifo_scratchpad),
gpfifo_auth_tag_cpu);
gpu->parent->sec2_hal->decrypt(sec2_push,
UVM_ALIGN_DOWN(gpfifo_gpu, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT),
gpfifo_enc_gpu.address,
sizeof(gpfifo_scratchpad),
gpfifo_auth_tag_gpu.address);
}
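// Hedged summary (not part of the change): set_gpfifo_via_sec2() always writes
// a 16-byte pair of 8-byte GPFIFO entries because SEC2 decrypt targets must be
// 16-byte aligned. For an aligned put the pair is { new entry, noop }; for an
// unaligned put it is { reconstructed previous entry, new entry }, which is
// safe because rewriting the previous entry with its existing value changes
// nothing.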
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);

uvm_push_t indirect_push;
NV_STATUS status;
NvU64 gpfifo_entry;

void *push_enc_cpu = uvm_pushbuffer_get_unprotected_cpu_va_for_push(pushbuffer, push);
NvU64 push_enc_gpu = uvm_pushbuffer_get_unprotected_gpu_va_for_push(pushbuffer, push);
void *push_auth_tag_cpu;
uvm_gpu_address_t push_auth_tag_gpu;
uvm_spin_loop_t spin;

UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_sec2(push->launch_channel));

// If the old_cpu_put is not equal to the last gpu put, other pushes are
// pending that need to be submitted. That push/es' submission will update
@@ -790,60 +991,36 @@ static NV_STATUS internal_channel_submit_work_indirect(uvm_push_t *push,


// Move over the pushbuffer data
if (uvm_channel_is_sec2(indirect_push.channel)) {
push_enc_auth_tag = uvm_push_get_single_inline_buffer(&indirect_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&push_enc_auth_tag_gpu);
}
else {
// Auth tags cannot be in protected vidmem.
// WLC channels use a static preallocated space for launch auth tags
push_enc_auth_tag = indirect_push.channel->conf_computing.launch_auth_tag_cpu;
push_enc_auth_tag_gpu = uvm_gpu_address_virtual(indirect_push.channel->conf_computing.launch_auth_tag_gpu_va);
}
push_auth_tag_cpu = uvm_push_get_single_inline_buffer(&indirect_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&push_auth_tag_gpu);

uvm_conf_computing_cpu_encrypt(indirect_push.channel,
push_enc_cpu,
push->begin,
NULL,
uvm_push_get_size(push),
push_enc_auth_tag);
push_auth_tag_cpu);

uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);

if (uvm_channel_is_sec2(indirect_push.channel)) {
gpu->parent->sec2_hal->decrypt(&indirect_push,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
push_enc_gpu,
uvm_push_get_size(push),
push_enc_auth_tag_gpu.address);
}
else {
gpu->parent->ce_hal->decrypt(&indirect_push,
uvm_gpu_address_virtual(uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push)),
uvm_gpu_address_virtual(push_enc_gpu),
uvm_push_get_size(push),
push_enc_auth_tag_gpu);
}
gpu->parent->sec2_hal->decrypt(&indirect_push,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
push_enc_gpu,
uvm_push_get_size(push),
push_auth_tag_gpu.address);

gpu->parent->host_hal->set_gpfifo_entry(&gpfifo_entry,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
uvm_push_get_size(push),
UVM_GPFIFO_SYNC_PROCEED);

// TODO: Bug 2649842: RFE - Investigate using 64-bit semaphore
// SEC2 needs encrypt decrypt to be 16B aligned GPFIFO entries are only 8B
// Use 2x semaphore release to set the values directly.
// We could use a single 64 bit release if it were available
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu, NvU64_LO32(gpfifo_entry));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu + 4, NvU64_HI32(gpfifo_entry));

uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, push->channel->channel_info.gpPutGpuVa, new_gpu_put);
set_gpfifo_via_sec2(&indirect_push, push->channel, old_cpu_put, gpfifo_entry);
update_gpput_via_sec2(&indirect_push, push->channel, new_gpu_put);

// Ring the doorbell
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push,
push->channel->channel_info.workSubmissionOffsetGpuVa,
@@ -930,11 +1107,7 @@ void uvm_channel_end_push(uvm_push_t *push)
}
else if (uvm_conf_computing_mode_enabled(channel_manager->gpu) && !uvm_channel_is_sec2(channel)) {
if (uvm_channel_manager_is_wlc_ready(channel_manager)) {
NV_STATUS status = internal_channel_submit_work_indirect(push, cpu_put, new_cpu_put);

// This codepath should only be used during initialization and thus
// NEVER return an error.
UVM_ASSERT(status == NV_OK);
internal_channel_submit_work_indirect_wlc(push, cpu_put, new_cpu_put);
}
else {
// submitting via SEC2 starts a push, postpone until this push is ended
@@ -963,7 +1136,7 @@ void uvm_channel_end_push(uvm_push_t *push)
wmb();

if (needs_sec2_work_submit) {
NV_STATUS status = internal_channel_submit_work_indirect(push, cpu_put, new_cpu_put);
NV_STATUS status = internal_channel_submit_work_indirect_sec2(push, cpu_put, new_cpu_put);

// This codepath should only be used during initialization and thus
// NEVER return an error.
@@ -1007,7 +1180,6 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(channel->pool->manager) ?
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
NvU64 gpfifo_gpu = channel->channel_info.gpFifoGpuVa + (old_cpu_put * sizeof(entry->control_value));

UVM_ASSERT(!uvm_channel_is_sec2(channel));

@@ -1026,17 +1198,26 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
if (status != NV_OK)
return status;

// TODO: Bug 2649842: RFE - Investigate using 64-bit semaphore
// SEC2 needs encrypt decrypt to be 16B aligned GPFIFO entries are only 8B
// Use 2x semaphore release to set the values directly.
// One 64bit semahore release can be used instead once implemented.
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu, NvU64_LO32(entry->control_value));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu + 4, NvU64_HI32(entry->control_value));
if (uvm_channel_is_sec2(indirect_push.channel)) {
set_gpfifo_via_sec2(&indirect_push, channel, old_cpu_put, entry->control_value);
update_gpput_via_sec2(&indirect_push, channel, new_gpu_put);
} else {
uvm_gpu_t *gpu = uvm_push_get_gpu(&indirect_push);
NvU64 gpfifo_gpu_va = channel->channel_info.gpFifoGpuVa + (old_cpu_put * sizeof(entry->control_value));

gpu->parent->ce_hal->memset_8(&indirect_push,
uvm_gpu_address_virtual(gpfifo_gpu_va),
entry->control_value,
sizeof(entry->control_value));

uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, channel->channel_info.gpPutGpuVa, new_gpu_put);
}

uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, channel->channel_info.gpPutGpuVa, new_gpu_put);
do_semaphore_release(&indirect_push,
channel->channel_info.workSubmissionOffsetGpuVa,
channel->channel_info.workSubmissionToken);

status = uvm_push_end_and_wait(&indirect_push);
if (status != NV_OK)
@@ -1044,9 +1225,6 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,

channel->conf_computing.gpu_put = new_gpu_put;

// The above SEC2 work transferred everything
// Ring the doorbell
UVM_GPU_WRITE_ONCE(*channel->channel_info.workSubmissionOffset, channel->channel_info.workSubmissionToken);
return NV_OK;
}

@@ -1445,17 +1623,21 @@ static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
size_t aligned_wlc_push_size = UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_MAX_WLC_PUSH_SIZE + UVM_CONF_COMPUTING_AUTH_TAG_SIZE * 2,
aligned_wlc_push_size + UVM_CONF_COMPUTING_AUTH_TAG_SIZE * 2,
PAGE_SIZE,
&channel->conf_computing.static_pb_unprotected_sysmem);
if (status != NV_OK)
return status;

// Both pushes will be targets for SEC2 decrypt operations and have to
// be aligned for SEC2. The first push location will also be a target
// for CE decrypt operation and has to be aligned for CE decrypt.
status = uvm_rm_mem_alloc(gpu,
UVM_RM_MEM_TYPE_GPU,
UVM_MAX_WLC_PUSH_SIZE * 2,
UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT) * 2,
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&channel->conf_computing.static_pb_protected_vidmem);
if (status != NV_OK)
@@ -1464,16 +1646,16 @@ static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
channel->conf_computing.static_pb_unprotected_sysmem_cpu =
uvm_rm_mem_get_cpu_va(channel->conf_computing.static_pb_unprotected_sysmem);
channel->conf_computing.static_pb_unprotected_sysmem_auth_tag_cpu =
(char*)channel->conf_computing.static_pb_unprotected_sysmem_cpu + UVM_MAX_WLC_PUSH_SIZE;
(char*)channel->conf_computing.static_pb_unprotected_sysmem_cpu + aligned_wlc_push_size;

// The location below is only used for launch pushes but reuses
// the same sysmem allocation
channel->conf_computing.launch_auth_tag_cpu =
(char*)channel->conf_computing.static_pb_unprotected_sysmem_cpu +
UVM_MAX_WLC_PUSH_SIZE + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
aligned_wlc_push_size + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
channel->conf_computing.launch_auth_tag_gpu_va =
uvm_rm_mem_get_gpu_uvm_va(channel->conf_computing.static_pb_unprotected_sysmem, gpu) +
UVM_MAX_WLC_PUSH_SIZE + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
aligned_wlc_push_size + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;

channel->conf_computing.static_pb_protected_sysmem = uvm_kvmalloc(UVM_MAX_WLC_PUSH_SIZE + UVM_PAGE_SIZE_4K);
if (!channel->conf_computing.static_pb_protected_sysmem)
@@ -2576,7 +2758,7 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
// "decrypt_push" represents WLC decrypt push, constructed using fake_push.
// Copied to wlc_pb_base + UVM_MAX_WLC_PUSH_SIZE, as the second of the two
// pushes that make the WLC fixed schedule.
NvU64 decrypt_push_protected_gpu = protected_vidmem + UVM_MAX_WLC_PUSH_SIZE;
NvU64 decrypt_push_protected_gpu = UVM_ALIGN_UP(protected_vidmem + UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT);
NvU64 decrypt_push_unprotected_gpu = unprotected_sysmem_gpu + gpfifo_size;
void *decrypt_push_unprotected_cpu = (char*)gpfifo_unprotected_cpu + gpfifo_size;

@@ -2587,7 +2769,7 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
BUILD_BUG_ON(sizeof(*wlc_gpfifo_entries) != sizeof(*wlc->channel_info.gpFifoEntries));

UVM_ASSERT(uvm_channel_is_wlc(wlc));
UVM_ASSERT(tag_offset == UVM_MAX_WLC_PUSH_SIZE);
UVM_ASSERT(tag_offset == UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));

// WLC schedule consists of two parts, the number of entries needs to be even.
// This also guarantees that the size is 16B aligned
@@ -2692,11 +2874,9 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)

// Prime the WLC by setting "PUT" two steps ahead. Reuse the current
// cpu_put value that was used during channel initialization.
// Don't update wlc->cpu_put, it will be used to track
// submitted pushes as any other channel.
do_semaphore_release(&sec2_push,
wlc->channel_info.gpPutGpuVa,
(wlc->cpu_put + 2) % wlc->num_gpfifo_entries);
// Don't update wlc->cpu_put, it will be used to track submitted pushes
// as any other channel.
update_gpput_via_sec2(&sec2_push, wlc, (wlc->cpu_put + 2) % wlc->num_gpfifo_entries);

status = uvm_push_end_and_wait(&sec2_push);

@@ -3048,9 +3228,7 @@ static void channel_manager_stop_wlc(uvm_channel_manager_t *manager)
// Every gpfifo entry advances the gpu put of WLC by two so the current
// value is: (cpu_put * 2) % num_gpfifo_entries and it's ahead of the
// get pointer by 2.
do_semaphore_release(&push,
channel->channel_info.gpPutGpuVa,
(channel->cpu_put * 2 - 2) % channel->num_gpfifo_entries);
update_gpput_via_sec2(&push, channel, (channel->cpu_put * 2 - 2) % channel->num_gpfifo_entries);
}

status = uvm_push_end_and_wait(&push);

@@ -378,11 +378,12 @@ void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
NV_STATUS status;

uvm_mutex_lock(&channel->csl.ctx_lock);
status = nvUvmInterfaceCslLogDeviceEncryption(&channel->csl.ctx, iv);
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);

// nvUvmInterfaceLogDeviceEncryption fails when a 64-bit encryption counter
// overflows. This is not supposed to happen on CC.
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
// nvUvmInterfaceCslRotateIv.
UVM_ASSERT(status == NV_OK);
}

@@ -391,11 +392,12 @@ void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *
NV_STATUS status;

uvm_mutex_lock(&channel->csl.ctx_lock);
status = nvUvmInterfaceCslAcquireEncryptionIv(&channel->csl.ctx, iv);
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);

// nvUvmInterfaceLogDeviceEncryption fails when a 64-bit encryption counter
// overflows. This is not supposed to happen on CC.
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
// nvUvmInterfaceCslRotateIv.
UVM_ASSERT(status == NV_OK);
}

@@ -439,6 +441,8 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
(const NvU8 *) src_cipher,
src_iv,
(NvU8 *) dst_plain,
NULL,
0,
(const NvU8 *) auth_tag_buffer);
uvm_mutex_unlock(&channel->csl.ctx_lock);

@@ -42,9 +42,11 @@
// Use sizeof(UvmCslIv) to refer to the IV size.
#define UVM_CONF_COMPUTING_IV_ALIGNMENT 16

// SEC2 decrypt operation buffers are required to be 16-bytes aligned. CE
// encrypt/decrypt can be unaligned if the buffer lies in a single 32B segment.
// Otherwise, they need to be 32B aligned.
// SEC2 decrypt operation buffers are required to be 16-bytes aligned.
#define UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT 16

// CE encrypt/decrypt can be unaligned if the entire buffer lies in a single
// 32B segment. Otherwise, it needs to be 32B aligned.
#define UVM_CONF_COMPUTING_BUF_ALIGNMENT 32

#define UVM_CONF_COMPUTING_DMA_BUFFER_SIZE UVM_VA_BLOCK_SIZE
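The split comments above state the SEC2 and CE alignment rules separately. A small hedged helper expressing the CE rule (illustrative sketch, not part of the header):

// A CE encrypt/decrypt buffer is acceptable if it either starts 32B aligned or
// fits entirely within a single 32B segment.
static bool example_ce_buffer_alignment_ok(NvU64 gpu_va, NvU64 size)
{
    NvU64 first_segment = gpu_va / UVM_CONF_COMPUTING_BUF_ALIGNMENT;
    NvU64 last_segment = (gpu_va + size - 1) / UVM_CONF_COMPUTING_BUF_ALIGNMENT;

    return IS_ALIGNED(gpu_va, UVM_CONF_COMPUTING_BUF_ALIGNMENT) || (first_segment == last_segment);
}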
@@ -2575,7 +2575,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
continue;
}

if (folio_test_swapcache(page_folio(src_page))) {
if (PageSwapCache(src_page)) {
// TODO: Bug 4050579: Remove this when swap cached pages can be
// migrated.
if (service_context) {

@@ -166,6 +166,7 @@ void uvm_hal_hopper_sec2_decrypt(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, N
NvU32 *csl_sign_init = push->next;

// Check that the provided alignment matches HW
BUILD_BUG_ON(UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT != (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)));
BUILD_BUG_ON(UVM_CONF_COMPUTING_BUF_ALIGNMENT < (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)));
BUILD_BUG_ON(UVM_CONF_COMPUTING_BUF_ALIGNMENT % (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)) != 0);

@@ -161,22 +161,22 @@
// * WFI: 8B
// Total: 64B
//
// Push space needed for secure work launch is 224B. The push is constructed
// Push space needed for secure work launch is 364B. The push is constructed
// in 'internal_channel_submit_work_indirect' and 'uvm_channel_end_push'
// * CE decrypt (of indirect PB): 56B
// * 2*semaphore release (indirect GPFIFO entry): 2*24B
// * memset_8 (indirect GPFIFO entry): 44B
// * semaphore release (indirect GPPUT): 24B
// * semaphore release (indirect doorbell): 24B
// Appendix added in 'uvm_channel_end_push':
// * semaphore release (WLC tracking): 168B
// * semaphore increment (memcopy): 24B
// * semaphore release (payload): 24B
// * notifier memset: 40B
// * payload encryption: 64B
// * notifier memset: 40B
// * semaphore increment (LCIC GPPUT): 24B
// * semaphore release (LCIC doorbell): 24B
// Total: 368B
#define UVM_MAX_WLC_PUSH_SIZE (368)
// Total: 364B
#define UVM_MAX_WLC_PUSH_SIZE (364)

// Push space needed for static LCIC schedule, as initialized in
// 'setup_lcic_schedule':
@@ -184,7 +184,7 @@
// * semaphore increment (WLC GPPUT): 24B
// * semaphore increment (WLC GPPUT): 24B
// * semaphore increment (LCIC tracking): 160B
// * semaphore increment (memcopy): 24B
// * semaphore increment (payload): 24B
// * notifier memcopy: 36B
// * payload encryption: 64B
// * notifier memcopy: 36B

@@ -213,6 +213,7 @@ done:
typedef enum
{
MEM_ALLOC_TYPE_SYSMEM_DMA,
MEM_ALLOC_TYPE_SYSMEM_PROTECTED,
MEM_ALLOC_TYPE_VIDMEM_PROTECTED
} mem_alloc_type_t;

@@ -274,7 +275,11 @@ static NV_STATUS alloc_and_init_mem(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size
TEST_NV_CHECK_GOTO(ce_memset_gpu(gpu, *mem, size, 0xdead), err);
}
else {
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
if (type == MEM_ALLOC_TYPE_SYSMEM_DMA)
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
else
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem(size, NULL, mem));

TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(*mem), err);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
write_range_cpu(*mem, size, 0xdeaddead);
@@ -405,48 +410,6 @@ static void gpu_decrypt(uvm_push_t *push,
}
}

// This test only uses sysmem so that we can use the CPU for encryption and SEC2
// for decryption, i.e., the test doesn't depend on any other GPU engine for
// the encryption operation (refer to test_cpu_to_gpu_roundtrip()). This is not
// how SEC2 is used in the driver. The intended SEC2 usage is to decrypt from
// unprotected sysmem to protected vidmem, which is tested in
// test_cpu_to_gpu_roundtrip().
static NV_STATUS test_cpu_to_gpu_sysmem(uvm_gpu_t *gpu, size_t copy_size, size_t size)
{
NV_STATUS status = NV_OK;
uvm_mem_t *src_plain = NULL;
uvm_mem_t *cipher = NULL;
uvm_mem_t *dst_plain = NULL;
uvm_mem_t *auth_tag_mem = NULL;
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
uvm_push_t push;

TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_plain, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &cipher, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &auth_tag_mem, auth_tag_buffer_size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);

write_range_cpu(src_plain, size, uvm_get_stale_thread_id());
write_range_cpu(dst_plain, size, 0xA5A5A5A5);

TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_SEC2, &push, "enc(cpu)_dec(gpu)"), out);

cpu_encrypt(push.channel, cipher, src_plain, auth_tag_mem, size, copy_size);
gpu_decrypt(&push, dst_plain, cipher, auth_tag_mem, size, copy_size);

uvm_push_end_and_wait(&push);

TEST_CHECK_GOTO(mem_match(src_plain, dst_plain), out);

out:
uvm_mem_free(auth_tag_mem);
uvm_mem_free(cipher);
uvm_mem_free(dst_plain);
uvm_mem_free(src_plain);

return status;
}

// This test depends on the CE for the encryption, so we assume tests from
// uvm_ce_test.c have successfully passed.
static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, size_t size)
@@ -461,19 +424,16 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
uvm_push_t push;
UvmCslIv *decrypt_iv;
uvm_tracker_t tracker;

decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
if (!decrypt_iv)
return NV_ERR_NO_MEMORY;

uvm_tracker_init(&tracker);

TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_plain, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_plain, size, MEM_ALLOC_TYPE_SYSMEM_PROTECTED), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_cipher, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_cipher, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain, size, MEM_ALLOC_TYPE_VIDMEM_PROTECTED), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain_cpu, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain_cpu, size, MEM_ALLOC_TYPE_SYSMEM_PROTECTED), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &auth_tag_mem, auth_tag_buffer_size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);

write_range_cpu(src_plain, size, uvm_get_stale_thread_id());
@@ -483,14 +443,13 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
cpu_encrypt(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size);
gpu_decrypt(&push, dst_plain, src_cipher, auth_tag_mem, size, copy_size);

uvm_push_end(&push);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), out);

TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
&tracker,
&push,
"enc(gpu)_dec(cpu)"),
// Wait for SEC2 before launching the CE part.
// SEC2 is only allowed to release semaphores in unprotected sysmem,
// and CE can only acquire semaphores in protected vidmem.
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);

TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "enc(gpu)_dec(cpu)"),
out);

gpu_encrypt(&push, dst_cipher, dst_plain, decrypt_iv, auth_tag_mem, size, copy_size);
@@ -521,8 +480,6 @@ out:

uvm_kvfree(decrypt_iv);

uvm_tracker_deinit(&tracker);

return status;
}

@@ -545,7 +502,6 @@ static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu)

UVM_ASSERT(size % copy_sizes[i] == 0);

TEST_NV_CHECK_RET(test_cpu_to_gpu_sysmem(gpu, copy_sizes[i], size));
TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu, copy_sizes[i], size));
}

@@ -69,6 +69,14 @@ static NV_STATUS test_tracker_completion(uvm_va_space_t *va_space)
gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL);

// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore mechanism
// to all semaphore
// This test allocates semaphore in vidmem and then releases it from the CPU
// SEC2 channels cannot target semaphores in vidmem. Moreover, CPU cannot
// directly release values to vidmem for CE channels.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;

TEST_NV_CHECK_RET(uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &sema));

uvm_tracker_init(&tracker);

@@ -7189,6 +7189,7 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
}

static void map_get_allowed_destinations(uvm_va_block_t *block,
uvm_va_block_context_t *va_block_context,
const uvm_va_policy_t *policy,
uvm_processor_id_t id,
uvm_processor_mask_t *allowed_mask)
@@ -7200,7 +7201,10 @@ static void map_get_allowed_destinations(uvm_va_block_t *block,
uvm_processor_mask_zero(allowed_mask);
uvm_processor_mask_set(allowed_mask, policy->preferred_location);
}
else if ((uvm_va_policy_is_read_duplicate(policy, va_space) || uvm_id_equal(policy->preferred_location, id)) &&
else if ((uvm_va_policy_is_read_duplicate(policy, va_space) ||
(uvm_id_equal(policy->preferred_location, id) &&
!is_uvm_fault_force_sysmem_set() &&
!uvm_hmm_must_use_sysmem(block, va_block_context))) &&
uvm_va_space_processor_has_memory(va_space, id)) {
// When operating under read-duplication we should only map the local
// processor to cause fault-and-duplicate of remote pages.
@@ -7285,7 +7289,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,

// Map per resident location so we can more easily detect physically-
// contiguous mappings.
map_get_allowed_destinations(va_block, va_block_context->policy, id, &allowed_destinations);
map_get_allowed_destinations(va_block, va_block_context, va_block_context->policy, id, &allowed_destinations);

for_each_closest_id(resident_id, &allowed_destinations, id, va_space) {
if (UVM_ID_IS_CPU(id)) {

@@ -418,15 +418,6 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
uvm_global_processor_mask_t retained_gpus;
LIST_HEAD(deferred_free_list);

// Normally we'd expect this to happen as part of uvm_mm_release()
// but if userspace never initialized uvm_mm_fd that won't happen.
// We don't have to take the va_space_mm spinlock and update state
// here because we know no other thread can be in or subsequently
// call uvm_api_mm_initialize successfully because the UVM
// file-descriptor has been released.
if (va_space->va_space_mm.state == UVM_VA_SPACE_MM_STATE_UNINITIALIZED)
uvm_va_space_mm_unregister(va_space);

// Remove the VA space from the global list before we start tearing things
// down so other threads can't see the VA space in a partially-valid state.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
@@ -532,7 +523,14 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)

uvm_deferred_free_object_list(&deferred_free_list);

// MM FD teardown should already have destroyed va_space_mm
// Normally we'd expect this to happen as part of uvm_mm_release()
// but if userspace never initialized uvm_mm_fd that won't happen.
// We don't have to take the va_space_mm spinlock and update state
// here because we know no other thread can be in or subsequently
// call uvm_api_mm_initialize successfully because the UVM
// file-descriptor has been released.
if (va_space->va_space_mm.state == UVM_VA_SPACE_MM_STATE_UNINITIALIZED)
uvm_va_space_mm_unregister(va_space);
UVM_ASSERT(!uvm_va_space_mm_alive(&va_space->va_space_mm));

uvm_mutex_lock(&g_uvm_global.global_lock);

@@ -1396,6 +1396,8 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)

nv->flags |= NV_FLAG_OPEN;

rm_request_dnotifier_state(sp, nv);

/*
* Now that RM init is done, allow dynamic power to control the GPU in FINE
* mode, if enabled. (If the mode is COARSE, this unref will do nothing

@@ -290,10 +290,6 @@ NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);

NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx,
gpuChannelHandle channel);
NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslLogDeviceEncryption(struct ccslContext_t *ctx,
NvU8 *decryptIv);
NV_STATUS nvGpuOpsCcslAcquireEncryptionIv(struct ccslContext_t *ctx,
NvU8 *encryptIv);
NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx,
NvU8 direction);
NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx,
@@ -312,6 +308,8 @@ NV_STATUS nvGpuOpsCcslDecrypt(struct ccslContext_t *ctx,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer);
NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NvU32 bufferSize,
@@ -320,5 +318,9 @@ NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NV_STATUS nvGpuOpsQueryMessagePool(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 *messageNum);
NV_STATUS nvGpuOpsIncrementIv(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 increment,
NvU8 *iv);

#endif /* _NV_GPU_OPS_H_*/

@@ -1504,44 +1504,18 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext)
}
EXPORT_SYMBOL(nvUvmInterfaceDeinitCslContext);

NV_STATUS nvUvmInterfaceCslLogDeviceEncryption(UvmCslContext *uvmCslContext,
UvmCslIv *decryptIv)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;

status = rm_gpu_ops_ccsl_log_device_encryption(sp, uvmCslContext->ctx, (NvU8 *)decryptIv);

return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslLogDeviceEncryption);

NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslDirection direction)
UvmCslOperation operation)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;

status = rm_gpu_ops_ccsl_rotate_iv(sp, uvmCslContext->ctx, direction);
status = rm_gpu_ops_ccsl_rotate_iv(sp, uvmCslContext->ctx, operation);

return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslRotateIv);

NV_STATUS nvUvmInterfaceCslAcquireEncryptionIv(UvmCslContext *uvmCslContext,
UvmCslIv *encryptIv)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;

BUILD_BUG_ON(NV_OFFSETOF(UvmCslIv, fresh) != sizeof(encryptIv->iv));

status = rm_gpu_ops_ccsl_acquire_encryption_iv(sp, uvmCslContext->ctx, (NvU8*)encryptIv);

return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslAcquireEncryptionIv);

NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
@@ -1566,6 +1540,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer)
{
NV_STATUS status;
@@ -1577,6 +1553,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
inputBuffer,
(NvU8 *)decryptIv,
outputBuffer,
addAuthData,
addAuthDataSize,
authTagBuffer);

return status;
@@ -1598,18 +1576,32 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
EXPORT_SYMBOL(nvUvmInterfaceCslSign);

NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
UvmCslDirection direction,
UvmCslOperation operation,
NvU64 *messageNum)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;

status = rm_gpu_ops_ccsl_query_message_pool(sp, uvmCslContext->ctx, direction, messageNum);
status = rm_gpu_ops_ccsl_query_message_pool(sp, uvmCslContext->ctx, operation, messageNum);

return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslQueryMessagePool);

NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU64 increment,
UvmCslIv *iv)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;

status = rm_gpu_ops_ccsl_increment_iv(sp, uvmCslContext->ctx, operation, increment, (NvU8 *)iv);

return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslIncrementIv);

#else // NV_UVM_ENABLE

NV_STATUS nv_uvm_suspend(void)