550.90.07

This commit is contained in:
Bernhard Stoeckner
2024-06-04 13:48:03 +02:00
parent 083cd9cf17
commit e45d91de02
180 changed files with 43467 additions and 38127 deletions

View File

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.78\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.90.07\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@@ -37,13 +37,11 @@ typedef enum _HYPERVISOR_TYPE
OS_HYPERVISOR_UNKNOWN
} HYPERVISOR_TYPE;
#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE 0
#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1
#define CMD_VGPU_VFIO_REGISTER_MDEV 2
#define CMD_VGPU_VFIO_PRESENT 3
#define CMD_VFIO_PCI_CORE_PRESENT 4
#define CMD_VFIO_WAKE_REMOVE_GPU 1
#define CMD_VGPU_VFIO_PRESENT 2
#define CMD_VFIO_PCI_CORE_PRESENT 3
#define MAX_VF_COUNT_PER_GPU 64
#define MAX_VF_COUNT_PER_GPU 64
typedef enum _VGPU_TYPE_INFO
{
@@ -54,17 +52,11 @@ typedef enum _VGPU_TYPE_INFO
typedef struct
{
void *vgpuVfioRef;
void *waitQueue;
void *nv;
NvU32 *vgpuTypeIds;
NvU8 **vgpuNames;
NvU32 numVgpuTypes;
NvU32 domain;
NvU8 bus;
NvU8 slot;
NvU8 function;
NvBool is_virtfn;
NvU32 domain;
NvU32 bus;
NvU32 device;
NvU32 return_status;
} vgpu_vfio_info;
typedef struct

View File

@@ -1614,6 +1614,10 @@ typedef struct nv_linux_state_s {
nv_kthread_q_t open_q;
NvBool is_accepting_opens;
struct semaphore open_q_lock;
#if defined(NV_VGPU_KVM_BUILD)
wait_queue_head_t wait;
NvS32 return_status;
#endif
} nv_linux_state_t;
extern nv_linux_state_t *nv_linux_devices;

View File

@@ -1041,13 +1041,12 @@ NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, c
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *, NvBool *);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *);
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
NV_STATUS NV_API_CALL nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *);
NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -1505,23 +1505,35 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
/*******************************************************************************
nvUvmInterfaceCslUpdateContext
nvUvmInterfaceCslRotateKey
Updates a context after a key rotation event and can only be called once per
key rotation event. Following a key rotation event, and before
nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
previous key can be decrypted with nvUvmInterfaceCslDecrypt.
Disables channels and rotates keys.
Locking: This function acquires an API lock.
Memory : This function does not dynamically allocate memory.
This function disables channels and rotates associated keys. The channels
associated with the given CSL contexts must be idled before this function is
called. To trigger key rotation all allocated channels for a given key must
be present in the list. If the function returns successfully then the CSL
contexts have been updated with the new key.
Locking: This function attempts to acquire the GPU lock. In case of failure
to acquire the return code is NV_ERR_STATE_IN_USE. The caller must
guarantee that no CSL function, including this one, is invoked
concurrently with the CSL contexts in contextList.
Memory : This function dynamically allocates memory.
Arguments:
uvmCslContext[IN] - The CSL context associated with a channel.
contextList[IN/OUT] - An array of pointers to CSL contexts.
contextListCount[IN] - Number of CSL contexts in contextList. Its value
must be greater than 0.
Error codes:
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
NV_ERR_INVALID_ARGUMENT - contextList is NULL or contextListCount is 0.
NV_ERR_STATE_IN_USE - Unable to acquire lock / resource. Caller
can retry at a later time.
NV_ERR_GENERIC - A failure other than _STATE_IN_USE occurred
when attempting to acquire a lock.
*/
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
NV_STATUS nvUvmInterfaceCslRotateKey(UvmCslContext *contextList[],
NvU32 contextListCount);
/*******************************************************************************
nvUvmInterfaceCslRotateIv
@@ -1529,17 +1541,13 @@ NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
Rotates the IV for a given channel and operation.
This function will rotate the IV on both the CPU and the GPU.
Outstanding messages that have been encrypted by the GPU should first be
decrypted before calling this function with operation equal to
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
encrypted by the CPU should first be decrypted before calling this function
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
the channel must be idle before calling this function. This function can be
called regardless of the value of the IV's message counter.
For a given operation the channel must be idle before calling this function.
This function can be called regardless of the value of the IV's message counter.
Locking: This function attempts to acquire the GPU lock.
In case of failure to acquire the return code
is NV_ERR_STATE_IN_USE.
Locking: This function attempts to acquire the GPU lock. In case of failure to
acquire the return code is NV_ERR_STATE_IN_USE. The caller must guarantee
that no CSL function, including this one, is invoked concurrently with
the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
@@ -1573,8 +1581,8 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
However, it is optional. If it is NULL, the next IV in line will be used.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
@@ -1610,9 +1618,14 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
maximized when the input and output buffers are 16-byte aligned. This is
natural alignment for AES block.
During a key rotation event the previous key is stored in the CSL context.
This allows data encrypted by the GPU to be decrypted with the previous key.
The keyRotationId parameter identifies which key is used. The first key rotation
ID has a value of 0, and the ID increments by one for each key rotation event.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
@@ -1622,6 +1635,8 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
internal counter is used.
keyRotationId[IN] - Specifies the key that is used for decryption.
A value of NV_U32_MAX specifies the current key.
inputBuffer[IN] - Address of ciphertext input buffer.
outputBuffer[OUT] - Address of plaintext output buffer.
addAuthData[IN] - Address of the plaintext additional authenticated data used to
@@ -1642,6 +1657,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU32 keyRotationId,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
@@ -1656,8 +1672,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
undefined behavior.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
@@ -1685,8 +1701,8 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
Locking: This function does not acquire an API or GPU lock.
Memory : This function does not dynamically allocate memory.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
@@ -1711,8 +1727,8 @@ NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
the returned IV can be used in nvUvmInterfaceCslDecrypt.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
@@ -1734,28 +1750,41 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslIv *iv);
/*******************************************************************************
nvUvmInterfaceCslLogExternalEncryption
nvUvmInterfaceCslLogEncryption
Checks and logs information about non-CSL encryptions, such as those that
originate from the GPU.
Checks and logs information about encryptions associated with the given
CSL context.
This function does not modify elements of the UvmCslContext.
For contexts associated with channels, this function does not modify elements of
the UvmCslContext, and must be called for every CPU/GPU encryption.
For the context associated with fault buffers, bufferSize can encompass multiple
encryption invocations, and the UvmCslContext will be updated following a key
rotation event.
In either case the IV remains unmodified after this function is called.
Locking: This function does not acquire an API or GPU lock.
Memory : This function does not dynamically allocate memory.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[OUT] - The size of the buffer encrypted by the
operation[IN] - If the CSL context is associated with a fault
buffer, this argument is ignored. If it is
associated with a channel, it must be either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
bufferSize[IN] - The size of the buffer(s) encrypted by the
external entity in units of bytes.
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
NV_ERR_INSUFFICIENT_RESOURCES - The encryption would cause a counter
to overflow.
*/
NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
NvU32 bufferSize);
NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU32 bufferSize);
#endif // _NV_UVM_INTERFACE_H_

View File

@@ -267,6 +267,7 @@ typedef struct UvmGpuChannelInfo_tag
// The errorNotifier is filled out when the channel hits an RC error.
NvNotification *errorNotifier;
NvNotification *keyRotationNotifier;
NvU32 hwRunlistId;
NvU32 hwChannelId;
@@ -292,13 +293,13 @@ typedef struct UvmGpuChannelInfo_tag
// GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
NvU64 workSubmissionOffsetGpuVa;
NvU64 workSubmissionOffsetGpuVa;
} UvmGpuChannelInfo;
typedef enum
@@ -604,6 +605,8 @@ typedef struct UvmGpuConfComputeCaps_tag
{
// Out: GPU's confidential compute mode
UvmGpuConfComputeMode mode;
// Is key rotation enabled for UVM keys
NvBool bKeyRotationEnabled;
} UvmGpuConfComputeCaps;
#define UVM_GPU_NAME_LENGTH 0x40
@@ -1086,4 +1089,21 @@ typedef enum UvmCslOperation
UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;
// Status of a key rotation. Every value is numbered explicitly.
// NOTE(review): the explicit numbering suggests these values are shared with
// RM and must be kept in sync with it - confirm before renumbering.
typedef enum UVM_KEY_ROTATION_STATUS {
// Key rotation complete/not in progress
UVM_KEY_ROTATION_STATUS_IDLE = 0,
// RM is waiting for clients to report their channels are idle for key rotation
UVM_KEY_ROTATION_STATUS_PENDING = 1,
// Key rotation is in progress
UVM_KEY_ROTATION_STATUS_IN_PROGRESS = 2,
// Key rotation timeout failure, RM will RC non-idle channels.
// UVM should never see this status value.
UVM_KEY_ROTATION_STATUS_FAILED_TIMEOUT = 3,
// Key rotation failed because upper threshold was crossed, RM will RC non-idle channels
UVM_KEY_ROTATION_STATUS_FAILED_THRESHOLD = 4,
// Internal RM failure while rotating keys for a certain channel, RM will RC the channel.
UVM_KEY_ROTATION_STATUS_FAILED_ROTATION = 5,
// Number of status values defined above (0 through 5); not a status itself.
UVM_KEY_ROTATION_STATUS_MAX_COUNT = 6,
} UVM_KEY_ROTATION_STATUS;
#endif // _NV_UVM_TYPES_H_

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -103,14 +103,14 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, n
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_key(nvidia_stack_t *, UvmCslContext *[], NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU32, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU32);
#endif

View File

@@ -1416,6 +1416,42 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VFIO_REGISTER_EMULATED_IOMMU_DEV_PRESENT" "" "functions"
;;
bus_type_has_iommu_ops)
#
# Determine if the 'bus_type' structure has an 'iommu_ops' field.
#
# This field was removed by commit 17de3f5fdd35 (iommu: Retire bus ops)
# in v6.8
#
CODE="
#include <linux/device.h>
int conftest_bus_type_has_iommu_ops(void) {
return offsetof(struct bus_type, iommu_ops);
}"
compile_check_conftest "$CODE" "NV_BUS_TYPE_HAS_IOMMU_OPS" "" "types"
;;
eventfd_signal_has_counter_arg)
#
# Determine if eventfd_signal() function has an additional 'counter' argument.
#
# This argument was removed by commit 3652117f8548 (eventfd: simplify
# eventfd_signal()) in v6.8
#
CODE="
#include <linux/eventfd.h>
void conftest_eventfd_signal_has_counter_arg(void) {
struct eventfd_ctx *ctx;
eventfd_signal(ctx, 1);
}"
compile_check_conftest "$CODE" "NV_EVENTFD_SIGNAL_HAS_COUNTER_ARG" "" "types"
;;
drm_available)
# Determine if the DRM subsystem is usable
CODE="
@@ -5216,25 +5252,23 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT" "" "generic"
;;
unsafe_follow_pfn)
follow_pfn)
#
# Determine if unsafe_follow_pfn() is present.
# Determine if follow_pfn() is present.
#
# unsafe_follow_pfn() was added by commit 69bacee7f9ad
# ("mm: Add unsafe_follow_pfn") in v5.13-rc1.
#
# Note: this commit never made it to the linux kernel, so
# unsafe_follow_pfn() never existed.
# follow_pfn() was added by commit 3b6748e2dd69
# ("mm: introduce follow_pfn()") in v2.6.31-rc1, and removed
# by commit 233eb0bf3b94 ("mm: remove follow_pfn")
# in linux-next.
#
CODE="
#include <linux/mm.h>
void conftest_unsafe_follow_pfn(void) {
unsafe_follow_pfn();
void conftest_follow_pfn(void) {
follow_pfn();
}"
compile_check_conftest "$CODE" "NV_UNSAFE_FOLLOW_PFN_PRESENT" "" "functions"
compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
;;
drm_plane_atomic_check_has_atomic_state_arg)
#
# Determine if drm_plane_helper_funcs::atomic_check takes 'state'

View File

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;
// Get the NUMA node where the first page of the stack is resident. If

View File

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;
// Get the NUMA node where the first page of the stack is resident. If

View File

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;
// Get the NUMA node where the first page of the stack is resident. If

View File

@@ -1448,7 +1448,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
// the CPU.
// the CPU. -1 indicates no preference, in which case the pages used
// can be on any of the available CPU NUMA nodes. If NUMA is disabled
// only 0 and -1 are allowed.
//
// Error codes:
// NV_ERR_INVALID_ADDRESS:
@@ -1462,6 +1464,11 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
// The VA range exceeds the largest virtual address supported by the
// destination processor.
//
// NV_ERR_INVALID_ARGUMENT:
// preferredCpuMemoryNode is not a valid CPU NUMA node or it corresponds
// to a NUMA node ID for a registered GPU. If NUMA is disabled, it
// indicates that preferredCpuMemoryNode was neither 0 nor -1.
//
// NV_ERR_INVALID_DEVICE:
// destinationUuid does not represent a valid processor such as a CPU or
// a GPU with a GPU VA space registered for it. Or destinationUuid is a
@@ -1528,8 +1535,9 @@ NV_STATUS UvmMigrate(void *base,
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
// the CPU. This argument is ignored if the given virtual address range
// corresponds to managed memory.
// the CPU. -1 indicates no preference, in which case the pages used
// can be on any of the available CPU NUMA nodes. If NUMA is disabled
// only 0 and -1 are allowed.
//
// semaphoreAddress: (INPUT)
// Base address of the semaphore.
@@ -1586,8 +1594,8 @@ NV_STATUS UvmMigrateAsync(void *base,
//
// Migrates the backing of all virtual address ranges associated with the given
// range group to the specified destination processor. The behavior of this API
// is equivalent to calling UvmMigrate on each VA range associated with this
// range group.
// is equivalent to calling UvmMigrate with preferredCpuMemoryNode = -1 on each
// VA range associated with this range group.
//
// Any errors encountered during migration are returned immediately. No attempt
// is made to migrate the remaining unmigrated ranges and the ranges that are
@@ -2169,7 +2177,8 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
//
// If any page in the VA range has a preferred location, then the migration and
// mapping policies associated with this API take precedence over those related
// to the preferred location.
// to the preferred location. If the preferred location is a specific CPU NUMA
// node, that NUMA node will be used for a CPU-resident copy of the page.
//
// If any pages in this VA range have any processors present in their
// accessed-by list, the migration and mapping policies associated with this
@@ -2300,7 +2309,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// UvmPreventMigrationRangeGroups has not been called on the range group that
// those pages are associated with, then the migration and mapping policies
// associated with UvmEnableReadDuplication override the policies outlined
// above. Note that enabling read duplication on on any pages in this VA range
// above. Note that enabling read duplication on any pages in this VA range
// does not clear the state set by this API for those pages. It merely overrides
// the policies associated with this state until read duplication is disabled
// for those pages.
@@ -2333,7 +2342,8 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
// UUID of the CPU. -1 is a special value which indicates all CPU nodes
// allowed by the global and thread memory policies.
// allowed by the global and thread memory policies. If NUMA is disabled
// only 0 and -1 are allowed.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:

View File

@@ -855,6 +855,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
const UvmCslIv *decrypt_iv,
NvU32 key_version,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
@@ -869,6 +870,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
dst_plain + i * copy_size,
src_cipher + i * copy_size,
decrypt_iv + i,
key_version,
copy_size,
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
}
@@ -879,6 +881,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
const UvmCslIv *decrypt_iv,
NvU32 key_version,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
@@ -896,6 +899,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
dst_plain + i * copy_size,
src_cipher + i * copy_size,
decrypt_iv + i,
key_version,
copy_size,
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
}
@@ -959,7 +963,7 @@ static void gpu_encrypt(uvm_push_t *push,
i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
dst_cipher);
uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);
if (i > 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
@@ -1020,6 +1024,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
UvmCslIv *decrypt_iv = NULL;
UvmCslIv *encrypt_iv = NULL;
NvU32 key_version;
uvm_tracker_t tracker;
size_t src_plain_size;
@@ -1089,6 +1094,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);
// There shouldn't be any key rotation between the end of the push and the
// CPU decryption(s), but it is more robust against test changes to force
// decryption to use the saved key.
key_version = uvm_channel_pool_key_version(push.channel->pool);
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
@@ -1101,6 +1111,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
dst_plain,
dst_cipher,
decrypt_iv,
key_version,
auth_tag_mem,
size,
copy_size),
@@ -1111,6 +1122,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
dst_plain,
dst_cipher,
decrypt_iv,
key_version,
auth_tag_mem,
size,
copy_size),

File diff suppressed because it is too large Load Diff

View File

@@ -228,21 +228,65 @@ typedef struct
// variant is required when the thread holding the pool lock must sleep
// (ex: acquire another mutex) deeper in the call stack, either in UVM or
// RM.
union {
union
{
uvm_spinlock_t spinlock;
uvm_mutex_t mutex;
};
// Secure operations require that uvm_push_begin order matches
// uvm_push_end order, because the engine's state is used in its internal
// operation and each push may modify this state. push_locks is protected by
// the channel pool lock.
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
struct
{
// Secure operations require that uvm_push_begin order matches
// uvm_push_end order, because the engine's state is used in its
// internal operation and each push may modify this state.
// push_locks is protected by the channel pool lock.
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
// Counting semaphore for available and unlocked channels, it must be
// acquired before submitting work to a channel when the Confidential
// Computing feature is enabled.
uvm_semaphore_t push_sem;
// Counting semaphore for available and unlocked channels, it must be
// acquired before submitting work to a channel when the Confidential
// Computing feature is enabled.
uvm_semaphore_t push_sem;
// Per channel buffers in unprotected sysmem.
uvm_rm_mem_t *pool_sysmem;
// Per channel buffers in protected vidmem.
uvm_rm_mem_t *pool_vidmem;
struct
{
// Current encryption key version, incremented upon key rotation.
// While there are separate keys for encryption and decryption, the
// two keys are rotated at once, so the versioning applies to both.
NvU32 version;
// Lock used to ensure mutual exclusion during key rotation.
uvm_mutex_t mutex;
// CSL contexts passed to RM for key rotation. This is usually an
// array containing the CSL contexts associated with the channels in
// the pool. In the case of the WLC pool, the array also includes
// CSL contexts associated with LCIC channels.
UvmCslContext **csl_contexts;
// Number of elements in the CSL context array.
unsigned num_csl_contexts;
// Number of bytes encrypted, or decrypted, on the engine associated
// with the pool since the last key rotation. Only used during
// testing, to force key rotations after a certain encryption size,
// see UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD.
//
// Encryptions on a LCIC pool are accounted for in the paired WLC
// pool.
//
// TODO: Bug 4612912: these accounting variables can be removed once
// RM exposes an API to set the key rotation lower threshold.
atomic64_t encrypted;
atomic64_t decrypted;
} key_rotation;
} conf_computing;
} uvm_channel_pool_t;
struct uvm_channel_struct
@@ -322,43 +366,14 @@ struct uvm_channel_struct
// work launches to match the order of push end-s that triggered them.
volatile NvU32 gpu_put;
// Static pushbuffer for channels with static schedule (WLC/LCIC)
uvm_rm_mem_t *static_pb_protected_vidmem;
// Static pushbuffer staging buffer for WLC
uvm_rm_mem_t *static_pb_unprotected_sysmem;
void *static_pb_unprotected_sysmem_cpu;
void *static_pb_unprotected_sysmem_auth_tag_cpu;
// The above static locations are required by the WLC (and LCIC)
// schedule. Protected sysmem location completes WLC's independence
// from the pushbuffer allocator.
// Protected sysmem location makes WLC independent from the pushbuffer
// allocator. Unprotected sysmem and protected vidmem counterparts
// are allocated from the channel pool (sysmem, vidmem).
void *static_pb_protected_sysmem;
// Static tracking semaphore notifier values
// Because of LCIC's fixed schedule, the secure semaphore release
// mechanism uses two additional static locations for incrementing the
// notifier values. See:
// . channel_semaphore_secure_release()
// . setup_lcic_schedule()
// . internal_channel_submit_work_wlc()
uvm_rm_mem_t *static_notifier_unprotected_sysmem;
NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;
// Explicit location for push launch tag used by WLC.
// Encryption auth tags have to be located in unprotected sysmem.
void *launch_auth_tag_cpu;
NvU64 launch_auth_tag_gpu_va;
// Used to decrypt the push back to protected sysmem.
// This happens when profilers register callbacks for migration data.
uvm_push_crypto_bundle_t *push_crypto_bundles;
// Accompanying authentication tags for the crypto bundles
uvm_rm_mem_t *push_crypto_bundle_auth_tags;
} conf_computing;
// RM channel information
@@ -418,7 +433,7 @@ struct uvm_channel_manager_struct
unsigned num_channel_pools;
// Mask containing the indexes of the usable Copy Engines. Each usable CE
// has at least one pool associated with it.
// has at least one pool of type UVM_CHANNEL_POOL_TYPE_CE associated with it.
DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
struct
@@ -451,6 +466,16 @@ struct uvm_channel_manager_struct
UVM_BUFFER_LOCATION gpput_loc;
UVM_BUFFER_LOCATION pushbuffer_loc;
} conf;
struct
{
// Flag indicating that the WLC/LCIC mechanism is ready/setup; should
// only be false during (de)initialization.
bool wlc_ready;
// True indicates that key rotation is enabled (UVM-wise).
bool key_rotation_enabled;
} conf_computing;
};
// Create a channel manager for the GPU
@@ -501,6 +526,14 @@ uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel);
uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel);
NvU64 uvm_channel_get_static_pb_protected_vidmem_gpu_va(uvm_channel_t *channel);
NvU64 uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(uvm_channel_t *channel);
char* uvm_channel_get_static_pb_unprotected_sysmem_cpu(uvm_channel_t *channel);
char *uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(uvm_channel_t *channel, unsigned tag_index);
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
@@ -532,6 +565,17 @@ static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
return UVM_CHANNEL_TYPE_MEMOPS;
}
// Force key rotation in the engine associated with the given channel pool.
// Rotation may still not happen if RM cannot acquire the necessary locks (in
// which case the function returns NV_ERR_STATE_IN_USE).
//
// This function should be only invoked in pools in which key rotation is
// enabled.
NV_STATUS uvm_channel_pool_rotate_key(uvm_channel_pool_t *pool);
// Retrieve the current encryption key version associated with the channel pool.
NvU32 uvm_channel_pool_key_version(uvm_channel_pool_t *pool);
// Privileged channels support all the Host and engine methods, while
// non-privileged channels don't support privileged methods.
//
@@ -579,12 +623,9 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
// beginning.
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);
// Check if WLC/LCIC mechanism is ready/setup
// Should only return false during initialization
static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
{
return (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] != NULL) &&
(manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL);
return manager->conf_computing.wlc_ready;
}
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
// associated with access_channel.

View File

@@ -796,11 +796,8 @@ done:
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_channel_pool_t *pool;
uvm_push_t *pushes;
uvm_gpu_t *gpu;
NvU32 i;
NvU32 num_pushes;
uvm_push_t *pushes = NULL;
uvm_gpu_t *gpu = NULL;
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
@@ -810,9 +807,19 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
for_each_va_space_gpu(gpu, va_space) {
uvm_channel_type_t channel_type;
// Key rotation is disabled because this test relies on nested pushes,
// which is illegal. If any push other than the first one triggers key
// rotation, the test won't complete. This is because key rotation
// depends on waiting for ongoing pushes to end, which doesn't happen
// if those pushes are ended after the current one begins.
uvm_conf_computing_disable_key_rotation(gpu);
for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
TEST_CHECK_RET(pool != NULL);
NvU32 i;
NvU32 num_pushes;
uvm_channel_pool_t *pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
TEST_CHECK_GOTO(pool != NULL, error);
// Skip LCIC channels as those can't accept any pushes
if (uvm_channel_pool_is_lcic(pool))
@@ -824,7 +831,7 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);
pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
TEST_CHECK_RET(pushes != NULL);
TEST_CHECK_GOTO(pushes != NULL, error);
for (i = 0; i < num_pushes; i++) {
uvm_push_t *push = &pushes[i];
@@ -841,12 +848,18 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
uvm_kvfree(pushes);
}
uvm_conf_computing_enable_key_rotation(gpu);
}
uvm_thread_context_lock_enable_tracking();
return status;
error:
if (gpu != NULL)
uvm_conf_computing_enable_key_rotation(gpu);
uvm_thread_context_lock_enable_tracking();
uvm_kvfree(pushes);
@@ -948,6 +961,318 @@ release:
return NV_OK;
}
// Try to rotate the encryption key of the given pool num_rotations times.
//
// A rotation attempt that returns NV_ERR_STATE_IN_USE (RM could not acquire
// the locks it needs) is not counted as an error: the function sleeps briefly
// and tries again, up to a fixed number of attempts overall. Any other failure
// is reported and returned immediately.
//
// Returns NV_OK if at least one rotation completed (or none was requested),
// and NV_ERR_STATE_IN_USE if every attempt was rejected by RM.
static NV_STATUS force_key_rotations(uvm_channel_pool_t *pool, unsigned num_rotations)
{
    const unsigned max_attempts = 20;
    unsigned attempt = 0;
    unsigned completed = 0;

    if (num_rotations == 0)
        return NV_OK;

    // Keep the number of requested rotations small: each rejected attempt
    // implies a sleep, so a large request could balloon the test runtime.
    UVM_ASSERT(num_rotations <= 10);

    while ((attempt < max_attempts) && (completed < num_rotations)) {
        // Rotate regardless of how much data has been encrypted so far.
        NV_STATUS status = uvm_channel_pool_rotate_key(pool);

        attempt++;

        if (status != NV_ERR_STATE_IN_USE) {
            TEST_NV_CHECK_RET(status);
            completed++;
        }
        else {
            // RM is busy. Back off for 1ms to 10ms before the next attempt;
            // the total sleep time is bounded by max_attempts * 10ms.
            usleep_range(1000, 10000);
        }
    }

    // Dependent tests would still pass without a single rotation, but they
    // would not be testing much. Fail instead, so that the attempt count or
    // the sleep interval get tuned until at least one rotation completes.
    return (completed > 0) ? NV_OK : NV_ERR_STATE_IN_USE;
}
// Convenience wrapper: force a single key rotation in the given pool.
static NV_STATUS force_key_rotation(uvm_channel_pool_t *pool)
{
    const unsigned single_rotation = 1;

    return force_key_rotations(pool, single_rotation);
}
// Force one key rotation in every pool of the GPU that has key rotation
// enabled.
//
// Forcing is useful because some engines may otherwise never rotate during UVM
// test execution. For example, if the MEMOPS channel type is mapped to a CE
// not shared with any other channel type, the only encryption taking place in
// that engine comes from semaphore releases (4 bytes each), which is unlikely
// to exceed even small rotation thresholds.
static NV_STATUS test_channel_key_rotation_basic(uvm_gpu_t *gpu)
{
    uvm_channel_pool_t *pool;

    uvm_for_each_pool(pool, gpu->channel_manager) {
        if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
            TEST_NV_CHECK_RET(force_key_rotation(pool));
        }
    }

    return NV_OK;
}
// Interleave GPU encryptions and decryptions, and their CPU counterparts, with
// key rotations.
//
// Each of the 5 iterations forces a rotation in both the GPU_TO_CPU and the
// CPU_TO_GPU default pools, copies a CPU buffer to vidmem, forces both
// rotations again, copies the vidmem contents back to a second CPU buffer, and
// checks that the round trip preserved the data.
//
// Returns NV_OK on success, NV_ERR_NO_MEMORY if a CPU buffer cannot be
// allocated, or the status of the first failing step otherwise.
static NV_STATUS test_channel_key_rotation_interleave(uvm_gpu_t *gpu)
{
int i;
uvm_channel_pool_t *gpu_to_cpu_pool;
uvm_channel_pool_t *cpu_to_gpu_pool;
NV_STATUS status = NV_OK;
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
void *initial_plain_cpu = NULL;
void *final_plain_cpu = NULL;
uvm_mem_t *plain_gpu = NULL;
uvm_gpu_address_t plain_gpu_address;
// Both pools involved in the round trip must support key rotation
cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));
gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));
// Source buffer of the CPU > GPU copy
initial_plain_cpu = uvm_kvmalloc_zero(size);
if (initial_plain_cpu == NULL) {
status = NV_ERR_NO_MEMORY;
goto out;
}
// Destination buffer of the GPU > CPU copy
final_plain_cpu = uvm_kvmalloc_zero(size);
if (final_plain_cpu == NULL) {
status = NV_ERR_NO_MEMORY;
goto out;
}
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
// Non-zero pattern, so it can be told apart from the zeroed final buffer
memset(initial_plain_cpu, 1, size);
for (i = 0; i < 5; i++) {
// Rotate both keys right before the CPU encryption + GPU decryption
TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
plain_gpu_address,
initial_plain_cpu,
size,
NULL,
"CPU > GPU"),
out);
// Rotate both keys again before the GPU encryption + CPU decryption
TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
final_plain_cpu,
plain_gpu_address,
size,
NULL,
"GPU > CPU"),
out);
// The data must survive the round trip unchanged
TEST_CHECK_GOTO(!memcmp(initial_plain_cpu, final_plain_cpu, size), out);
// Clear the destination so a stale copy cannot satisfy the next iteration
memset(final_plain_cpu, 0, size);
}
out:
uvm_mem_free(plain_gpu);
uvm_kvfree(final_plain_cpu);
uvm_kvfree(initial_plain_cpu);
return status;
}
// Fill the whole vidmem allocation with the byte val, using a GPU_INTERNAL
// push on the GPU backing the allocation, and wait for the push to complete.
//
// The allocation must be vidmem.
// NOTE(review): presumably it must also be mapped in the GPU kernel address
// space, since its kernel virtual address is used; the visible caller maps it
// first. Confirm.
static NV_STATUS memset_vidmem(uvm_mem_t *mem, NvU8 val)
{
    uvm_push_t push;
    uvm_gpu_address_t gpu_address;
    uvm_gpu_t *gpu = mem->backing_gpu;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

    // The fill value is arbitrary, so do not describe the push as a zeroing
    // operation.
    TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "memset vidmem"));

    gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
    gpu->parent->ce_hal->memset_1(&push, gpu_address, val, mem->size);

    TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));

    return NV_OK;
}
// Custom version of uvm_conf_computing_util_memcopy_gpu_to_cpu that allows
// testing to insert key rotations in between the push end, and the CPU
// decryption
//
// The GPU encrypts size bytes from src_gpu_address into a DMA buffer, then
// num_rotations_to_insert key rotations are forced in the channel's pool, and
// finally the CPU decrypts the DMA buffer into dst_plain using the IV and key
// version recorded before the GPU encryption.
//
// size must not exceed UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
void *dst_plain,
uvm_gpu_address_t src_gpu_address,
size_t size,
unsigned num_rotations_to_insert)
{
NV_STATUS status;
uvm_push_t push;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
void *src_cipher, *auth_tag;
uvm_channel_t *channel;
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
// Nothing to release yet on failure, so return directly
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
if (status != NV_OK)
return status;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Small GPU > CPU encryption");
if (status != NV_OK)
goto out;
channel = push.channel;
// Record the IV and the key version in the DMA buffer before the GPU
// encryption is pushed; both are handed to the CPU decryption below.
uvm_conf_computing_log_gpu_encryption(channel, size, dma_buffer->decrypt_iv);
dma_buffer->key_version[0] = uvm_channel_pool_key_version(channel->pool);
dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
status = uvm_push_end_and_wait(&push);
if (status != NV_OK)
goto out;
// Rotate only after the push has completed
TEST_NV_CHECK_GOTO(force_key_rotations(channel->pool, num_rotations_to_insert), out);
// If num_rotations_to_insert is not zero, the current encryption key will
// be different from the one used during CE encryption.
src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
status = uvm_conf_computing_cpu_decrypt(channel,
dst_plain,
src_cipher,
dma_buffer->decrypt_iv,
dma_buffer->key_version[0],
size,
auth_tag);
out:
// The DMA buffer is returned to the pool on success and on failure alike
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
return status;
}
// Repeatedly copy a vidmem buffer filled with the byte 1 to the CPU through
// encrypted_memcopy_gpu_to_cpu, inserting num_rotations_to_insert key rotations
// between each GPU encryption and its CPU decryption, and verify the decrypted
// contents every time.
//
// The test is skipped (NV_OK) if key rotation is disabled in the GPU.
static NV_STATUS test_channel_key_rotation_cpu_decryption(uvm_gpu_t *gpu,
                                                          unsigned num_repetitions,
                                                          unsigned num_rotations_to_insert)
{
    NV_STATUS status = NV_OK;
    const size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
    uvm_channel_pool_t *gpu_to_cpu_pool;
    uvm_gpu_address_t plain_gpu_address;
    uvm_mem_t *plain_gpu = NULL;
    NvU8 *plain_cpu = NULL;
    unsigned remaining;

    if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
        return NV_OK;

    gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));

    plain_cpu = uvm_kvmalloc_zero(size);
    if (!plain_cpu) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    // Source of every copy: a vidmem buffer with all bytes set to 1
    TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
    TEST_NV_CHECK_GOTO(memset_vidmem(plain_gpu, 1), out);

    plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);

    for (remaining = num_repetitions; remaining > 0; remaining--) {
        unsigned j;

        TEST_NV_CHECK_GOTO(encrypted_memcopy_gpu_to_cpu(gpu,
                                                        plain_cpu,
                                                        plain_gpu_address,
                                                        size,
                                                        num_rotations_to_insert),
                           out);

        // Every decrypted byte must match the pattern written to vidmem
        for (j = 0; j < size; j++) {
            TEST_CHECK_GOTO(plain_cpu[j] == 1, out);
        }

        // Reset the destination so stale data cannot pass the next check
        memset(plain_cpu, 0, size);
    }

out:
    uvm_mem_free(plain_gpu);
    uvm_kvfree(plain_cpu);

    return status;
}
// Check that the CPU can still decrypt data that was encrypted with a key
// version that is no longer current because of key rotation. Given that SEC2
// does not expose encryption capabilities, the "decrypt-after-rotation"
// problem is exclusive to CE encryptions.
static NV_STATUS test_channel_key_rotation_decrypt_after_key_rotation(uvm_gpu_t *gpu)
{
    // Number of key rotations that encrypted_memcopy_gpu_to_cpu is asked to
    // insert between the GPU encryption and the matching CPU decryption.
    const unsigned num_rotations_to_insert = 8;

    TEST_NV_CHECK_RET(test_channel_key_rotation_cpu_decryption(gpu, 1, num_rotations_to_insert));

    return NV_OK;
}
// Run all the key rotation sanity tests on the GPUs of the VA space.
//
// Nothing is tested when Confidential Computing is disabled. The iteration
// stops at the first GPU found with key rotation disabled.
static NV_STATUS test_channel_key_rotation(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    if (g_uvm_global.conf_computing_enabled) {
        for_each_va_space_gpu(gpu, va_space) {
            if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
                break;

            TEST_NV_CHECK_RET(test_channel_key_rotation_basic(gpu));
            TEST_NV_CHECK_RET(test_channel_key_rotation_interleave(gpu));
            TEST_NV_CHECK_RET(test_channel_key_rotation_decrypt_after_key_rotation(gpu));
        }
    }

    return NV_OK;
}
NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
@@ -1203,6 +1528,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
if (status != NV_OK)
goto done;
status = test_channel_key_rotation(va_space);
if (status != NV_OK)
goto done;
// The following tests have side effects, they reset the GPU's
// channel_manager.
status = test_channel_pushbuffer_extension_base(va_space);
@@ -1338,6 +1667,126 @@ done:
return status;
}
// Stress operation: perform params->iterations encrypted CPU > GPU copies of a
// DMA-buffer-sized block, so the CPU_TO_GPU pool keeps accumulating encryption
// usage.
//
// Returns NV_OK on success, NV_ERR_NO_MEMORY if the CPU buffer cannot be
// allocated, or the status of the first failing step otherwise.
static NV_STATUS channel_stress_key_rotation_cpu_encryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    // Unsigned, like the iteration counter in
    // channel_stress_key_rotation_rotate: a signed counter would be compared
    // against the unsigned iteration count, and could overflow for large
    // counts.
    NvU32 i;
    uvm_channel_pool_t *cpu_to_gpu_pool;
    NV_STATUS status = NV_OK;
    size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
    void *initial_plain_cpu = NULL;
    uvm_mem_t *plain_gpu = NULL;
    uvm_gpu_address_t plain_gpu_address;

    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU);

    cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));

    initial_plain_cpu = uvm_kvmalloc_zero(size);
    if (initial_plain_cpu == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
    plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);

    memset(initial_plain_cpu, 1, size);

    // The copied data is never read back: only the encryption traffic matters
    for (i = 0; i < params->iterations; i++) {
        TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
                                                                      plain_gpu_address,
                                                                      initial_plain_cpu,
                                                                      size,
                                                                      NULL,
                                                                      "CPU > GPU"),
                           out);
    }

out:
    uvm_mem_free(plain_gpu);
    uvm_kvfree(initial_plain_cpu);

    return status;
}
// Stress operation: perform params->iterations encrypted GPU > CPU copies,
// verifying the decrypted data each time. No key rotation is explicitly
// inserted between the GPU encryption and the CPU decryption.
static NV_STATUS channel_stress_key_rotation_cpu_decryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU);

    return test_channel_key_rotation_cpu_decryption(gpu, params->iterations, 0);
}
// Stress operation: force params->iterations key rotations, cycling through
// the default CPU_TO_GPU, GPU_TO_CPU and WLC pools in that order.
//
// Returns NV_ERR_INVALID_STATE if key rotation is not enabled in the selected
// pool, or the status of the first rotation that fails.
static NV_STATUS channel_stress_key_rotation_rotate(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    NvU32 i;

    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE);

    for (i = 0; i < params->iterations; ++i) {
        NV_STATUS status;
        uvm_channel_pool_t *pool;
        uvm_channel_type_t type;

        // Round-robin over the three channel types
        switch (i % 3) {
            case 0:
                type = UVM_CHANNEL_TYPE_CPU_TO_GPU;
                break;
            case 1:
                type = UVM_CHANNEL_TYPE_GPU_TO_CPU;
                break;
            default:
                type = UVM_CHANNEL_TYPE_WLC;
                break;
        }

        pool = gpu->channel_manager->pool_to_use.default_for_type[type];
        if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
            return NV_ERR_INVALID_STATE;

        status = force_key_rotation(pool);
        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}
// The objective of this test is documented in the user-level function.
//
// Dispatches the key rotation stress operation selected by
// params->key_rotation_operation on a randomly chosen GPU of the VA space,
// while holding the VA space lock in read mode. The test is a no-op (NV_OK)
// if Confidential Computing, or key rotation in the chosen GPU, is disabled.
static NV_STATUS uvm_test_channel_stress_key_rotation(uvm_va_space_t *va_space, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    uvm_test_rng_t rng;
    uvm_gpu_t *gpu;
    NV_STATUS status = NV_OK;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    uvm_test_rng_init(&rng, params->seed);

    uvm_va_space_down_read(va_space);

    // Key rotation should be enabled, or disabled, in all GPUs, so checking a
    // random one is enough.
    gpu = random_va_space_gpu(&rng, va_space);

    if (uvm_conf_computing_is_key_rotation_enabled(gpu)) {
        switch (params->key_rotation_operation) {
            case UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU:
                status = channel_stress_key_rotation_cpu_encryption(gpu, params);
                break;
            case UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU:
                status = channel_stress_key_rotation_cpu_decryption(gpu, params);
                break;
            case UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE:
                status = channel_stress_key_rotation_rotate(gpu, params);
                break;
            default:
                status = NV_ERR_INVALID_PARAMETER;
                break;
        }
    }

    uvm_va_space_up_read(va_space);

    return status;
}
NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
@@ -1349,6 +1798,8 @@ NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct
return uvm_test_channel_stress_update_channels(va_space, params);
case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
return uvm_test_channel_noop_push(va_space, params);
case UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION:
return uvm_test_channel_stress_key_rotation(va_space, params);
default:
return NV_ERR_INVALID_PARAMETER;
}

View File

@@ -33,6 +33,15 @@
#include "nv_uvm_interface.h"
#include "uvm_va_block.h"
// Amount of encrypted data on a given engine that triggers key rotation. This
// is a UVM internal threshold, different from that of RM, and used only during
// testing.
//
// Key rotation is triggered when the total encryption size, or the total
// decryption size (whichever comes first), reaches this lower threshold on the
// engine.
#define UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD (UVM_SIZE_1MB * 8)
// The maximum number of secure operations per push is:
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
@@ -352,6 +361,19 @@ error:
return status;
}
// Tell whether key rotation is triggered by the channel key rotation
// notifiers (true), or by UVM's own encryption usage accounting (false).
//
// The production key rotation defaults are such that key rotations rarely
// happen. During UVM testing, more frequent rotations are triggered by relying
// on internal encryption usage accounting. When key rotations are triggered by
// UVM, the driver does not rely on channel key rotation notifiers.
//
// TODO: Bug 4612912: UVM should be able to programmatically set the rotation
// lower threshold. This function, and all the metadata associated with it
// (per-pool encryption accounting, for example) can be removed at that point.
static bool key_rotation_is_notifier_driven(void)
{
    // Usage accounting takes over whenever the built-in tests are enabled
    if (uvm_enable_builtin_tests)
        return false;

    return true;
}
NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
{
NV_STATUS status;
@@ -394,17 +416,35 @@ void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
}
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv)
{
NV_STATUS status;
uvm_channel_pool_t *pool;
if (uvm_channel_is_lcic(channel))
pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
else
pool = channel->pool;
uvm_mutex_lock(&channel->csl.ctx_lock);
if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, size);
// Informing RM of an encryption/decryption should not fail
UVM_ASSERT(status == NV_OK);
if (!key_rotation_is_notifier_driven())
atomic64_add(size, &pool->conf_computing.key_rotation.encrypted);
}
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// IV rotation is done preemptively as needed, so the above
// call cannot return failure.
UVM_ASSERT(status == NV_OK);
uvm_mutex_unlock(&channel->csl.ctx_lock);
}
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
@@ -428,27 +468,46 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
void *auth_tag_buffer)
{
NV_STATUS status;
uvm_channel_pool_t *pool;
UVM_ASSERT(size);
if (uvm_channel_is_lcic(channel))
pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
else
pool = channel->pool;
uvm_mutex_lock(&channel->csl.ctx_lock);
status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
size,
(NvU8 const *) src_plain,
encrypt_iv,
(NvU8 *) dst_cipher,
(NvU8 *) auth_tag_buffer);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// IV rotation is done preemptively as needed, so the above
// call cannot return failure.
UVM_ASSERT(status == NV_OK);
if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, size);
// Informing RM of an encryption/decryption should not fail
UVM_ASSERT(status == NV_OK);
if (!key_rotation_is_notifier_driven())
atomic64_add(size, &pool->conf_computing.key_rotation.decrypted);
}
uvm_mutex_unlock(&channel->csl.ctx_lock);
}
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
void *dst_plain,
const void *src_cipher,
const UvmCslIv *src_iv,
NvU32 key_version,
size_t size,
const void *auth_tag_buffer)
{
@@ -469,10 +528,19 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
size,
(const NvU8 *) src_cipher,
src_iv,
key_version,
(NvU8 *) dst_plain,
NULL,
0,
(const NvU8 *) auth_tag_buffer);
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, channel %s, GPU %s\n",
nvstatusToString(status),
channel->name,
uvm_gpu_name(uvm_channel_get_gpu(channel)));
}
uvm_mutex_unlock(&channel->csl.ctx_lock);
return status;
@@ -485,6 +553,8 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
NvU8 valid)
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
// There is no dedicated lock for the CSL context associated with replayable
// faults. The mutual exclusion required by the RM CSL API is enforced by
@@ -494,36 +564,48 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
status = nvUvmInterfaceCslDecrypt(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
parent_gpu->fault_buffer_hal->entry_size(parent_gpu),
status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);
// Informing RM of an encryption/decryption should not fail
UVM_ASSERT(status == NV_OK);
status = nvUvmInterfaceCslDecrypt(csl_context,
fault_entry_size,
(const NvU8 *) src_cipher,
NULL,
NV_U32_MAX,
(NvU8 *) dst_plain,
&valid,
sizeof(valid),
(const NvU8 *) auth_tag_buffer);
if (status != NV_OK)
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
}
return status;
}
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment)
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
// See comment in uvm_conf_computing_fault_decrypt
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
status = nvUvmInterfaceCslIncrementIv(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
UVM_CSL_OPERATION_DECRYPT,
increment,
NULL);
status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);
// Informing RM of an encryption/decryption should not fail
UVM_ASSERT(status == NV_OK);
status = nvUvmInterfaceCslIncrementIv(csl_context, UVM_CSL_OPERATION_DECRYPT, 1, NULL);
UVM_ASSERT(status == NV_OK);
}
@@ -625,3 +707,231 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
{
return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, true);
}
// Enable key rotation (UVM-wise) in the GPU's channel manager. This is a no-op
// when Confidential Computing is disabled, or when RM reports key rotation as
// disabled for the GPU.
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu)
{
    // Key rotation cannot be enabled on UVM if it is disabled on RM
    if (g_uvm_global.conf_computing_enabled && gpu->parent->rm_info.gpuConfComputeCaps.bKeyRotationEnabled)
        gpu->channel_manager->conf_computing.key_rotation_enabled = true;
}
// Disable key rotation (UVM-wise) in the GPU's channel manager. This is a
// no-op when Confidential Computing is disabled.
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
{
    if (g_uvm_global.conf_computing_enabled)
        gpu->channel_manager->conf_computing.key_rotation_enabled = false;
}
// Report the key rotation flag stored in the GPU's channel manager.
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
    const bool enabled = gpu->channel_manager->conf_computing.key_rotation_enabled;

    return enabled;
}
// Tell whether key rotation applies to the given channel pool: it must be
// enabled in the pool's GPU, and the pool must be neither a SEC2 nor an LCIC
// pool.
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
{
    // SEC2 exclusion:
    // TODO: Bug 4586447: key rotation must be disabled in the SEC2 engine,
    // because currently the encryption key is shared between UVM and RM, but
    // UVM is not able to idle SEC2 channels owned by RM.
    //
    // LCIC exclusion: key rotation happens as part of channel reservation, and
    // LCIC channels are never reserved directly. Rotation of keys in LCIC
    // channels happens as the result of key rotation in WLC channels, so false
    // is returned even if nothing fundamental prohibits direct key rotation on
    // LCIC pools.
    return uvm_conf_computing_is_key_rotation_enabled(pool->manager->gpu) &&
           !uvm_channel_pool_is_sec2(pool) &&
           !uvm_channel_pool_is_lcic(pool);
}
// Decide whether a key rotation is pending based on UVM's own per-pool usage
// counters: a rotation is pending once either the decrypted or the encrypted
// byte count exceeds UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD.
//
// Only valid when key rotation is not notifier driven.
static bool conf_computing_is_key_rotation_pending_use_stats(uvm_channel_pool_t *pool)
{
    const NvU64 threshold = UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD;
    NvU64 bytes;

    UVM_ASSERT(!key_rotation_is_notifier_driven());

    bytes = atomic64_read(&pool->conf_computing.key_rotation.decrypted);

    // The encrypted counter only needs to be consulted when the decrypted one
    // is still within the threshold.
    if (bytes <= threshold)
        bytes = atomic64_read(&pool->conf_computing.key_rotation.encrypted);

    return bytes > threshold;
}
// Decide whether a key rotation is pending by reading the key rotation
// notifier of the first channel in the pool.
//
// Only valid when key rotation is notifier driven.
static bool conf_computing_is_key_rotation_pending_use_notifier(uvm_channel_pool_t *pool)
{
    uvm_channel_t *first_channel;

    UVM_ASSERT(key_rotation_is_notifier_driven());

    // If key rotation is pending for the pool's engine, the notifier of any
    // of the engine channels can be used to detect it. RM does not update all
    // the notifiers in a single atomic operation though, so the channel read
    // here (the first one in the pool) may report a pending rotation while
    // another channel in the pool temporarily reports the opposite, or vice
    // versa.
    first_channel = pool->channels;
    UVM_ASSERT(first_channel != NULL);

    if (first_channel->channel_info.keyRotationNotifier->status != UVM_KEY_ROTATION_STATUS_PENDING)
        return false;

    return true;
}
// Tell whether a key rotation is pending in the given pool. The answer is
// always false for pools in which key rotation is not enabled; otherwise it
// comes from the channel notifier or from UVM's usage counters, depending on
// which mechanism drives key rotation.
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
        return false;

    return key_rotation_is_notifier_driven() ? conf_computing_is_key_rotation_pending_use_notifier(pool) :
                                               conf_computing_is_key_rotation_pending_use_stats(pool);
}
// Ask RM to rotate the key shared by all the CSL contexts of the pool.
//
// On success the pool's key version is incremented and, when rotation is
// driven by UVM's usage accounting, the per-pool usage counters are reset.
//
// Returns the RM status unchanged. NV_ERR_STATE_IN_USE means RM could not
// acquire the required locks at this time: it is not an error, but a sign for
// UVM to try again later (the same "protocol" used in IV rotation).
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool)
{
    NV_STATUS status;

    UVM_ASSERT(uvm_conf_computing_is_key_rotation_enabled_in_pool(pool));
    UVM_ASSERT(pool->conf_computing.key_rotation.csl_contexts != NULL);
    UVM_ASSERT(pool->conf_computing.key_rotation.num_csl_contexts > 0);

    status = nvUvmInterfaceCslRotateKey(pool->conf_computing.key_rotation.csl_contexts,
                                        pool->conf_computing.key_rotation.num_csl_contexts);

    switch (status) {
        case NV_OK:
            pool->conf_computing.key_rotation.version++;

            // Start accounting afresh for the new key
            if (!key_rotation_is_notifier_driven()) {
                atomic64_set(&pool->conf_computing.key_rotation.decrypted, 0);
                atomic64_set(&pool->conf_computing.key_rotation.encrypted, 0);
            }
            break;

        case NV_ERR_STATE_IN_USE:
            // Retry later; nothing to report
            break;

        default:
            UVM_DBG_PRINT("nvUvmInterfaceCslRotateKey() failed in engine %u: %s\n",
                          pool->engine_index,
                          nvstatusToString(status));
            break;
    }

    return status;
}
// Copy size bytes of plain text from src_plain (CPU) to dst_gpu_address (GPU)
// through an encrypted staging buffer: the CPU encrypts the data into a DMA
// buffer, and a CPU_TO_GPU push decrypts it on the GPU into the destination.
// The function waits for the push to complete before returning.
//
// size must not exceed UVM_CONF_COMPUTING_DMA_BUFFER_SIZE. tracker is handed
// to uvm_push_begin_acquire. format and the variadic arguments are a
// printf-style description of the push.
//
// Returns NV_OK on success, or the status of the first failing step.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
                                                     uvm_gpu_address_t dst_gpu_address,
                                                     void *src_plain,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
    void *dst_cipher, *auth_tag;
    // NOTE(review): the description is truncated to this size; confirm it is
    // not smaller than the push description limit.
    char description[128];
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    // A va_list cannot be forwarded as an ordinary argument of a printf-style
    // call: any conversion specifier in format would consume the va_list
    // object itself instead of the caller's arguments. Expand the description
    // here, and pass the result as a plain string.
    // NOTE(review): assumes uvm_push_begin_acquire takes printf-style variadic
    // arguments, like uvm_push_begin - confirm.
    va_start(args, format);
    vsnprintf(description, sizeof(description), format, args);
    va_end(args);

    status = uvm_push_begin_acquire(gpu->channel_manager,
                                    UVM_CHANNEL_TYPE_CPU_TO_GPU,
                                    tracker,
                                    &push,
                                    "%s",
                                    description);
    if (status != NV_OK)
        goto out;

    // CPU side: encrypt the plain text into the staging buffer
    dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);

    // GPU side: decrypt the staging buffer into the destination
    src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);

out:
    // The DMA buffer is returned to the pool on success and on failure alike
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);

    return status;
}
// Copy size bytes from src_gpu_address (GPU) to dst_plain (CPU) through an
// encrypted staging buffer: a GPU_TO_CPU push encrypts the data into a DMA
// buffer, the function waits for the push to complete, and then the CPU
// decrypts the staging buffer into dst_plain using the IV and key version
// recorded before the GPU encryption.
//
// size must not exceed UVM_CONF_COMPUTING_DMA_BUFFER_SIZE. tracker is handed
// to uvm_push_begin_acquire. format and the variadic arguments are a
// printf-style description of the push.
//
// Returns NV_OK on success, or the status of the first failing step.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
                                                     void *dst_plain,
                                                     uvm_gpu_address_t src_gpu_address,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
    void *src_cipher, *auth_tag;
    // NOTE(review): the description is truncated to this size; confirm it is
    // not smaller than the push description limit.
    char description[128];
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    // A va_list cannot be forwarded as an ordinary argument of a printf-style
    // call: any conversion specifier in format would consume the va_list
    // object itself instead of the caller's arguments. Expand the description
    // here, and pass the result as a plain string.
    // NOTE(review): assumes uvm_push_begin_acquire takes printf-style variadic
    // arguments, like uvm_push_begin - confirm.
    va_start(args, format);
    vsnprintf(description, sizeof(description), format, args);
    va_end(args);

    status = uvm_push_begin_acquire(gpu->channel_manager,
                                    UVM_CHANNEL_TYPE_GPU_TO_CPU,
                                    tracker,
                                    &push,
                                    "%s",
                                    description);
    if (status != NV_OK)
        goto out;

    // Record the IV and the key version in the DMA buffer before the GPU
    // encryption is pushed; both are handed to the CPU decryption below.
    uvm_conf_computing_log_gpu_encryption(push.channel, size, dma_buffer->decrypt_iv);
    dma_buffer->key_version[0] = uvm_channel_pool_key_version(push.channel->pool);

    // GPU side: encrypt the source into the staging buffer
    dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK)
        goto out;

    // CPU side: decrypt the staging buffer into the destination
    src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    status = uvm_conf_computing_cpu_decrypt(push.channel,
                                            dst_plain,
                                            src_cipher,
                                            dma_buffer->decrypt_iv,
                                            dma_buffer->key_version[0],
                                            size,
                                            auth_tag);

out:
    // The DMA buffer is returned to the pool on success and on failure alike
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);

    return status;
}

View File

@@ -87,9 +87,9 @@ typedef struct
// a free buffer.
uvm_tracker_t tracker;
// When the DMA buffer is used as the destination of a GPU encryption, SEC2
// writes the authentication tag here. Later when the buffer is decrypted
// on the CPU the authentication tag is used again (read) for CSL to verify
// When the DMA buffer is used as the destination of a GPU encryption, the
// engine (CE or SEC2) writes the authentication tag here. When the buffer
// is decrypted on the CPU the authentication tag is used by CSL to verify
// the authenticity. The allocation is big enough for one authentication
// tag per PAGE_SIZE page in the alloc buffer.
uvm_mem_t *auth_tag;
@@ -98,7 +98,12 @@ typedef struct
// to the authentication tag. The allocation is big enough for one IV per
// PAGE_SIZE page in the alloc buffer. The granularity between the decrypt
// IV and authentication tag must match.
UvmCslIv decrypt_iv[(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE)];
UvmCslIv decrypt_iv[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];
// When the DMA buffer is used as the destination of a GPU encryption, the
// key version used during GPU encryption of each PAGE_SIZE page can be
// saved here, so CPU decryption uses the correct decryption key.
NvU32 key_version[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];
// Bitmap of the encrypted pages in the backing allocation
uvm_page_mask_t encrypted_page_mask;
@@ -147,7 +152,7 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu);
void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu);
// Logs encryption information from the GPU and returns the IV.
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv);
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv);
// Acquires next CPU encryption IV and returns it.
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv);
@@ -167,10 +172,14 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
// CPU side decryption helper. Decrypts data from src_cipher and writes the
// plain text in dst_plain. src_cipher and dst_plain can't overlap. IV obtained
// from uvm_conf_computing_log_gpu_encryption() needs to be passed to src_iv.
//
// The caller must indicate which key to use for decryption by passing the
// appropriate key version number.
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
void *dst_plain,
const void *src_cipher,
const UvmCslIv *src_iv,
NvU32 key_version,
size_t size,
const void *auth_tag_buffer);
@@ -191,12 +200,12 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
NvU8 valid);
// Increment the CPU-side decrypt IV of the CSL context associated with
// replayable faults. The function is a no-op if the given increment is zero.
// replayable faults.
//
// The IV associated with a fault CSL context is a 64-bit counter.
//
// Locking: this function must be invoked while holding the replayable ISR lock.
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment);
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu);
// Query the number of remaining messages before IV needs to be rotated.
void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
@@ -214,4 +223,71 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
// Check if there are fewer than 'limit' messages available in either direction
// and rotate if not.
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy);
// Rotate the engine key associated with the given channel pool.
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool);
// Returns true if key rotation is allowed in the channel pool.
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool);
// Returns true if key rotation is pending in the channel pool.
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool);
// Enable/disable key rotation in the passed GPU. Note that UVM enablement is
// dependent on RM enablement: key rotation may still be disabled upon calling
// this function, if it is disabled in RM. On the other hand, key rotation can
// be disabled in UVM, even if it is enabled in RM.
//
// Enablement/Disablement affects only kernel key rotation in keys owned by UVM.
// It doesn't affect user key rotation (CUDA, Video...), nor does it affect RM
// kernel key rotation.
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu);
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu);
// Returns true if key rotation is enabled on UVM in the given GPU. Key rotation
// can be enabled on the GPU but disabled on some of GPU engines (LCEs or SEC2),
// see uvm_conf_computing_is_key_rotation_enabled_in_pool.
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu);
// Launch a synchronous, encrypted copy between CPU and GPU.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The source CPU buffer pointed by src_plain contains the unencrypted (plain
// text) contents; the function internally performs a CPU-side encryption step
// before launching the GPU-side CE decryption. The source buffer can be in
// protected or unprotected sysmem, while the destination buffer must be in
// protected vidmem.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
uvm_gpu_address_t dst_gpu_address,
void *src_plain,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...);
// Launch a synchronous, encrypted copy between GPU and CPU.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The source GPU buffer at src_gpu_address is encrypted on the GPU side (CE)
// into an intermediate DMA buffer; the function then performs a CPU-side
// decryption step that writes the unencrypted (plain text) contents to the
// destination CPU buffer pointed by dst_plain. By symmetry with the CPU to GPU
// copy, the source buffer is presumably in protected vidmem, and the
// destination buffer in protected or unprotected sysmem.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
void *dst_plain,
uvm_gpu_address_t src_gpu_address,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...);
#endif // __UVM_CONF_COMPUTING_H__

View File

@@ -591,7 +591,7 @@ static void fault_buffer_skip_replayable_entry(uvm_parent_gpu_t *parent_gpu, NvU
// replayable faults still requires manual adjustment so it is kept in sync
// with the encryption IV on the GSP-RM's side.
if (g_uvm_global.conf_computing_enabled)
uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu, 1);
uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu);
parent_gpu->fault_buffer_hal->entry_clear_valid(parent_gpu, index);
}

View File

@@ -60,6 +60,17 @@ struct uvm_gpu_semaphore_pool_page_struct
// Allocation backing the page
uvm_rm_mem_t *memory;
struct {
// Unprotected sysmem storing encrypted value of semaphores
uvm_rm_mem_t *encrypted_payload_memory;
// Unprotected sysmem storing encryption auth tags
uvm_rm_mem_t *auth_tag_memory;
// Unprotected sysmem storing plain text notifier values
uvm_rm_mem_t *notifier_memory;
} conf_computing;
// Pool the page is part of
uvm_gpu_semaphore_pool_t *pool;
@@ -80,26 +91,6 @@ static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
return gpu_semaphore_pool_is_secure(semaphore->page->pool);
}
static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
{
NvU32 offset;
NvU32 index;
if (gpu_semaphore_is_secure(semaphore))
return semaphore->conf_computing.index;
UVM_ASSERT(semaphore->payload != NULL);
UVM_ASSERT(semaphore->page != NULL);
offset = (char*)semaphore->payload - (char*)uvm_rm_mem_get_cpu_va(semaphore->page->memory);
UVM_ASSERT(offset % UVM_SEMAPHORE_SIZE == 0);
index = offset / UVM_SEMAPHORE_SIZE;
UVM_ASSERT(index < UVM_SEMAPHORE_COUNT_PER_PAGE);
return index;
}
// Use canary values on debug builds to catch semaphore use-after-free. We can
// catch release-after-free by simply setting the payload to a known value at
// free then checking it on alloc or pool free, but catching acquire-after-free
@@ -150,34 +141,83 @@ static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}
// Secure semaphore pools are allocated in the CPR of vidmem and only mapped to
// the owning GPU as no other processor have access to it.
static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
uvm_gpu_semaphore_pool_page_t *pool_page,
uvm_rm_mem_type_t memory_type)
// Release every RM allocation owned by a semaphore pool page and reset the
// corresponding pointers to NULL.
//
// All pages own the semaphore backing memory. Pages of secure pools also own
// the encrypted payload, authentication tag and notifier buffers; pages of
// other pools are expected to have never set them.
//
// NOTE(review): presumably uvm_rm_mem_free() tolerates NULL, since this helper
// resets pointers to NULL and may run on partially populated pages - confirm.
static void pool_page_free_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
    uvm_rm_mem_free(page->memory);
    page->memory = NULL;

    if (!gpu_semaphore_pool_is_secure(page->pool)) {
        // Only secure pools use the Confidential Computing buffers.
        UVM_ASSERT(!page->conf_computing.encrypted_payload_memory);
        UVM_ASSERT(!page->conf_computing.auth_tag_memory);
        UVM_ASSERT(!page->conf_computing.notifier_memory);
        return;
    }

    uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
    page->conf_computing.encrypted_payload_memory = NULL;

    uvm_rm_mem_free(page->conf_computing.auth_tag_memory);
    page->conf_computing.auth_tag_memory = NULL;

    uvm_rm_mem_free(page->conf_computing.notifier_memory);
    page->conf_computing.notifier_memory = NULL;
}
static NV_STATUS pool_page_alloc_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
NV_STATUS status;
uvm_gpu_semaphore_pool_t *pool = page->pool;
uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
size_t align = 0;
bool map_all = true;
align = gpu_semaphore_pool_is_secure(pool) ? UVM_CONF_COMPUTING_BUF_ALIGNMENT : 0;
map_all = gpu_semaphore_pool_is_secure(pool) ? false : true;
UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
status = uvm_rm_mem_alloc(pool->gpu,
memory_type,
UVM_SEMAPHORE_PAGE_SIZE,
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&pool_page->memory);
if (map_all)
status = uvm_rm_mem_alloc_and_map_all(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
else
status = uvm_rm_mem_alloc(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
if (status != NV_OK)
return status;
goto error;
if (!gpu_semaphore_pool_is_secure(pool))
return NV_OK;
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_SEMAPHORE_PAGE_SIZE,
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&page->conf_computing.encrypted_payload_memory);
if (status != NV_OK)
goto error;
BUILD_BUG_ON(UVM_CONF_COMPUTING_AUTH_TAG_SIZE % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&page->conf_computing.auth_tag_memory);
if (status != NV_OK)
goto error;
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(NvU32),
0,
&page->conf_computing.notifier_memory);
if (status != NV_OK)
goto error;
return NV_OK;
error:
pool_page_free_buffers(page);
return status;
}
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
NV_STATUS status;
uvm_gpu_semaphore_pool_page_t *pool_page;
NvU32 *payloads;
size_t i;
uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
uvm_assert_mutex_locked(&pool->mutex);
@@ -188,24 +228,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
pool_page->pool = pool;
// Whenever the Confidential Computing feature is enabled, engines can
// access semaphores only in the CPR of vidmem. Mapping to other GPUs is
// also disabled.
if (gpu_semaphore_pool_is_secure(pool)) {
status = pool_alloc_secure_page(pool, pool_page, memory_type);
if (status != NV_OK)
goto error;
}
else {
status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
memory_type,
UVM_SEMAPHORE_PAGE_SIZE,
0,
&pool_page->memory);
status = pool_page_alloc_buffers(pool_page);
if (status != NV_OK)
goto error;
}
// Verify the GPU can access the semaphore pool.
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
@@ -217,7 +242,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;
if (semaphore_uses_canary(pool)) {
payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
size_t i;
NvU32 *payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
payloads[i] = make_canary(0);
}
@@ -253,7 +280,7 @@ static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
list_del(&page->all_pages_node);
uvm_rm_mem_free(page->memory);
pool_page_free_buffers(page);
uvm_kvfree(page);
}
@@ -273,19 +300,22 @@ NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaph
goto done;
list_for_each_entry(page, &pool->pages, all_pages_node) {
NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
const NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
UVM_ASSERT(semaphore_index <= UVM_SEMAPHORE_COUNT_PER_PAGE);
if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
continue;
if (gpu_semaphore_pool_is_secure(pool)) {
semaphore->conf_computing.index = semaphore_index;
}
else {
semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
semaphore_index * UVM_SEMAPHORE_SIZE);
}
semaphore->page = page;
semaphore->index = semaphore_index;
if (gpu_semaphore_pool_is_secure(pool)) {
// Reset the notifier to prevent detection of false attack when
// checking for updated value
*uvm_gpu_semaphore_get_notifier_cpu_va(semaphore) = semaphore->conf_computing.last_observed_notifier;
}
if (semaphore_uses_canary(pool))
UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
@@ -311,7 +341,6 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
{
uvm_gpu_semaphore_pool_page_t *page;
uvm_gpu_semaphore_pool_t *pool;
NvU32 index;
UVM_ASSERT(semaphore);
@@ -323,7 +352,6 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
return;
pool = page->pool;
index = get_index(semaphore);
// Write a known value lower than the current payload in an attempt to catch
// release-after-free and acquire-after-free.
@@ -333,10 +361,9 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
uvm_mutex_lock(&pool->mutex);
semaphore->page = NULL;
semaphore->payload = NULL;
++pool->free_semaphores_count;
__set_bit(index, page->free_semaphores);
__set_bit(semaphore->index, page->free_semaphores);
uvm_mutex_unlock(&pool->mutex);
}
@@ -449,18 +476,72 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
NvU32 index = get_index(semaphore);
NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;
return base_va + UVM_SEMAPHORE_SIZE * index;
return base_va + semaphore->index * UVM_SEMAPHORE_SIZE;
}
// Return the CPU address from which the semaphore payload is read and written.
//
// For secure semaphores this is the CPU-side cached payload stored in the
// semaphore itself; otherwise it is the semaphore's slot within the CPU
// mapping of its pool page.
NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    if (!gpu_semaphore_is_secure(semaphore)) {
        char *page_cpu_va = uvm_rm_mem_get_cpu_va(semaphore->page->memory);

        return (NvU32*)(page_cpu_va + semaphore->index * UVM_SEMAPHORE_SIZE);
    }

    return &semaphore->conf_computing.cached_payload;
}
// Return the CPU address of the semaphore's slot in the page's encrypted
// payload buffer. Slots are UVM_SEMAPHORE_SIZE bytes apart and selected by the
// semaphore index.
NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page = semaphore->page;
    char *payload_bytes = uvm_rm_mem_get_cpu_va(page->conf_computing.encrypted_payload_memory);

    return (NvU32*)&payload_bytes[semaphore->index * UVM_SEMAPHORE_SIZE];
}
// Return the unprotected virtual GPU address, as seen by the GPU owning the
// pool, of the semaphore's slot in the page's encrypted payload buffer.
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page = semaphore->page;
    NvU64 slot_va = uvm_rm_mem_get_gpu_uvm_va(page->conf_computing.encrypted_payload_memory, page->pool->gpu);

    slot_va += semaphore->index * UVM_SEMAPHORE_SIZE;

    return uvm_gpu_address_virtual_unprotected(slot_va);
}
// Return the CPU address of the semaphore's entry in the page's notifier
// buffer, which holds one notifier per semaphore index.
uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page = semaphore->page;
    uvm_gpu_semaphore_notifier_t *notifiers = uvm_rm_mem_get_cpu_va(page->conf_computing.notifier_memory);

    return &notifiers[semaphore->index];
}
// Return the unprotected virtual GPU address, as seen by the GPU owning the
// pool, of the semaphore's entry in the page's notifier buffer.
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page = semaphore->page;
    NvU64 entry_va = uvm_rm_mem_get_gpu_uvm_va(page->conf_computing.notifier_memory, page->pool->gpu);

    entry_va += semaphore->index * sizeof(uvm_gpu_semaphore_notifier_t);

    return uvm_gpu_address_virtual_unprotected(entry_va);
}
// Return the CPU address of the semaphore's authentication tag. Tags are
// stored back to back in the page's auth tag buffer, each
// UVM_CONF_COMPUTING_AUTH_TAG_SIZE bytes long, ordered by semaphore index.
void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page = semaphore->page;
    char *tag_bytes = uvm_rm_mem_get_cpu_va(page->conf_computing.auth_tag_memory);

    return &tag_bytes[semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
}
// Return the unprotected virtual GPU address, as seen by the GPU owning the
// pool, of the semaphore's authentication tag in the page's auth tag buffer.
uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page = semaphore->page;
    NvU64 tag_va = uvm_rm_mem_get_gpu_uvm_va(page->conf_computing.auth_tag_memory, page->pool->gpu);

    tag_va += semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;

    return uvm_gpu_address_virtual_unprotected(tag_va);
}
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
if (gpu_semaphore_is_secure(semaphore))
return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);
return UVM_GPU_READ_ONCE(*semaphore->payload);
return UVM_GPU_READ_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore));
}
void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
@@ -477,10 +558,7 @@ void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload
// the GPU correctly even on non-SMP).
mb();
if (gpu_semaphore_is_secure(semaphore))
UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
else
UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
UVM_GPU_WRITE_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore), payload);
}
// This function is intended to catch channels which have been left dangling in
@@ -546,22 +624,11 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}
static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
static void gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
// No new value, or the GPU is currently writing the new encrypted material
// and no change in value would still result in corrupted data.
return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
}
static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
UvmCslIv local_iv;
NvU32 local_payload;
NvU32 new_sem_value;
NvU32 gpu_notifier;
NvU32 last_observed_notifier;
NvU32 new_gpu_notifier = 0;
NvU32 iv_index = 0;
uvm_gpu_semaphore_notifier_t gpu_notifier;
uvm_gpu_semaphore_notifier_t new_gpu_notifier = 0;
// A channel can have multiple entries pending and the tracking semaphore
// update of each entry can race with this function. Since the semaphore
@@ -570,64 +637,72 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
unsigned tries_left = channel->num_gpfifo_entries;
NV_STATUS status = NV_OK;
NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
uvm_gpu_semaphore_notifier_t *semaphore_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(uvm_channel_is_ce(channel));
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
UVM_ASSERT(last_observed_notifier <= gpu_notifier);
if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
return;
do {
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);
UVM_ASSERT(gpu_notifier >= semaphore->conf_computing.last_observed_notifier);
// Odd notifier value means there's an update in progress.
if (gpu_notifier % 2)
continue;
// There's no change since last time
if (gpu_notifier == semaphore->conf_computing.last_observed_notifier)
return;
// Make sure no memory accesses happen before we read the notifier
smp_mb__after_atomic();
iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
local_payload = UVM_READ_ONCE(*payload_cpu_addr);
memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
memcpy(local_auth_tag, uvm_gpu_semaphore_get_auth_tag_cpu_va(semaphore), sizeof(local_auth_tag));
local_payload = UVM_READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
// Make sure the second read of notifier happens after
// all memory accesses.
smp_mb__before_atomic();
new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
new_gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);
tries_left--;
} while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));
if (!tries_left) {
status = NV_ERR_INVALID_STATE;
goto error;
}
else {
NvU32 key_version;
const NvU32 iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
NvU32 new_semaphore_value;
UVM_ASSERT(gpu_notifier == new_gpu_notifier);
UVM_ASSERT(gpu_notifier % 2 == 0);
// CPU decryption is guaranteed to use the same key version as the
// associated GPU encryption, because if there was any key rotation in
// between, then key rotation waited for all channels to complete before
// proceeding. The wait implies that the semaphore value matches the
// last one encrypted on the GPU, so this CPU decryption should happen
// before the key is rotated.
key_version = uvm_channel_pool_key_version(channel->pool);
if (gpu_notifier == new_gpu_notifier) {
status = uvm_conf_computing_cpu_decrypt(channel,
&new_sem_value,
&new_semaphore_value,
&local_payload,
&local_iv,
sizeof(new_sem_value),
&semaphore->conf_computing.ivs[iv_index],
key_version,
sizeof(new_semaphore_value),
&local_auth_tag);
if (status != NV_OK)
goto error;
uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
uvm_gpu_semaphore_set_payload(semaphore, new_semaphore_value);
UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
}
return;
return;
}
error:
// Decryption failure is a fatal error, as is running out of tries.
@@ -650,11 +725,11 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
else
uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);
if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
if (gpu_semaphore_is_secure(&tracking_semaphore->semaphore)) {
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
// mechanism to all semaphore
uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
}
new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
@@ -690,7 +765,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
"GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
uvm_gpu_name(tracking_semaphore->semaphore.page->pool->gpu),
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
(NvU64)(uintptr_t)uvm_gpu_semaphore_get_cpu_va(&tracking_semaphore->semaphore),
old_value, new_value);
// Use an atomic write even though the lock is held so that the value can

View File

@@ -29,6 +29,8 @@
#include "uvm_rm_mem.h"
#include "uvm_linux.h"
typedef NvU32 uvm_gpu_semaphore_notifier_t;
// A GPU semaphore is a memory location accessible by the GPUs and the CPU
// that's used for synchronization among them.
// The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
@@ -45,17 +47,15 @@ struct uvm_gpu_semaphore_struct
// The semaphore pool page the semaphore came from
uvm_gpu_semaphore_pool_page_t *page;
// Pointer to the memory location
NvU32 *payload;
// Index of the semaphore in semaphore page
NvU16 index;
struct {
NvU16 index;
NvU32 cached_payload;
uvm_rm_mem_t *encrypted_payload;
uvm_rm_mem_t *notifier;
uvm_rm_mem_t *auth_tag;
UvmCslIv *ivs;
NvU32 last_pushed_notifier;
NvU32 last_observed_notifier;
NvU32 cached_payload;
uvm_gpu_semaphore_notifier_t last_pushed_notifier;
uvm_gpu_semaphore_notifier_t last_observed_notifier;
} conf_computing;
};
@@ -151,6 +151,17 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space);
NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore);
NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore);
void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore);
// Read the 32-bit payload of the semaphore
// Notably doesn't provide any memory ordering guarantees and needs to be used with
// care. For an example of what needs to be considered see

View File

@@ -284,8 +284,10 @@ static void hmm_va_block_unregister_gpu(uvm_va_block_t *va_block,
// Reset preferred location and accessed-by of policy nodes if needed.
uvm_for_each_va_policy_node_in(node, va_block, va_block->start, va_block->end) {
if (uvm_id_equal(node->policy.preferred_location, gpu->id))
if (uvm_va_policy_preferred_location_equal(&node->policy, gpu->id, NUMA_NO_NODE)) {
node->policy.preferred_location = UVM_ID_INVALID;
node->policy.preferred_nid = NUMA_NO_NODE;
}
uvm_processor_mask_clear(&node->policy.accessed_by, gpu->id);
}

View File

@@ -27,7 +27,7 @@
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 34);
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
@@ -48,7 +48,9 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_PUSH);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION_WLC);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_WLC_PUSH);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_SEC2_PUSH);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);

View File

@@ -322,6 +322,15 @@
// Operations not allowed while holding this lock
// - GPU memory allocation which can evict
//
// - Channel pool key rotation lock
// Order: UVM_LOCK_ORDER_KEY_ROTATION
// Condition: Confidential Computing is enabled
// Mutex per channel pool
//
// The lock ensures mutual exclusion during key rotation affecting all the
// channels in the associated pool. Key rotation in WLC pools is handled
// using a separate lock order, see UVM_LOCK_ORDER_KEY_ROTATION_WLC below.
//
// - CE channel CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_CSL_PUSH
// Condition: The Confidential Computing feature is enabled
@@ -338,6 +347,15 @@
// Operations allowed while holding this lock
// - Pushing work to CE channels (except for WLC channels)
//
// - WLC channel pool key rotation lock
// Order: UVM_LOCK_ORDER_KEY_ROTATION_WLC
// Condition: Confidential Computing is enabled
// Mutex of WLC channel pool
//
// The lock has the same purpose as the regular channel pool key rotation
// lock. Using a different order lock for WLC channels allows key rotation
// on those channels during indirect work submission.
//
// - WLC CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
// Condition: The Confidential Computing feature is enabled
@@ -484,7 +502,9 @@ typedef enum
UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL,
UVM_LOCK_ORDER_CHUNK_MAPPING,
UVM_LOCK_ORDER_PAGE_TREE,
UVM_LOCK_ORDER_KEY_ROTATION,
UVM_LOCK_ORDER_CSL_PUSH,
UVM_LOCK_ORDER_KEY_ROTATION_WLC,
UVM_LOCK_ORDER_CSL_WLC_PUSH,
UVM_LOCK_ORDER_CSL_SEC2_PUSH,
UVM_LOCK_ORDER_PUSH,

View File

@@ -39,6 +39,7 @@
#include "uvm_pte_batch.h"
#include "uvm_tlb_batch.h"
#include "nv_uvm_interface.h"
#include "nv_uvm_types.h"
#include "uvm_pushbuffer.h"
@@ -101,11 +102,11 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
pte_buffer->va_range = va_range;
pte_buffer->gpu = gpu;
pte_buffer->mapping_info.cachingType = map_rm_params->caching_type;
pte_buffer->mapping_info.mappingType = map_rm_params->mapping_type;
pte_buffer->mapping_info.formatType = map_rm_params->format_type;
pte_buffer->mapping_info.elementBits = map_rm_params->element_bits;
pte_buffer->mapping_info.compressionType = map_rm_params->compression_type;
pte_buffer->mapping_info.cachingType = (UvmRmGpuCachingType) map_rm_params->caching_type;
pte_buffer->mapping_info.mappingType = (UvmRmGpuMappingType) map_rm_params->mapping_type;
pte_buffer->mapping_info.formatType = (UvmRmGpuFormatType) map_rm_params->format_type;
pte_buffer->mapping_info.elementBits = (UvmRmGpuFormatElementBits) map_rm_params->element_bits;
pte_buffer->mapping_info.compressionType = (UvmRmGpuCompressionType) map_rm_params->compression_type;
if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL)
pte_buffer->mapping_info.mappingPageSize = page_size;

View File

@@ -589,7 +589,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
skipped_migrate = true;
}
else if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, dest_id) &&
!uvm_id_equal(dest_id, policy->preferred_location)) {
!uvm_va_policy_preferred_location_equal(policy, dest_id, NUMA_NO_NODE)) {
// Don't migrate to a non-faultable GPU that is in UVM-Lite mode,
// unless it's the preferred location
status = NV_ERR_INVALID_DEVICE;

View File

@@ -126,7 +126,7 @@ NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sys
NvU64 remove_key;
for (remove_key = base_key; remove_key < key; ++remove_key)
(void *)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
(void)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
kmem_cache_free(g_reverse_page_map_cache, new_reverse_map);
status = errno_to_nv_status(ret);

View File

@@ -671,6 +671,9 @@ static NV_STATUS va_block_set_read_duplication_locked(uvm_va_block_t *va_block,
uvm_assert_mutex_locked(&va_block->lock);
// Force CPU page residency to be on the preferred NUMA node.
va_block_context->make_resident.dest_nid = uvm_va_range_get_policy(va_block->va_range)->preferred_nid;
for_each_id_in_mask(src_id, &va_block->resident) {
NV_STATUS status;
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, src_id, NUMA_NO_NODE);

View File

@@ -100,16 +100,8 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas
bool uvm_numa_id_eq(int nid0, int nid1)
{
UVM_ASSERT(nid0 == -1 || nid0 < MAX_NUMNODES);
UVM_ASSERT(nid1 == -1 || nid1 < MAX_NUMNODES);
if ((nid0 == NUMA_NO_NODE || nid1 == NUMA_NO_NODE) && nodes_weight(node_possible_map) == 1) {
if (nid0 == NUMA_NO_NODE)
nid0 = first_node(node_possible_map);
if (nid1 == NUMA_NO_NODE)
nid1 = first_node(node_possible_map);
}
UVM_ASSERT(nid0 >= NUMA_NO_NODE && nid0 < MAX_NUMNODES);
UVM_ASSERT(nid1 >= NUMA_NO_NODE && nid1 < MAX_NUMNODES);
return nid0 == nid1;
}

View File

@@ -65,9 +65,12 @@ typedef enum
} uvm_push_flag_t;
struct uvm_push_crypto_bundle_struct {
// Initialization vector used to decrypt the push
// Initialization vector used to decrypt the push on the CPU
UvmCslIv iv;
// Key version used to decrypt the push on the CPU
NvU32 key_version;
// Size of the pushbuffer that is encrypted/decrypted
NvU32 push_size;
};

View File

@@ -451,7 +451,6 @@ static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm
static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
NV_STATUS status;
NvU32 auth_tag_offset;
void *auth_tag_cpu_va;
void *push_protected_cpu_va;
void *push_unprotected_cpu_va;
@@ -470,16 +469,15 @@ static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
UVM_ASSERT(!uvm_channel_is_wlc(channel));
UVM_ASSERT(!uvm_channel_is_lcic(channel));
push_protected_cpu_va = (char *)get_base_cpu_va(pushbuffer) + pushbuffer_offset;
push_protected_cpu_va = get_base_cpu_va(pushbuffer) + pushbuffer_offset;
push_unprotected_cpu_va = (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem) + pushbuffer_offset;
auth_tag_offset = push_info_index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
auth_tag_cpu_va = (char *)uvm_rm_mem_get_cpu_va(channel->conf_computing.push_crypto_bundle_auth_tags) +
auth_tag_offset;
auth_tag_cpu_va = uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(channel, push_info_index);
status = uvm_conf_computing_cpu_decrypt(channel,
push_protected_cpu_va,
push_unprotected_cpu_va,
&crypto_bundle->iv,
crypto_bundle->key_version,
crypto_bundle->push_size,
auth_tag_cpu_va);
@@ -558,7 +556,7 @@ NvU64 uvm_pushbuffer_get_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_
if (uvm_channel_is_wlc(push->channel) || uvm_channel_is_lcic(push->channel)) {
// We need to use the same static locations for PB as the fixed
// schedule because that's what the channels are initialized to use.
return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_protected_vidmem, gpu);
return uvm_channel_get_static_pb_protected_vidmem_gpu_va(push->channel);
}
else if (uvm_channel_is_sec2(push->channel)) {
// SEC2 PBs are in unprotected sysmem
@@ -575,7 +573,7 @@ void *uvm_pushbuffer_get_unprotected_cpu_va_for_push(uvm_pushbuffer_t *pushbuffe
if (uvm_channel_is_wlc(push->channel)) {
// Reuse existing WLC static pb for initialization
UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
return push->channel->conf_computing.static_pb_unprotected_sysmem_cpu;
return uvm_channel_get_static_pb_unprotected_sysmem_cpu(push->channel);
}
pushbuffer_base = uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem);
@@ -590,8 +588,8 @@ NvU64 uvm_pushbuffer_get_unprotected_gpu_va_for_push(uvm_pushbuffer_t *pushbuffe
if (uvm_channel_is_wlc(push->channel)) {
// Reuse existing WLC static pb for initialization
UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_unprotected_sysmem,
uvm_push_get_gpu(push));
return uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(push->channel);
}
pushbuffer_base = uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, uvm_push_get_gpu(push));

View File

@@ -322,6 +322,7 @@ static NV_STATUS cpu_decrypt(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
UvmCslIv *decrypt_iv,
NvU32 key_version,
uvm_mem_t *auth_tag_mem,
size_t size,
size_t copy_size)
@@ -338,6 +339,7 @@ static NV_STATUS cpu_decrypt(uvm_channel_t *channel,
dst_plain,
src_cipher,
&decrypt_iv[i],
key_version,
copy_size,
auth_tag_buffer));
@@ -368,7 +370,7 @@ static void gpu_encrypt(uvm_push_t *push,
uvm_gpu_address_t auth_tag_address = uvm_mem_gpu_address_virtual_kernel(auth_tag_mem, gpu);
for (i = 0; i < num_iterations; i++) {
uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);
if (i > 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
@@ -427,6 +429,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
uvm_push_t push;
UvmCslIv *decrypt_iv;
NvU32 key_version;
decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
if (!decrypt_iv)
@@ -456,6 +459,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
gpu_encrypt(&push, dst_cipher, dst_plain, decrypt_iv, auth_tag_mem, size, copy_size);
// There shouldn't be any key rotation between the end of the push and the
// CPU decryption(s), but it is more robust against test changes to force
// decryption to use the saved key.
key_version = uvm_channel_pool_key_version(push.channel->pool);
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher), out);
@@ -465,6 +473,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
dst_plain_cpu,
dst_cipher,
decrypt_iv,
key_version,
auth_tag_mem,
size,
copy_size),

View File

@@ -124,24 +124,23 @@ static NV_STATUS uvm_test_verify_bh_affinity(uvm_intr_handler_t *isr, int node)
static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAMS *params, struct file *filp)
{
uvm_gpu_t *gpu;
NV_STATUS status;
uvm_rm_user_object_t user_rm_va_space = {
.rm_control_fd = -1,
.user_client = params->client,
.user_object = params->smc_part_ref
};
NV_STATUS status = NV_OK;
if (!UVM_THREAD_AFFINITY_SUPPORTED())
return NV_ERR_NOT_SUPPORTED;
status = uvm_gpu_retain_by_uuid(&params->gpu_uuid, &user_rm_va_space, &gpu);
if (status != NV_OK)
return status;
uvm_mutex_lock(&g_uvm_global.global_lock);
gpu = uvm_gpu_get_by_uuid(&params->gpu_uuid);
if (!gpu) {
status = NV_ERR_INVALID_DEVICE;
goto unlock;
}
// If the GPU is not attached to a NUMA node, there is nothing to do.
if (gpu->parent->closest_cpu_numa_node == NUMA_NO_NODE) {
status = NV_ERR_NOT_SUPPORTED;
goto release;
goto unlock;
}
if (gpu->parent->replayable_faults_supported) {
@@ -150,7 +149,7 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
gpu->parent->closest_cpu_numa_node);
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
if (status != NV_OK)
goto release;
goto unlock;
if (gpu->parent->non_replayable_faults_supported) {
uvm_parent_gpu_non_replayable_faults_isr_lock(gpu->parent);
@@ -158,7 +157,7 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
gpu->parent->closest_cpu_numa_node);
uvm_parent_gpu_non_replayable_faults_isr_unlock(gpu->parent);
if (status != NV_OK)
goto release;
goto unlock;
}
if (gpu->parent->access_counters_supported) {
@@ -168,8 +167,9 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
}
}
release:
uvm_gpu_release(gpu);
unlock:
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}

View File

@@ -347,20 +347,30 @@ typedef enum
UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH = 0,
UVM_TEST_CHANNEL_STRESS_MODE_UPDATE_CHANNELS,
UVM_TEST_CHANNEL_STRESS_MODE_STREAM,
UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION,
} UVM_TEST_CHANNEL_STRESS_MODE;
typedef enum
{
UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU,
UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU,
UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE,
} UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION;
#define UVM_TEST_CHANNEL_STRESS UVM_TEST_IOCTL_BASE(15)
typedef struct
{
NvU32 mode; // In
NvU32 mode; // In, one of UVM_TEST_CHANNEL_STRESS_MODE
// Number of iterations:
// mode == NOOP_PUSH: number of noop pushes
// mode == UPDATE_CHANNELS: number of updates
// mode == STREAM: number of iterations per stream
// mode == ROTATION: number of operations
NvU32 iterations;
NvU32 num_streams; // In, used only for mode == UVM_TEST_CHANNEL_STRESS_MODE_STREAM
NvU32 num_streams; // In, used only if mode == STREAM
NvU32 key_rotation_operation; // In, used only if mode == ROTATION
NvU32 seed; // In
NvU32 verbose; // In
NV_STATUS rmStatus; // Out
@@ -1210,8 +1220,6 @@ typedef struct
typedef struct
{
NvProcessorUuid gpu_uuid; // In
NvHandle client; // In
NvHandle smc_part_ref; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_NUMA_CHECK_AFFINITY_PARAMS;

View File

@@ -725,8 +725,9 @@ bool uvm_va_block_cpu_is_region_resident_on(uvm_va_block_t *va_block, int nid, u
}
// Return the preferred NUMA node ID for the block's policy.
// If the preferred node ID is NUMA_NO_NODE, the current NUMA node ID
// is returned.
// If the preferred node ID is NUMA_NO_NODE, the nearest NUMA node ID
// with memory is returned. In most cases, this should be the current
// NUMA node.
static int uvm_va_block_context_get_node(uvm_va_block_context_t *va_block_context)
{
if (va_block_context->make_resident.dest_nid != NUMA_NO_NODE)
@@ -2070,6 +2071,7 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
uvm_page_mask_t *allocated_mask;
uvm_cpu_chunk_alloc_flags_t alloc_flags = UVM_CPU_CHUNK_ALLOC_FLAGS_NONE;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
const uvm_va_policy_t *policy = uvm_va_policy_get_region(block, populate_region);
uvm_page_index_t page_index;
uvm_gpu_id_t id;
int preferred_nid = block_context->make_resident.dest_nid;
@@ -2077,6 +2079,10 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
if (block_test && block_test->cpu_chunk_allocation_target_id != NUMA_NO_NODE)
preferred_nid = block_test->cpu_chunk_allocation_target_id;
// If the VA range has a preferred NUMA node, use it.
if (preferred_nid == NUMA_NO_NODE)
preferred_nid = policy->preferred_nid;
// TODO: Bug 4158598: Using NUMA_NO_NODE for staging allocations is sub-optimal.
if (preferred_nid != NUMA_NO_NODE) {
uvm_va_block_cpu_node_state_t *node_state = block_node_state_get(block, preferred_nid);
@@ -2127,13 +2133,12 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
uvm_page_mask_t *node_pages_mask = &block_context->make_resident.node_pages_mask;
uvm_chunk_sizes_mask_t allocation_sizes;
if (uvm_page_mask_test(allocated_mask, page_index)) {
if (uvm_page_mask_test(allocated_mask, page_index) ||
uvm_va_block_cpu_is_page_resident_on(block, preferred_nid, page_index)) {
page_index = uvm_va_block_next_unset_page_in_mask(populate_region, allocated_mask, page_index) - 1;
continue;
}
UVM_ASSERT(!uvm_va_block_cpu_is_page_resident_on(block, preferred_nid, page_index));
allocation_sizes = block_calculate_largest_alloc_size(block,
page_index,
allocated_mask,
@@ -3843,6 +3848,7 @@ static void conf_computing_block_copy_push_gpu_to_cpu(uvm_va_block_t *block,
uvm_gpu_address_t staging_buffer = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
uvm_gpu_address_t auth_tag_buffer = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
uvm_gpu_address_t src_address = block_copy_get_address(block, &copy_state->src, page_index, gpu);
NvU32 key_version = uvm_channel_pool_key_version(push->channel->pool);
UVM_ASSERT(UVM_ID_IS_GPU(copy_state->src.id));
UVM_ASSERT(UVM_ID_IS_CPU(copy_state->dst.id));
@@ -3860,7 +3866,8 @@ static void conf_computing_block_copy_push_gpu_to_cpu(uvm_va_block_t *block,
// crypto-operations and it only guarantees PAGE_SIZE contiguity, all
// encryptions and decryptions must happen on a PAGE_SIZE basis.
for_each_va_block_page_in_region(page_index, region) {
uvm_conf_computing_log_gpu_encryption(push->channel, &dma_buffer->decrypt_iv[page_index]);
uvm_conf_computing_log_gpu_encryption(push->channel, PAGE_SIZE, &dma_buffer->decrypt_iv[page_index]);
dma_buffer->key_version[page_index] = key_version;
// All but the first encryption can be pipelined. The first encryption
// uses the caller's pipelining settings.
@@ -3919,7 +3926,8 @@ static NV_STATUS conf_computing_copy_pages_finish(uvm_va_block_t *block,
status = uvm_conf_computing_cpu_decrypt(push->channel,
cpu_page_address,
staging_buffer,
&dma_buffer->decrypt_iv[page_index],
dma_buffer->decrypt_iv + page_index,
dma_buffer->key_version[page_index],
PAGE_SIZE,
auth_tag_buffer);
kunmap(dst_page);
@@ -4037,7 +4045,7 @@ static NV_STATUS block_copy_pages(uvm_va_block_t *va_block,
UVM_ASSERT(dst_chunk);
UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) >= uvm_va_block_region_size(region));
UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) <= uvm_cpu_chunk_get_size(dst_chunk));
UVM_ASSERT(uvm_va_block_region_size(region) <= uvm_cpu_chunk_get_size(dst_chunk));
// CPU-to-CPU copies using memcpy() don't have any inherent ordering with
// copies using GPU CEs. So, we have to make sure that all previously
@@ -5132,7 +5140,7 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
uvm_page_mask_t *dst_resident_mask;
uvm_page_mask_t *migrated_pages;
uvm_page_mask_t *staged_pages;
uvm_page_mask_t *first_touch_mask;
uvm_page_mask_t *scratch_residency_mask;
// TODO: Bug 3660922: need to implement HMM read duplication support.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
@@ -5151,6 +5159,10 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(!uvm_va_block_is_dead(va_block));
scratch_residency_mask = kmem_cache_alloc(g_uvm_page_mask_cache, NV_UVM_GFP_FLAGS);
if (!scratch_residency_mask)
return NV_ERR_NO_MEMORY;
// For pages that are entering read-duplication we need to unmap remote
// mappings and revoke RW and higher access permissions.
//
@@ -5177,12 +5189,12 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
status = block_prep_read_duplicate_mapping(va_block, va_block_context, src_id, region, preprocess_page_mask);
if (status != NV_OK)
return status;
goto out;
}
status = block_populate_pages(va_block, va_block_retry, va_block_context, dest_id, region, page_mask);
if (status != NV_OK)
return status;
goto out;
status = block_copy_resident_pages(va_block,
va_block_context,
@@ -5192,22 +5204,17 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
prefetch_page_mask,
UVM_VA_BLOCK_TRANSFER_MODE_COPY);
if (status != NV_OK)
return status;
goto out;
// Pages that weren't resident anywhere else were populated at the
// destination directly. Mark them as resident now, since there were no
// errors from block_copy_resident_pages() above.
// Note that va_block_context->scratch_page_mask is passed to
// block_copy_set_first_touch_residency() which is generally unsafe but in
// this case, block_copy_set_first_touch_residency() copies page_mask
// before scratch_page_mask could be clobbered.
migrated_pages = &va_block_context->make_resident.pages_migrated;
first_touch_mask = &va_block_context->scratch_page_mask;
uvm_page_mask_init_from_region(first_touch_mask, region, page_mask);
uvm_page_mask_andnot(first_touch_mask, first_touch_mask, migrated_pages);
uvm_page_mask_init_from_region(scratch_residency_mask, region, page_mask);
uvm_page_mask_andnot(scratch_residency_mask, scratch_residency_mask, migrated_pages);
if (!uvm_page_mask_empty(first_touch_mask))
block_copy_set_first_touch_residency(va_block, va_block_context, dest_id, region, first_touch_mask);
if (!uvm_page_mask_empty(scratch_residency_mask))
block_copy_set_first_touch_residency(va_block, va_block_context, dest_id, region, scratch_residency_mask);
staged_pages = &va_block_context->make_resident.pages_staged;
if (!UVM_ID_IS_CPU(dest_id) && !uvm_page_mask_empty(staged_pages)) {
@@ -5219,6 +5226,18 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
if (!uvm_page_mask_empty(migrated_pages)) {
if (UVM_ID_IS_CPU(dest_id)) {
// Check if the CPU is already in the resident set of processors.
// We need to do this since we can't have multiple NUMA nodes with
// resident pages.
// If any of the migrated pages were already resident on the CPU, the
// residency has to be switched to the destination NUMA node.
if (uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) &&
uvm_page_mask_and(scratch_residency_mask,
uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE),
migrated_pages)) {
uvm_va_block_cpu_clear_resident_all_chunks(va_block, va_block_context, scratch_residency_mask);
}
uvm_va_block_cpu_set_resident_all_chunks(va_block, va_block_context, migrated_pages);
}
else {
@@ -5247,7 +5266,9 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
// Check state of all chunks after residency change.
// TODO: Bug 4207783: Check both CPU and GPU chunks.
UVM_ASSERT(block_check_cpu_chunks(va_block));
return NV_OK;
out:
kmem_cache_free(g_uvm_page_mask_cache, scratch_residency_mask);
return status;
}
// Looks up the current CPU mapping state of page from the
@@ -5532,13 +5553,15 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
*block->read_duplicated_pages.bitmap);
// Test read_duplicated_pages mask
UVM_ASSERT_MSG((uvm_processor_mask_get_count(resident_processors) <= 1 &&
!uvm_page_mask_test(&block->read_duplicated_pages, page_index)) ||
(uvm_processor_mask_get_count(resident_processors) > 1 &&
uvm_page_mask_test(&block->read_duplicated_pages, page_index)),
UVM_ASSERT_MSG((!uvm_page_mask_test(&block->read_duplicated_pages, page_index) &&
uvm_processor_mask_get_count(resident_processors) <= 1) ||
(uvm_page_mask_test(&block->read_duplicated_pages, page_index) &&
uvm_processor_mask_get_count(resident_processors) >= 1),
"Resident: 0x%lx - Mappings R: 0x%lx W: 0x%lx A: 0x%lx - SWA: 0x%lx - RD: 0x%lx\n",
*resident_processors->bitmap,
*read_mappings->bitmap, *write_mappings->bitmap, *atomic_mappings->bitmap,
*read_mappings->bitmap,
*write_mappings->bitmap,
*atomic_mappings->bitmap,
*va_space->system_wide_atomics_enabled_processors.bitmap,
*block->read_duplicated_pages.bitmap);
@@ -6022,7 +6045,7 @@ static bool block_has_remote_mapping_gpu(uvm_va_block_t *block,
if (uvm_page_mask_empty(mapped_pages))
return false;
return !uvm_id_equal(uvm_va_range_get_policy(block->va_range)->preferred_location, gpu_id);
return !uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(block->va_range), gpu_id, NUMA_NO_NODE);
}
// Remote pages are pages which are mapped but not resident locally
@@ -8365,6 +8388,7 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
uvm_va_block_context_t *block_context,
uvm_gpu_t *gpu,
uvm_processor_id_t resident_id,
int resident_nid,
uvm_page_mask_t *map_page_mask,
uvm_prot_t new_prot,
uvm_tracker_t *out_tracker)
@@ -8374,7 +8398,7 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
uvm_push_t push;
NV_STATUS status;
uvm_page_mask_t *pages_to_map = &block_context->mapping.page_mask;
const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, NUMA_NO_NODE);
const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, resident_nid);
uvm_pte_bits_gpu_t pte_bit;
uvm_pte_bits_gpu_t prot_pte_bit = get_gpu_pte_bit_index(new_prot);
uvm_va_block_new_pte_state_t *new_pte_state = &block_context->mapping.new_pte_state;
@@ -8383,8 +8407,10 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
UVM_ASSERT(map_page_mask);
UVM_ASSERT(uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(resident_id)], gpu->id));
if (uvm_processor_mask_test(block_get_uvm_lite_gpus(va_block), gpu->id))
UVM_ASSERT(uvm_id_equal(resident_id, uvm_va_range_get_policy(va_block->va_range)->preferred_location));
if (uvm_processor_mask_test(block_get_uvm_lite_gpus(va_block), gpu->id)) {
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
UVM_ASSERT(uvm_va_policy_preferred_location_equal(policy, resident_id, policy->preferred_nid));
}
UVM_ASSERT(!uvm_page_mask_and(&block_context->scratch_page_mask,
map_page_mask,
@@ -8486,18 +8512,27 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
return uvm_tracker_add_push_safe(out_tracker, &push);
}
// allowed_nid_mask is only valid if the CPU is set in allowed_mask.
static void map_get_allowed_destinations(uvm_va_block_t *block,
uvm_va_block_context_t *va_block_context,
const uvm_va_policy_t *policy,
uvm_processor_id_t id,
uvm_processor_mask_t *allowed_mask)
uvm_processor_mask_t *allowed_mask,
nodemask_t *allowed_nid_mask)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
*allowed_nid_mask = node_possible_map;
if (uvm_processor_mask_test(block_get_uvm_lite_gpus(block), id)) {
// UVM-Lite can only map resident pages on the preferred location
uvm_processor_mask_zero(allowed_mask);
uvm_processor_mask_set(allowed_mask, policy->preferred_location);
if (UVM_ID_IS_CPU(policy->preferred_location) &&
!uvm_va_policy_preferred_location_equal(policy, UVM_ID_CPU, NUMA_NO_NODE)) {
nodes_clear(*allowed_nid_mask);
node_set(policy->preferred_nid, *allowed_nid_mask);
}
}
else if ((uvm_va_policy_is_read_duplicate(policy, va_space) ||
(uvm_id_equal(policy->preferred_location, id) &&
@@ -8540,6 +8575,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
uvm_page_mask_t *running_page_mask = &va_block_context->mapping.map_running_page_mask;
NV_STATUS status = NV_OK;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
nodemask_t *allowed_nid_destinations;
va_block_context->mapping.cause = cause;
@@ -8589,10 +8625,20 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
if (!allowed_destinations)
return NV_ERR_NO_MEMORY;
allowed_nid_destinations = uvm_kvmalloc(sizeof(*allowed_nid_destinations));
if (!allowed_nid_destinations) {
uvm_processor_mask_cache_free(allowed_destinations);
return NV_ERR_NO_MEMORY;
}
// Map per resident location so we can more easily detect physically-
// contiguous mappings.
map_get_allowed_destinations(va_block, va_block_context, policy, id, allowed_destinations);
map_get_allowed_destinations(va_block,
va_block_context,
policy,
id,
allowed_destinations,
allowed_nid_destinations);
for_each_closest_id(resident_id, allowed_destinations, id, va_space) {
if (UVM_ID_IS_CPU(id)) {
status = block_map_cpu_to(va_block,
@@ -8603,11 +8649,30 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
new_prot,
out_tracker);
}
else if (UVM_ID_IS_CPU(resident_id)) {
int nid;
// map_get_allowed_destinations() will set the mask of CPU NUMA
// nodes that should be mapped.
for_each_node_mask(nid, *allowed_nid_destinations) {
status = block_map_gpu_to(va_block,
va_block_context,
gpu,
resident_id,
nid,
running_page_mask,
new_prot,
out_tracker);
if (status != NV_OK)
break;
}
}
else {
status = block_map_gpu_to(va_block,
va_block_context,
gpu,
resident_id,
NUMA_NO_NODE,
running_page_mask,
new_prot,
out_tracker);
@@ -8622,6 +8687,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
}
uvm_processor_mask_cache_free(allowed_destinations);
uvm_kvfree(allowed_nid_destinations);
return status;
}
@@ -11175,8 +11241,8 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
// so uvm_va_block_map will be a no-op.
uvm_processor_mask_and(map_uvm_lite_gpus, map_other_processors, block_get_uvm_lite_gpus(va_block));
if (!uvm_processor_mask_empty(map_uvm_lite_gpus) &&
uvm_id_equal(new_residency, preferred_location)) {
for_each_id_in_mask(map_processor_id, map_uvm_lite_gpus) {
uvm_va_policy_preferred_location_equal(policy, new_residency, va_block_context->make_resident.dest_nid)) {
for_each_id_in_mask (map_processor_id, map_uvm_lite_gpus) {
status = uvm_va_block_map(va_block,
va_block_context,
map_processor_id,
@@ -11637,6 +11703,10 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
// For GPU faults, the bottom half is pinned to CPUs closest to their GPU.
// Therefore, in both cases, we can use numa_mem_id() to get the NUMA node
// ID of the faulting processor.
// Note that numa_mem_id() returns the nearest node with memory. In most
// cases, this will be the current NUMA node. However, in the case that the
// current node does not have any memory, we probably want the nearest node
// with memory, anyway.
int current_nid = numa_mem_id();
bool may_read_duplicate = can_read_duplicate(va_block, page_index, policy, thrashing_hint);
@@ -11660,7 +11730,12 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
// If read duplication is enabled and the page is also resident on the CPU,
// keep its current NUMA node residency.
if (may_read_duplicate && uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index))
return block_get_page_node_residency(va_block, page_index);
return NUMA_NO_NODE;
// The new_residency processor is the CPU and the preferred location is not
// the CPU. If the page is resident on the CPU, keep its current residency.
if (uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index))
return NUMA_NO_NODE;
return current_nid;
}
@@ -12564,125 +12639,6 @@ NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
return uvm_hmm_va_block_find_create(va_space, addr, hmm_vma, out_block);
}
// Launch a synchronous, encrypted copy between GPU and CPU.
//
// The copy entails a GPU-side encryption (relying on the Copy Engine), and a
// CPU-side decryption step, such that the destination CPU buffer pointed by
// dst_plain will contain the unencrypted (plain text) contents. The destination
// buffer can be in protected or unprotected sysmem, while the source buffer
// must be in protected vidmem.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
//
// format and the variadic arguments that follow it describe the push; the
// printf format attribute below makes the compiler check them (format is the
// 6th parameter, its arguments start at the 7th).
//
// Returns NV_OK on success. Otherwise returns the first failing status from
// the DMA buffer allocation, the push begin, the push wait or the CPU
// decryption.
__attribute__ ((format(printf, 6, 7)))
static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
void *dst_plain,
uvm_gpu_address_t src_gpu_address,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...)
{
NV_STATUS status;
UvmCslIv decrypt_iv;
uvm_push_t push;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
void *src_cipher, *auth_tag;
va_list args;
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
// Staging buffer that receives the ciphertext and the authentication tag
// written by the GPU. Nothing has been acquired yet, so a failure here can
// return directly.
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
if (status != NV_OK)
return status;
// NOTE(review): args is a va_list, yet it is passed in the position where
// the format's own arguments are expected. Confirm that
// uvm_push_begin_acquire accepts a va_list here; if it is variadic, the
// push description would be formatted from the wrong data.
va_start(args, format);
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
va_end(args);
if (status != NV_OK)
goto out;
// decrypt_iv is filled in here and later handed to the CPU decryption
// below. Presumably it is the IV the GPU encryption will use - TODO confirm.
uvm_conf_computing_log_gpu_encryption(push.channel, &decrypt_iv);
dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
// GPU side: encrypt size bytes from the source into the staging buffer.
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
// Wait for the push to complete before reading the staging buffer from the
// CPU.
status = uvm_push_end_and_wait(&push);
if (status != NV_OK)
goto out;
// CPU side: decrypt the staging buffer contents into dst_plain, using the
// CPU addresses of the same staging and auth tag buffers.
src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
status = uvm_conf_computing_cpu_decrypt(push.channel, dst_plain, src_cipher, &decrypt_iv, size, auth_tag);
out:
// The staging buffer goes back to the pool on every path past its
// allocation, success or failure.
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
return status;
}
// Launch a synchronous, encrypted copy between CPU and GPU.
//
// The source CPU buffer pointed by src_plain contains the unencrypted (plain
// text) contents; the function internally performs a CPU-side encryption step
// before launching the GPU-side CE decryption. The source buffer can be in
// protected or unprotected sysmem, while the destination buffer must be in
// protected vidmem.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
//
// format and the variadic arguments that follow it describe the push; the
// printf format attribute below makes the compiler check them (format is the
// 6th parameter, its arguments start at the 7th).
//
// Returns NV_OK on success. Otherwise returns the first failing status from
// the DMA buffer allocation, the push begin or the push wait.
__attribute__ ((format(printf, 6, 7)))
static NV_STATUS encrypted_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
uvm_gpu_address_t dst_gpu_address,
void *src_plain,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...)
{
NV_STATUS status;
uvm_push_t push;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
void *dst_cipher, *auth_tag;
va_list args;
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
// Staging buffer that holds the CPU-produced ciphertext and authentication
// tag for the GPU to read. Nothing has been acquired yet, so a failure here
// can return directly.
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
if (status != NV_OK)
return status;
// NOTE(review): args is a va_list, yet it is passed in the position where
// the format's own arguments are expected. Confirm that
// uvm_push_begin_acquire accepts a va_list here; if it is variadic, the
// push description would be formatted from the wrong data.
va_start(args, format);
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
va_end(args);
if (status != NV_OK)
goto out;
// CPU side: encrypt src_plain into the staging buffer through its CPU
// address. NOTE(review): NULL is passed as the fourth argument, the position
// where the matching decrypt call takes an IV; presumably the callee then
// chooses the IV itself - TODO confirm.
dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);
// GPU side: decrypt size bytes from the staging buffer into the destination
// address.
src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
// Wait for the push to complete before the staging buffer is released
// below.
status = uvm_push_end_and_wait(&push);
out:
// The staging buffer goes back to the pool on every path past its
// allocation, success or failure.
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
return status;
}
static NV_STATUS va_block_write_cpu_to_gpu(uvm_va_block_t *va_block,
uvm_gpu_t *gpu,
uvm_gpu_address_t dst_gpu_address,
@@ -12695,14 +12651,14 @@ static NV_STATUS va_block_write_cpu_to_gpu(uvm_va_block_t *va_block,
uvm_gpu_address_t src_gpu_address;
if (g_uvm_global.conf_computing_enabled) {
return encrypted_memcopy_cpu_to_gpu(gpu,
dst_gpu_address,
uvm_mem_get_cpu_addr_kernel(src_mem),
size,
&va_block->tracker,
"Encrypted write to [0x%llx, 0x%llx)",
dst,
dst + size);
return uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
dst_gpu_address,
uvm_mem_get_cpu_addr_kernel(src_mem),
size,
&va_block->tracker,
"Encrypted write to [0x%llx, 0x%llx)",
dst,
dst + size);
}
status = uvm_push_begin_acquire(gpu->channel_manager,
@@ -12799,14 +12755,14 @@ static NV_STATUS va_block_read_gpu_to_cpu(uvm_va_block_t *va_block,
uvm_gpu_address_t dst_gpu_address;
if (g_uvm_global.conf_computing_enabled) {
return encrypted_memcopy_gpu_to_cpu(gpu,
uvm_mem_get_cpu_addr_kernel(dst_mem),
src_gpu_address,
size,
&va_block->tracker,
"Encrypted read from [0x%llx, 0x%llx)",
src,
src + size);
return uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
uvm_mem_get_cpu_addr_kernel(dst_mem),
src_gpu_address,
size,
&va_block->tracker,
"Encrypted read from [0x%llx, 0x%llx)",
src,
src + size);
}
status = uvm_push_begin_acquire(gpu->channel_manager,

View File

@@ -105,6 +105,12 @@ bool uvm_va_policy_preferred_location_equal(const uvm_va_policy_t *policy, uvm_p
{
bool equal = uvm_id_equal(policy->preferred_location, proc);
if (!UVM_ID_IS_CPU(policy->preferred_location))
UVM_ASSERT(policy->preferred_nid == NUMA_NO_NODE);
if (!UVM_ID_IS_CPU(proc))
UVM_ASSERT(cpu_numa_id == NUMA_NO_NODE);
if (equal && UVM_ID_IS_CPU(policy->preferred_location))
equal = uvm_numa_id_eq(policy->preferred_nid, cpu_numa_id);
@@ -656,7 +662,7 @@ const uvm_va_policy_t *uvm_va_policy_set_preferred_location(uvm_va_block_t *va_b
// and that the policy is changing.
UVM_ASSERT(node->node.start >= start);
UVM_ASSERT(node->node.end <= end);
UVM_ASSERT(!uvm_id_equal(node->policy.preferred_location, processor_id));
UVM_ASSERT(!uvm_va_policy_preferred_location_equal(&node->policy, processor_id, cpu_node_id));
}
node->policy.preferred_location = processor_id;

View File

@@ -868,9 +868,9 @@ static void uvm_va_range_disable_peer_managed(uvm_va_range_t *va_range, uvm_gpu_
// preferred location. If peer mappings are being disabled to the
// preferred location, then unmap the other GPU.
// Nothing to do otherwise.
if (uvm_id_equal(uvm_va_range_get_policy(va_range)->preferred_location, gpu0->id))
if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu0->id, NUMA_NO_NODE))
uvm_lite_gpu_to_unmap = gpu1;
else if (uvm_id_equal(uvm_va_range_get_policy(va_range)->preferred_location, gpu1->id))
else if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu1->id, NUMA_NO_NODE))
uvm_lite_gpu_to_unmap = gpu0;
else
return;
@@ -951,7 +951,7 @@ static void va_range_unregister_gpu_managed(uvm_va_range_t *va_range, uvm_gpu_t
// Reset preferred location and accessed-by of VA ranges if needed
// Note: ignoring the return code of uvm_va_range_set_preferred_location since it
// can only return an error when setting a preferred location, not on a reset
if (uvm_id_equal(uvm_va_range_get_policy(va_range)->preferred_location, gpu->id))
if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu->id, NUMA_NO_NODE))
(void)uvm_va_range_set_preferred_location(va_range, UVM_ID_INVALID, NUMA_NO_NODE, mm, NULL);
uvm_va_range_unset_accessed_by(va_range, gpu->id, NULL);
@@ -1683,7 +1683,7 @@ void uvm_va_range_unset_accessed_by(uvm_va_range_t *va_range,
// If a UVM-Lite GPU is being removed from the accessed_by mask, it will
// also stop being a UVM-Lite GPU unless it's also the preferred location.
if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, processor_id) &&
!uvm_id_equal(uvm_va_range_get_policy(va_range)->preferred_location, processor_id)) {
!uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), processor_id, NUMA_NO_NODE)) {
range_unmap(va_range, processor_id, out_tracker);
}

View File

@@ -0,0 +1,42 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Comments, prototypes and checks taken from DMTF: Copyright 2021-2022 DMTF. All rights reserved.
* License: BSD 3-Clause License. For full text see link: https://github.com/DMTF/libspdm/blob/main/LICENSE.md
*/
#include "os-interface.h"
#include "internal_crypt_lib.h"
#include "library/cryptlib.h"
/*
 * Report at runtime which crypto backend this file was compiled against.
 *
 * The outcome is fixed at build time by the USE_LKCA macro:
 *   - USE_LKCA defined:     logs two informational messages and returns true.
 *   - USE_LKCA not defined: logs an error message and returns false.
 *
 * A true result only means the LKCA wrappers were compiled in; as the
 * informational message itself states, LKCA calls may still fail later if
 * the required modules have not been loaded.
 */
bool libspdm_check_crypto_backend(void)
{
#ifndef USE_LKCA
    /* Built against stubs: libspdm cannot work without LKCA. */
    nv_printf(NV_DBG_ERRORS, "libspdm_check_crypto_backend: Error - libspdm expects LKCA but found stubs!\n");
    return false;
#else
    /* Wrappers are compiled in; module availability is not checked here. */
    nv_printf(NV_DBG_INFO, "libspdm_check_crypto_backend: LKCA wrappers found.\n");
    nv_printf(NV_DBG_INFO, "libspdm_check_crypto_backend: LKCA calls may still fail if modules have not been loaded!\n");
    return true;
#endif
}

View File

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;
// Get the NUMA node where the first page of the stack is resident. If

View File

@@ -37,6 +37,10 @@
#include <linux/kernfs.h>
#endif
#if !defined(NV_BUS_TYPE_HAS_IOMMU_OPS)
#include <linux/iommu.h>
#endif
static void
nv_check_and_exclude_gpu(
nvidia_stack_t *sp,
@@ -530,35 +534,21 @@ nv_pci_probe
if (pci_dev->is_virtfn)
{
#if defined(NV_VGPU_KVM_BUILD)
nvl = pci_get_drvdata(pci_dev->physfn);
if (!nvl)
#if defined(NV_BUS_TYPE_HAS_IOMMU_OPS)
if (pci_dev->dev.bus->iommu_ops == NULL)
#else
if ((pci_dev->dev.iommu != NULL) && (pci_dev->dev.iommu->iommu_dev != NULL) &&
(pci_dev->dev.iommu->iommu_dev->ops == NULL))
#endif
{
nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
"since PF is not bound to nvidia driver.\n",
"since IOMMU is not present on the system.\n",
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
goto failed;
}
if (pci_dev->dev.bus->iommu_ops == NULL)
{
nv = NV_STATE_PTR(nvl);
if (rm_is_iommu_needed_for_sriov(sp, nv))
{
nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
"since IOMMU is not present on the system.\n",
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
goto failed;
}
}
if (nvidia_vgpu_vfio_probe(pci_dev) != NV_OK)
{
nv_printf(NV_DBG_ERRORS, "NVRM: Failed to register device to vGPU VFIO module");
goto failed;
}
nv_kmem_cache_free_stack(sp);
return 0;
#else

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -45,6 +45,11 @@ typedef struct gpuObject *gpuObjectHandle;
typedef struct gpuRetainedChannel_struct gpuRetainedChannel;
NV_STATUS calculatePCIELinkRateMBps(NvU32 lanes,
NvU32 pciLinkMaxSpeed,
NvU32 *pcieLinkRate);
NV_STATUS nvGpuOpsCreateSession(struct gpuSession **session);
NV_STATUS nvGpuOpsDestroySession(struct gpuSession *session);
@@ -286,11 +291,11 @@ NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo,
NvBool bEnable);
// Interface used for CCSL
NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx,
gpuChannelHandle channel);
NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslContextUpdate(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslRotateKey(UvmCslContext *contextList[],
NvU32 contextListCount);
NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx,
NvU8 direction);
NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx,
@@ -308,6 +313,7 @@ NV_STATUS nvGpuOpsCcslDecrypt(struct ccslContext_t *ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU32 keyRotationId,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
@@ -323,7 +329,8 @@ NV_STATUS nvGpuOpsIncrementIv(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 increment,
NvU8 *iv);
NV_STATUS nvGpuOpsLogDeviceEncryption(struct ccslContext_t *ctx,
NvU32 bufferSize);
NV_STATUS nvGpuOpsLogEncryption(struct ccslContext_t *ctx,
NvU8 direction,
NvU32 bufferSize);
#endif /* _NV_GPU_OPS_H_*/

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -1516,16 +1516,23 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext)
}
EXPORT_SYMBOL(nvUvmInterfaceDeinitCslContext);
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext)
NV_STATUS nvUvmInterfaceCslRotateKey(UvmCslContext *contextList[],
NvU32 contextListCount)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
nvidia_stack_t *sp;
status = rm_gpu_ops_ccsl_context_update(sp, uvmCslContext->ctx);
if ((contextList == NULL) || (contextListCount == 0) || (contextList[0] == NULL))
{
return NV_ERR_INVALID_ARGUMENT;
}
sp = contextList[0]->nvidia_stack;
status = rm_gpu_ops_ccsl_rotate_key(sp, contextList, contextListCount);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslUpdateContext);
EXPORT_SYMBOL(nvUvmInterfaceCslRotateKey);
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation)
@@ -1562,6 +1569,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU32 keyRotationId,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
@@ -1575,6 +1583,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
bufferSize,
inputBuffer,
(NvU8 *)decryptIv,
keyRotationId,
outputBuffer,
addAuthData,
addAuthDataSize,
@@ -1625,17 +1634,18 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
}
EXPORT_SYMBOL(nvUvmInterfaceCslIncrementIv);
NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
NvU32 bufferSize)
NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU32 bufferSize)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
status = rm_gpu_ops_ccsl_log_device_encryption(sp, uvmCslContext->ctx, bufferSize);
status = rm_gpu_ops_ccsl_log_encryption(sp, uvmCslContext->ctx, operation, bufferSize);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslLogExternalEncryption);
EXPORT_SYMBOL(nvUvmInterfaceCslLogEncryption);
#else // NV_UVM_ENABLE

View File

@@ -41,6 +41,7 @@ NVIDIA_SOURCES += nvidia/libspdm_rsa.c
NVIDIA_SOURCES += nvidia/libspdm_aead_aes_gcm.c
NVIDIA_SOURCES += nvidia/libspdm_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hmac_sha.c
NVIDIA_SOURCES += nvidia/libspdm_internal_crypt_lib.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf_sha.c
NVIDIA_SOURCES += nvidia/libspdm_ec.c
NVIDIA_SOURCES += nvidia/libspdm_x509.c

View File

@@ -161,7 +161,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_enable_atomic_ops_to_root
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vga_tryget
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_platform_has
NV_CONFTEST_FUNCTION_COMPILE_TESTS += seq_read_iter
NV_CONFTEST_FUNCTION_COMPILE_TESTS += unsafe_follow_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += follow_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked
NV_CONFTEST_FUNCTION_COMPILE_TESTS += add_memory_driver_managed
@@ -228,6 +228,7 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_alloc_me
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_free_gscco_mem
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_memory_block_size_bytes
NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_follow_pte
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += swiotlb_dma_ops
@@ -251,6 +252,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present
NV_CONFTEST_TYPE_COMPILE_TESTS += bus_type_has_iommu_ops
NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -38,4 +38,4 @@ bool libspdm_aead_aes_gcm_decrypt_prealloc(void *context,
const uint8_t *data_in, size_t data_in_size,
const uint8_t *tag, size_t tag_size,
uint8_t *data_out, size_t *data_out_size);
bool libspdm_check_crypto_backend(void);

View File

@@ -36,10 +36,28 @@ static inline int nv_follow_pfn(struct vm_area_struct *vma,
unsigned long address,
unsigned long *pfn)
{
#if defined(NV_UNSAFE_FOLLOW_PFN_PRESENT)
return unsafe_follow_pfn(vma, address, pfn);
#else
#if defined(NV_FOLLOW_PFN_PRESENT)
return follow_pfn(vma, address, pfn);
#else
#if NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
int status = 0;
spinlock_t *ptl;
pte_t *ptep;
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
return status;
status = follow_pte(vma, address, &ptep, &ptl);
if (status)
return status;
*pfn = pte_pfn(ptep_get(ptep));
// The lock is acquired inside follow_pte()
pte_unmap_unlock(ptep, ptl);
return 0;
#else // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
return -1;
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
#endif
}