Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
Commit: 550.90.07
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.78\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.90.07\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

@@ -37,13 +37,11 @@ typedef enum _HYPERVISOR_TYPE
    OS_HYPERVISOR_UNKNOWN
} HYPERVISOR_TYPE;

#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE 0
#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1
#define CMD_VGPU_VFIO_REGISTER_MDEV 2
#define CMD_VGPU_VFIO_PRESENT 3
#define CMD_VFIO_PCI_CORE_PRESENT 4
#define CMD_VFIO_WAKE_REMOVE_GPU 1
#define CMD_VGPU_VFIO_PRESENT 2
#define CMD_VFIO_PCI_CORE_PRESENT 3

#define MAX_VF_COUNT_PER_GPU 64

typedef enum _VGPU_TYPE_INFO
{
@@ -54,17 +52,11 @@ typedef enum _VGPU_TYPE_INFO

typedef struct
{
    void *vgpuVfioRef;
    void *waitQueue;
    void *nv;
    NvU32 *vgpuTypeIds;
    NvU8 **vgpuNames;
    NvU32 numVgpuTypes;
    NvU32 domain;
    NvU8 bus;
    NvU8 slot;
    NvU8 function;
    NvBool is_virtfn;
    NvU32 domain;
    NvU32 bus;
    NvU32 device;
    NvU32 return_status;
} vgpu_vfio_info;

typedef struct

@@ -1614,6 +1614,10 @@ typedef struct nv_linux_state_s {
    nv_kthread_q_t open_q;
    NvBool is_accepting_opens;
    struct semaphore open_q_lock;
#if defined(NV_VGPU_KVM_BUILD)
    wait_queue_head_t wait;
    NvS32 return_status;
#endif
} nv_linux_state_t;

extern nv_linux_state_t *nv_linux_devices;

@@ -1041,13 +1041,12 @@ NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, c
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *, NvBool *);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
                                           NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *);
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
NV_STATUS NV_API_CALL nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *);
NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);

NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -1505,23 +1505,35 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);

/*******************************************************************************
    nvUvmInterfaceCslUpdateContext
    nvUvmInterfaceCslRotateKey

    Updates a context after a key rotation event and can only be called once per
    key rotation event. Following a key rotation event, and before
    nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
    previous key can be decrypted with nvUvmInterfaceCslDecrypt.
    Disables channels and rotates keys.

    Locking: This function acquires an API lock.
    Memory : This function does not dynamically allocate memory.
    This function disables channels and rotates associated keys. The channels
    associated with the given CSL contexts must be idled before this function is
    called. To trigger key rotation all allocated channels for a given key must
    be present in the list. If the function returns successfully then the CSL
    contexts have been updated with the new key.

    Locking: This function attempts to acquire the GPU lock. In case of failure
             to acquire the return code is NV_ERR_STATE_IN_USE. The caller must
             guarantee that no CSL function, including this one, is invoked
             concurrently with the CSL contexts in contextList.
    Memory : This function dynamically allocates memory.

    Arguments:
        uvmCslContext[IN] - The CSL context associated with a channel.

        contextList[IN/OUT]  - An array of pointers to CSL contexts.
        contextListCount[IN] - Number of CSL contexts in contextList. Its value
                               must be greater than 0.
    Error codes:
        NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
        NV_ERR_INVALID_ARGUMENT - contextList is NULL or contextListCount is 0.
        NV_ERR_STATE_IN_USE     - Unable to acquire lock / resource. Caller
                                  can retry at a later time.
        NV_ERR_GENERIC          - A failure other than _STATE_IN_USE occurred
                                  when attempting to acquire a lock.
*/
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
NV_STATUS nvUvmInterfaceCslRotateKey(UvmCslContext *contextList[],
                                     NvU32 contextListCount);
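
Editor's note: given the error-code table above, a caller is expected to retry on
NV_ERR_STATE_IN_USE. A minimal sketch of such a caller follows; the retry bound is
an assumption for illustration and the function itself is not part of this header:

/* Sketch only: rotate the keys for a set of already-idled channel contexts,
 * retrying while RM cannot take the locks it needs (NV_ERR_STATE_IN_USE). */
static NV_STATUS rotate_keys_with_retry(UvmCslContext *contextList[],
                                        NvU32 contextListCount)
{
    const unsigned max_tries = 20; // hypothetical bound, not from the API
    unsigned tries;

    for (tries = 0; tries < max_tries; tries++) {
        NV_STATUS status = nvUvmInterfaceCslRotateKey(contextList, contextListCount);

        // Any result other than a lock-acquisition failure is final.
        if (status != NV_ERR_STATE_IN_USE)
            return status;
    }

    return NV_ERR_STATE_IN_USE;
}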

/*******************************************************************************
    nvUvmInterfaceCslRotateIv

@@ -1529,17 +1541,13 @@ NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
    Rotates the IV for a given channel and operation.

    This function will rotate the IV on both the CPU and the GPU.
    Outstanding messages that have been encrypted by the GPU should first be
    decrypted before calling this function with operation equal to
    UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
    encrypted by the CPU should first be decrypted before calling this function
    with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
    the channel must be idle before calling this function. This function can be
    called regardless of the value of the IV's message counter.
    For a given operation the channel must be idle before calling this function.
    This function can be called regardless of the value of the IV's message counter.

    Locking: This function attempts to acquire the GPU lock.
             In case of failure to acquire the return code
             is NV_ERR_STATE_IN_USE.
    Locking: This function attempts to acquire the GPU lock. In case of failure to
             acquire the return code is NV_ERR_STATE_IN_USE. The caller must guarantee
             that no CSL function, including this one, is invoked concurrently with
             the same CSL context.
    Memory : This function does not dynamically allocate memory.

    Arguments:
@@ -1573,8 +1581,8 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
    However, it is optional. If it is NULL, the next IV in line will be used.

    Locking: This function does not acquire an API or GPU lock.
             If called concurrently in different threads with the same UvmCslContext
             the caller must guarantee exclusion.
             The caller must guarantee that no CSL function, including this one,
             is invoked concurrently with the same CSL context.
    Memory : This function does not dynamically allocate memory.

    Arguments:
@@ -1610,9 +1618,14 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
    maximized when the input and output buffers are 16-byte aligned. This is
    natural alignment for AES block.

    During a key rotation event the previous key is stored in the CSL context.
    This allows data encrypted by the GPU to be decrypted with the previous key.
    The keyRotationId parameter identifies which key is used. The first key rotation
    ID has a value of 0 that increments by one for each key rotation event.

    Locking: This function does not acquire an API or GPU lock.
             If called concurrently in different threads with the same UvmCslContext
             the caller must guarantee exclusion.
             The caller must guarantee that no CSL function, including this one,
             is invoked concurrently with the same CSL context.
    Memory : This function does not dynamically allocate memory.

    Arguments:
@@ -1622,6 +1635,8 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
        decryptIv[IN]     - IV used to decrypt the ciphertext. Its value can either be given by
                            nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
                            internal counter is used.
        keyRotationId[IN] - Specifies the key that is used for decryption.
                            A value of NV_U32_MAX specifies the current key.
        inputBuffer[IN]   - Address of ciphertext input buffer.
        outputBuffer[OUT] - Address of plaintext output buffer.
        addAuthData[IN]   - Address of the plaintext additional authenticated data used to
@@ -1642,6 +1657,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
                                   NvU32 bufferSize,
                                   NvU8 const *inputBuffer,
                                   UvmCslIv const *decryptIv,
                                   NvU32 keyRotationId,
                                   NvU8 *outputBuffer,
                                   NvU8 const *addAuthData,
                                   NvU32 addAuthDataSize,
@@ -1656,8 +1672,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
    undefined behavior.

    Locking: This function does not acquire an API or GPU lock.
             If called concurrently in different threads with the same UvmCslContext
             the caller must guarantee exclusion.
             The caller must guarantee that no CSL function, including this one,
             is invoked concurrently with the same CSL context.
    Memory : This function does not dynamically allocate memory.

    Arguments:
@@ -1685,8 +1701,8 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,

    Locking: This function does not acquire an API or GPU lock.
    Memory : This function does not dynamically allocate memory.
             If called concurrently in different threads with the same UvmCslContext
             the caller must guarantee exclusion.
             The caller must guarantee that no CSL function, including this one,
             is invoked concurrently with the same CSL context.

    Arguments:
        uvmCslContext[IN/OUT] - The CSL context.
@@ -1711,8 +1727,8 @@ NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
    the returned IV can be used in nvUvmInterfaceCslDecrypt.

    Locking: This function does not acquire an API or GPU lock.
             If called concurrently in different threads with the same UvmCslContext
             the caller must guarantee exclusion.
             The caller must guarantee that no CSL function, including this one,
             is invoked concurrently with the same CSL context.
    Memory : This function does not dynamically allocate memory.

    Arguments:
@@ -1734,28 +1750,41 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
                                       UvmCslIv *iv);

/*******************************************************************************
    nvUvmInterfaceCslLogExternalEncryption
    nvUvmInterfaceCslLogEncryption

    Checks and logs information about non-CSL encryptions, such as those that
    originate from the GPU.
    Checks and logs information about encryptions associated with the given
    CSL context.

    This function does not modify elements of the UvmCslContext.
    For contexts associated with channels, this function does not modify elements of
    the UvmCslContext, and must be called for every CPU/GPU encryption.

    For the context associated with fault buffers, bufferSize can encompass multiple
    encryption invocations, and the UvmCslContext will be updated following a key
    rotation event.

    In either case the IV remains unmodified after this function is called.

    Locking: This function does not acquire an API or GPU lock.
    Memory : This function does not dynamically allocate memory.
             If called concurrently in different threads with the same UvmCslContext
             the caller must guarantee exclusion.
             The caller must guarantee that no CSL function, including this one,
             is invoked concurrently with the same CSL context.

    Arguments:
        uvmCslContext[IN/OUT] - The CSL context.
        bufferSize[OUT]       - The size of the buffer encrypted by the
        operation[IN]         - If the CSL context is associated with a fault
                                buffer, this argument is ignored. If it is
                                associated with a channel, it must be either
                                - UVM_CSL_OPERATION_ENCRYPT
                                - UVM_CSL_OPERATION_DECRYPT
        bufferSize[IN]        - The size of the buffer(s) encrypted by the
                                external entity in units of bytes.

    Error codes:
        NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
        NV_ERR_INSUFFICIENT_RESOURCES - The encryption would cause a counter
                                        to overflow.
*/
NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
                                                 NvU32 bufferSize);
NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
                                         UvmCslOperation operation,
                                         NvU32 bufferSize);

#endif // _NV_UVM_INTERFACE_H_

@@ -267,6 +267,7 @@ typedef struct UvmGpuChannelInfo_tag

    // The errorNotifier is filled out when the channel hits an RC error.
    NvNotification *errorNotifier;
    NvNotification *keyRotationNotifier;

    NvU32 hwRunlistId;
    NvU32 hwChannelId;
@@ -292,13 +293,13 @@ typedef struct UvmGpuChannelInfo_tag

    // GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
    // so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
    NvU64 gpFifoGpuVa;
    NvU64 gpPutGpuVa;
    NvU64 gpGetGpuVa;
    // GPU VA of work submission offset is needed in Confidential Computing
    // so CE channels can ring doorbell of other channels as required for
    // WLC/LCIC work submission
    NvU64 workSubmissionOffsetGpuVa;
} UvmGpuChannelInfo;

typedef enum
@@ -604,6 +605,8 @@ typedef struct UvmGpuConfComputeCaps_tag
{
    // Out: GPU's confidential compute mode
    UvmGpuConfComputeMode mode;
    // Is key rotation enabled for UVM keys
    NvBool bKeyRotationEnabled;
} UvmGpuConfComputeCaps;

#define UVM_GPU_NAME_LENGTH 0x40

@@ -1086,4 +1089,21 @@ typedef enum UvmCslOperation
    UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;

typedef enum UVM_KEY_ROTATION_STATUS {
    // Key rotation complete/not in progress
    UVM_KEY_ROTATION_STATUS_IDLE = 0,
    // RM is waiting for clients to report their channels are idle for key rotation
    UVM_KEY_ROTATION_STATUS_PENDING = 1,
    // Key rotation is in progress
    UVM_KEY_ROTATION_STATUS_IN_PROGRESS = 2,
    // Key rotation timeout failure, RM will RC non-idle channels.
    // UVM should never see this status value.
    UVM_KEY_ROTATION_STATUS_FAILED_TIMEOUT = 3,
    // Key rotation failed because upper threshold was crossed, RM will RC non-idle channels
    UVM_KEY_ROTATION_STATUS_FAILED_THRESHOLD = 4,
    // Internal RM failure while rotating keys for a certain channel, RM will RC the channel.
    UVM_KEY_ROTATION_STATUS_FAILED_ROTATION = 5,
    UVM_KEY_ROTATION_STATUS_MAX_COUNT = 6,
} UVM_KEY_ROTATION_STATUS;
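
Editor's note: for readers tracing these statuses through logs, a small helper of
the following shape can pretty-print them. The function is illustrative only and is
not part of the header:

// Illustrative only: map a UVM_KEY_ROTATION_STATUS to a printable name.
static const char *key_rotation_status_string(UVM_KEY_ROTATION_STATUS status)
{
    switch (status) {
        case UVM_KEY_ROTATION_STATUS_IDLE:             return "idle";
        case UVM_KEY_ROTATION_STATUS_PENDING:          return "pending";
        case UVM_KEY_ROTATION_STATUS_IN_PROGRESS:      return "in progress";
        case UVM_KEY_ROTATION_STATUS_FAILED_TIMEOUT:   return "failed: timeout";
        case UVM_KEY_ROTATION_STATUS_FAILED_THRESHOLD: return "failed: threshold";
        case UVM_KEY_ROTATION_STATUS_FAILED_ROTATION:  return "failed: rotation";
        default:                                       return "unknown";
    }
}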

#endif // _NV_UVM_TYPES_H_

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -103,14 +103,14 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, n

NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_key(nvidia_stack_t *, UvmCslContext *[], NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU32, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU32);

#endif

@@ -1416,6 +1416,42 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_VFIO_REGISTER_EMULATED_IOMMU_DEV_PRESENT" "" "functions"
        ;;

        bus_type_has_iommu_ops)
            #
            # Determine if the 'bus_type' structure has an 'iommu_ops' field.
            #
            # This field was removed by commit 17de3f5fdd35 (iommu: Retire bus ops)
            # in v6.8
            #
            CODE="
            #include <linux/device.h>

            int conftest_bus_type_has_iommu_ops(void) {
                return offsetof(struct bus_type, iommu_ops);
            }"

            compile_check_conftest "$CODE" "NV_BUS_TYPE_HAS_IOMMU_OPS" "" "types"
        ;;

        eventfd_signal_has_counter_arg)
            #
            # Determine if the eventfd_signal() function has an additional 'counter' argument.
            #
            # This argument was removed by commit 3652117f8548 (eventfd: simplify
            # eventfd_signal()) in v6.8
            #
            CODE="
            #include <linux/eventfd.h>

            void conftest_eventfd_signal_has_counter_arg(void) {
                struct eventfd_ctx *ctx;

                eventfd_signal(ctx, 1);
            }"

            compile_check_conftest "$CODE" "NV_EVENTFD_SIGNAL_HAS_COUNTER_ARG" "" "types"
        ;;
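
Editor's note: compile_check_conftest compiles the CODE snippet against the target
kernel and defines the named macro if it builds. A sketch of how driver code would
typically consume the macro generated above (the wrapper name is an assumption, not
code from this commit; kernels >= v6.8 dropped the counter argument):

#include <linux/eventfd.h>

// Illustrative wrapper: call eventfd_signal() with the arity the running
// kernel expects, based on the conftest result.
static inline void nv_eventfd_signal(struct eventfd_ctx *ctx)
{
#if defined(NV_EVENTFD_SIGNAL_HAS_COUNTER_ARG)
    eventfd_signal(ctx, 1);
#else
    eventfd_signal(ctx);
#endif
}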

        drm_available)
            # Determine if the DRM subsystem is usable
            CODE="
@@ -5216,25 +5252,23 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT" "" "generic"
        ;;

        unsafe_follow_pfn)
        follow_pfn)
            #
            # Determine if unsafe_follow_pfn() is present.
            # Determine if follow_pfn() is present.
            #
            # unsafe_follow_pfn() was added by commit 69bacee7f9ad
            # ("mm: Add unsafe_follow_pfn") in v5.13-rc1.
            #
            # Note: this commit never made it to the linux kernel, so
            # unsafe_follow_pfn() never existed.
            # follow_pfn() was added by commit 3b6748e2dd69
            # ("mm: introduce follow_pfn()") in v2.6.31-rc1, and removed
            # by commit 233eb0bf3b94 ("mm: remove follow_pfn")
            # from linux-next.
            #
            CODE="
            #include <linux/mm.h>
            void conftest_unsafe_follow_pfn(void) {
                unsafe_follow_pfn();
            void conftest_follow_pfn(void) {
                follow_pfn();
            }"

            compile_check_conftest "$CODE" "NV_UNSAFE_FOLLOW_PFN_PRESENT" "" "functions"
            compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
        ;;
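
Editor's note: a hedged sketch of how the resulting NV_FOLLOW_PFN_PRESENT macro
might gate a caller; nv_follow_pfn and its fallback return value are assumptions
for illustration, not code from this commit:

#include <linux/mm.h>

// Illustrative guard: use follow_pfn() only when conftest found it.
static int nv_follow_pfn(struct vm_area_struct *vma,
                         unsigned long address,
                         unsigned long *pfn)
{
#if defined(NV_FOLLOW_PFN_PRESENT)
    return follow_pfn(vma, address, pfn);
#else
    // No usable API on this kernel (sketch): report failure to the caller.
    return -1;
#endif
}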

        drm_plane_atomic_check_has_atomic_state_arg)
            #
            # Determine if drm_plane_helper_funcs::atomic_check takes 'state'

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

        // Ran out of attempts - return thread even if its stack may not be
        // allocated on the preferred node
        if ((i == (attempts - 1)))
        if (i == (attempts - 1))
            break;

        // Get the NUMA node where the first page of the stack is resident. If

@@ -1448,7 +1448,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
//
// preferredCpuMemoryNode: (INPUT)
//     Preferred CPU NUMA memory node used if the destination processor is
//     the CPU.
//     the CPU. -1 indicates no preference, in which case the pages used
//     can be on any of the available CPU NUMA nodes. If NUMA is disabled
//     only 0 and -1 are allowed.
//
// Error codes:
//     NV_ERR_INVALID_ADDRESS:
@@ -1462,6 +1464,11 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
//     The VA range exceeds the largest virtual address supported by the
//     destination processor.
//
// NV_ERR_INVALID_ARGUMENT:
//     preferredCpuMemoryNode is not a valid CPU NUMA node or it corresponds
//     to a NUMA node ID for a registered GPU. If NUMA is disabled, it
//     indicates that preferredCpuMemoryNode was not either 0 or -1.
//
// NV_ERR_INVALID_DEVICE:
//     destinationUuid does not represent a valid processor such as a CPU or
//     a GPU with a GPU VA space registered for it. Or destinationUuid is a
@@ -1528,8 +1535,9 @@ NV_STATUS UvmMigrate(void *base,
//
// preferredCpuMemoryNode: (INPUT)
//     Preferred CPU NUMA memory node used if the destination processor is
//     the CPU. This argument is ignored if the given virtual address range
//     corresponds to managed memory.
//     the CPU. -1 indicates no preference, in which case the pages used
//     can be on any of the available CPU NUMA nodes. If NUMA is disabled
//     only 0 and -1 are allowed.
//
// semaphoreAddress: (INPUT)
//     Base address of the semaphore.
@@ -1586,8 +1594,8 @@ NV_STATUS UvmMigrateAsync(void *base,
//
// Migrates the backing of all virtual address ranges associated with the given
// range group to the specified destination processor. The behavior of this API
// is equivalent to calling UvmMigrate on each VA range associated with this
// range group.
// is equivalent to calling UvmMigrate with preferredCpuMemoryNode = -1 on each
// VA range associated with this range group.
//
// Any errors encountered during migration are returned immediately. No attempt
// is made to migrate the remaining unmigrated ranges and the ranges that are
@@ -2169,7 +2177,8 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
//
// If any page in the VA range has a preferred location, then the migration and
// mapping policies associated with this API take precedence over those related
// to the preferred location.
// to the preferred location. If the preferred location is a specific CPU NUMA
// node, that NUMA node will be used for a CPU-resident copy of the page.
//
// If any pages in this VA range have any processors present in their
// accessed-by list, the migration and mapping policies associated with this
@@ -2300,7 +2309,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// UvmPreventMigrationRangeGroups has not been called on the range group that
// those pages are associated with, then the migration and mapping policies
// associated with UvmEnableReadDuplication override the policies outlined
// above. Note that enabling read duplication on on any pages in this VA range
// above. Note that enabling read duplication on any pages in this VA range
// does not clear the state set by this API for those pages. It merely overrides
// the policies associated with this state until read duplication is disabled
// for those pages.
@@ -2333,7 +2342,8 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// preferredCpuMemoryNode: (INPUT)
//     Preferred CPU NUMA memory node used if preferredLocationUuid is the
//     UUID of the CPU. -1 is a special value which indicates all CPU nodes
//     allowed by the global and thread memory policies.
//     allowed by the global and thread memory policies. If NUMA is disabled
//     only 0 and -1 are allowed.
//
// Errors:
//     NV_ERR_INVALID_ADDRESS:

@@ -855,6 +855,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
                                      uvm_mem_t *dst_mem,
                                      uvm_mem_t *src_mem,
                                      const UvmCslIv *decrypt_iv,
                                      NvU32 key_version,
                                      uvm_mem_t *auth_tag_mem,
                                      size_t size,
                                      NvU32 copy_size)
@@ -869,6 +870,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
                                                  dst_plain + i * copy_size,
                                                  src_cipher + i * copy_size,
                                                  decrypt_iv + i,
                                                  key_version,
                                                  copy_size,
                                                  auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
    }
@@ -879,6 +881,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
                                          uvm_mem_t *dst_mem,
                                          uvm_mem_t *src_mem,
                                          const UvmCslIv *decrypt_iv,
                                          NvU32 key_version,
                                          uvm_mem_t *auth_tag_mem,
                                          size_t size,
                                          NvU32 copy_size)
@@ -896,6 +899,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
                                                  dst_plain + i * copy_size,
                                                  src_cipher + i * copy_size,
                                                  decrypt_iv + i,
                                                  key_version,
                                                  copy_size,
                                                  auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
    }
@@ -959,7 +963,7 @@ static void gpu_encrypt(uvm_push_t *push,
                                 i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                 dst_cipher);

        uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
        uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);

        if (i > 0)
            uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
@@ -1020,6 +1024,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
    size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
    UvmCslIv *decrypt_iv = NULL;
    UvmCslIv *encrypt_iv = NULL;
    NvU32 key_version;
    uvm_tracker_t tracker;
    size_t src_plain_size;

@@ -1089,6 +1094,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,

    gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);

    // There shouldn't be any key rotation between the end of the push and the
    // CPU decryption(s), but it is more robust against test changes to force
    // decryption to use the saved key.
    key_version = uvm_channel_pool_key_version(push.channel->pool);

    TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);

    TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
@@ -1101,6 +1111,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
                                            dst_plain,
                                            dst_cipher,
                                            decrypt_iv,
                                            key_version,
                                            auth_tag_mem,
                                            size,
                                            copy_size),
@@ -1111,6 +1122,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
                                            dst_plain,
                                            dst_cipher,
                                            decrypt_iv,
                                            key_version,
                                            auth_tag_mem,
                                            size,
                                            copy_size),

(File diff suppressed because it is too large.)

@@ -228,21 +228,65 @@ typedef struct
    // variant is required when the thread holding the pool lock must sleep
    // (ex: acquire another mutex) deeper in the call stack, either in UVM or
    // RM.
    union {
    union
    {
        uvm_spinlock_t spinlock;
        uvm_mutex_t mutex;
    };

    // Secure operations require that uvm_push_begin order matches
    // uvm_push_end order, because the engine's state is used in its internal
    // operation and each push may modify this state. push_locks is protected by
    // the channel pool lock.
    DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
    struct
    {
        // Secure operations require that uvm_push_begin order matches
        // uvm_push_end order, because the engine's state is used in its
        // internal operation and each push may modify this state.
        // push_locks is protected by the channel pool lock.
        DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);

        // Counting semaphore for available and unlocked channels, it must be
        // acquired before submitting work to a channel when the Confidential
        // Computing feature is enabled.
        uvm_semaphore_t push_sem;

        // Per channel buffers in unprotected sysmem.
        uvm_rm_mem_t *pool_sysmem;

        // Per channel buffers in protected vidmem.
        uvm_rm_mem_t *pool_vidmem;

        struct
        {
            // Current encryption key version, incremented upon key rotation.
            // While there are separate keys for encryption and decryption, the
            // two keys are rotated at once, so the versioning applies to both.
            NvU32 version;

            // Lock used to ensure mutual exclusion during key rotation.
            uvm_mutex_t mutex;

            // CSL contexts passed to RM for key rotation. This is usually an
            // array containing the CSL contexts associated with the channels in
            // the pool. In the case of the WLC pool, the array also includes
            // CSL contexts associated with LCIC channels.
            UvmCslContext **csl_contexts;

            // Number of elements in the CSL context array.
            unsigned num_csl_contexts;

            // Number of bytes encrypted, or decrypted, on the engine associated
            // with the pool since the last key rotation. Only used during
            // testing, to force key rotations after a certain encryption size,
            // see UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD.
            //
            // Encryptions on a LCIC pool are accounted for in the paired WLC
            // pool.
            //
            // TODO: Bug 4612912: these accounting variables can be removed once
            // RM exposes an API to set the key rotation lower threshold.
            atomic64_t encrypted;
            atomic64_t decrypted;
        } key_rotation;

    } conf_computing;
} uvm_channel_pool_t;
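
Editor's note: the spinlock/mutex union at the top of this struct implies a lock
helper that dispatches on the variant at runtime. A hedged sketch of that dispatch
follows; uvm_channel_pool_uses_mutex() is assumed here and is not shown in this hunk:

// Sketch: take the pool lock using whichever variant the pool was set up
// with, per the spinlock/mutex union documented above.
static void channel_pool_lock(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_uses_mutex(pool))
        uvm_mutex_lock(&pool->mutex);
    else
        uvm_spin_lock(&pool->spinlock);
}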
struct uvm_channel_struct
@@ -322,43 +366,14 @@ struct uvm_channel_struct
        // work launches to match the order of push end-s that triggered them.
        volatile NvU32 gpu_put;

        // Static pushbuffer for channels with static schedule (WLC/LCIC)
        uvm_rm_mem_t *static_pb_protected_vidmem;

        // Static pushbuffer staging buffer for WLC
        uvm_rm_mem_t *static_pb_unprotected_sysmem;
        void *static_pb_unprotected_sysmem_cpu;
        void *static_pb_unprotected_sysmem_auth_tag_cpu;

        // The above static locations are required by the WLC (and LCIC)
        // schedule. Protected sysmem location completes WLC's independence
        // from the pushbuffer allocator.
        // Protected sysmem location makes WLC independent from the pushbuffer
        // allocator. Unprotected sysmem and protected vidmem counterparts
        // are allocated from the channel pool (sysmem, vidmem).
        void *static_pb_protected_sysmem;

        // Static tracking semaphore notifier values
        // Because of LCIC's fixed schedule, the secure semaphore release
        // mechanism uses two additional static locations for incrementing the
        // notifier values. See:
        // . channel_semaphore_secure_release()
        // . setup_lcic_schedule()
        // . internal_channel_submit_work_wlc()
        uvm_rm_mem_t *static_notifier_unprotected_sysmem;
        NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
        NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
        uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
        uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;

        // Explicit location for push launch tag used by WLC.
        // Encryption auth tags have to be located in unprotected sysmem.
        void *launch_auth_tag_cpu;
        NvU64 launch_auth_tag_gpu_va;

        // Used to decrypt the push back to protected sysmem.
        // This happens when profilers register callbacks for migration data.
        uvm_push_crypto_bundle_t *push_crypto_bundles;

        // Accompanying authentication tags for the crypto bundles
        uvm_rm_mem_t *push_crypto_bundle_auth_tags;
    } conf_computing;

    // RM channel information
@@ -418,7 +433,7 @@ struct uvm_channel_manager_struct
    unsigned num_channel_pools;

    // Mask containing the indexes of the usable Copy Engines. Each usable CE
    // has at least one pool associated with it.
    // has at least one pool of type UVM_CHANNEL_POOL_TYPE_CE associated with it
    DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);

    struct
@@ -451,6 +466,16 @@ struct uvm_channel_manager_struct
        UVM_BUFFER_LOCATION gpput_loc;
        UVM_BUFFER_LOCATION pushbuffer_loc;
    } conf;

    struct
    {
        // Flag indicating that the WLC/LCIC mechanism is ready/setup; should
        // only be false during (de)initialization.
        bool wlc_ready;

        // True indicates that key rotation is enabled (UVM-wise).
        bool key_rotation_enabled;
    } conf_computing;
};

// Create a channel manager for the GPU
@@ -501,6 +526,14 @@ uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel);

uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel);

NvU64 uvm_channel_get_static_pb_protected_vidmem_gpu_va(uvm_channel_t *channel);

NvU64 uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(uvm_channel_t *channel);

char* uvm_channel_get_static_pb_unprotected_sysmem_cpu(uvm_channel_t *channel);

char *uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(uvm_channel_t *channel, unsigned tag_index);

static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
@@ -532,6 +565,17 @@ static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
    return UVM_CHANNEL_TYPE_MEMOPS;
}

// Force key rotation in the engine associated with the given channel pool.
// Rotation may still not happen if RM cannot acquire the necessary locks (in
// which case the function returns NV_ERR_STATE_IN_USE).
//
// This function should only be invoked in pools in which key rotation is
// enabled.
NV_STATUS uvm_channel_pool_rotate_key(uvm_channel_pool_t *pool);

// Retrieve the current encryption key version associated with the channel pool.
NvU32 uvm_channel_pool_key_version(uvm_channel_pool_t *pool);

// Privileged channels support all the Host and engine methods, while
// non-privileged channels don't support privileged methods.
//
@@ -579,12 +623,9 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
// beginning.
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);

// Check if WLC/LCIC mechanism is ready/setup
// Should only return false during initialization
static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
{
    return (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] != NULL) &&
           (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL);
    return manager->conf_computing.wlc_ready;
}
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
// associated with access_channel.

@@ -796,11 +796,8 @@ done:
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_channel_pool_t *pool;
    uvm_push_t *pushes;
    uvm_gpu_t *gpu;
    NvU32 i;
    NvU32 num_pushes;
    uvm_push_t *pushes = NULL;
    uvm_gpu_t *gpu = NULL;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;
@@ -810,9 +807,19 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_type_t channel_type;

        // Key rotation is disabled because this test relies on nested pushes,
        // which is illegal. If any push other than the first one triggers key
        // rotation, the test won't complete. This is because key rotation
        // depends on waiting for ongoing pushes to end, which doesn't happen
        // if those pushes are ended after the current one begins.
        uvm_conf_computing_disable_key_rotation(gpu);

        for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
            pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
            TEST_CHECK_RET(pool != NULL);
            NvU32 i;
            NvU32 num_pushes;
            uvm_channel_pool_t *pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];

            TEST_CHECK_GOTO(pool != NULL, error);

            // Skip LCIC channels as those can't accept any pushes
            if (uvm_channel_pool_is_lcic(pool))
@@ -824,7 +831,7 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
            num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);

            pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
            TEST_CHECK_RET(pushes != NULL);
            TEST_CHECK_GOTO(pushes != NULL, error);

            for (i = 0; i < num_pushes; i++) {
                uvm_push_t *push = &pushes[i];
@@ -841,12 +848,18 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)

            uvm_kvfree(pushes);
        }

        uvm_conf_computing_enable_key_rotation(gpu);
    }

    uvm_thread_context_lock_enable_tracking();

    return status;

error:
    if (gpu != NULL)
        uvm_conf_computing_enable_key_rotation(gpu);

    uvm_thread_context_lock_enable_tracking();
    uvm_kvfree(pushes);

@@ -948,6 +961,318 @@ release:
    return NV_OK;
}

static NV_STATUS force_key_rotations(uvm_channel_pool_t *pool, unsigned num_rotations)
{
    unsigned num_tries;
    unsigned max_num_tries = 20;
    unsigned num_rotations_completed = 0;

    if (num_rotations == 0)
        return NV_OK;

    // The number of accepted rotations is kept low, so failed rotation
    // invocations due to RM not acquiring the necessary locks (which imply a
    // sleep in the test) do not balloon the test execution time.
    UVM_ASSERT(num_rotations <= 10);

    for (num_tries = 0; (num_tries < max_num_tries) && (num_rotations_completed < num_rotations); num_tries++) {
        // Force key rotation, irrespective of encryption usage.
        NV_STATUS status = uvm_channel_pool_rotate_key(pool);

        // Key rotation may not be able to complete due to RM failing to acquire
        // the necessary locks. Detect the situation, sleep for a bit, and then
        // try again.
        //
        // The maximum time spent sleeping in a single rotation call is
        // (max_num_tries * max_sleep_us)
        if (status == NV_ERR_STATE_IN_USE) {
            NvU32 min_sleep_us = 1000;
            NvU32 max_sleep_us = 10000;

            usleep_range(min_sleep_us, max_sleep_us);
            continue;
        }

        TEST_NV_CHECK_RET(status);

        num_rotations_completed++;
    }

    // If not a single key rotation occurred, the dependent tests still pass,
    // but there is not much value to them. Instead, return an error so the
    // maximum number of tries, or the maximum sleep time, are adjusted to
    // ensure that at least one rotation completes.
    if (num_rotations_completed > 0)
        return NV_OK;
    else
        return NV_ERR_STATE_IN_USE;
}

static NV_STATUS force_key_rotation(uvm_channel_pool_t *pool)
{
    return force_key_rotations(pool, 1);
}

// Test key rotation in all pools. This is useful because key rotation may not
// happen otherwise on certain engines during UVM test execution. For example,
// if the MEMOPS channel type is mapped to a CE not shared with any other
// channel type, then the only encryption taking place in the engine is due to
// semaphore releases (4 bytes each). This small encryption size makes it
// unlikely to exceed even small rotation thresholds.
static NV_STATUS test_channel_key_rotation_basic(uvm_gpu_t *gpu)
{
    uvm_channel_pool_t *pool;

    uvm_for_each_pool(pool, gpu->channel_manager) {
        if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
            continue;

        TEST_NV_CHECK_RET(force_key_rotation(pool));
    }

    return NV_OK;
}

// Interleave GPU encryptions and decryptions, and their CPU counterparts, with
// key rotations.
static NV_STATUS test_channel_key_rotation_interleave(uvm_gpu_t *gpu)
{
    int i;
    uvm_channel_pool_t *gpu_to_cpu_pool;
    uvm_channel_pool_t *cpu_to_gpu_pool;
    NV_STATUS status = NV_OK;
    size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
    void *initial_plain_cpu = NULL;
    void *final_plain_cpu = NULL;
    uvm_mem_t *plain_gpu = NULL;
    uvm_gpu_address_t plain_gpu_address;

    cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));

    gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));

    initial_plain_cpu = uvm_kvmalloc_zero(size);
    if (initial_plain_cpu == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    final_plain_cpu = uvm_kvmalloc_zero(size);
    if (final_plain_cpu == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
    plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);

    memset(initial_plain_cpu, 1, size);

    for (i = 0; i < 5; i++) {
        TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
        TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);

        TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
                                                                      plain_gpu_address,
                                                                      initial_plain_cpu,
                                                                      size,
                                                                      NULL,
                                                                      "CPU > GPU"),
                           out);

        TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
        TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);

        TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
                                                                      final_plain_cpu,
                                                                      plain_gpu_address,
                                                                      size,
                                                                      NULL,
                                                                      "GPU > CPU"),
                           out);

        TEST_CHECK_GOTO(!memcmp(initial_plain_cpu, final_plain_cpu, size), out);

        memset(final_plain_cpu, 0, size);
    }

out:
    uvm_mem_free(plain_gpu);
    uvm_kvfree(final_plain_cpu);
    uvm_kvfree(initial_plain_cpu);

    return status;
}

static NV_STATUS memset_vidmem(uvm_mem_t *mem, NvU8 val)
{
    uvm_push_t push;
    uvm_gpu_address_t gpu_address;
    uvm_gpu_t *gpu = mem->backing_gpu;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

    TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));

    gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
    gpu->parent->ce_hal->memset_1(&push, gpu_address, val, mem->size);

    TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));

    return NV_OK;
}

// Custom version of uvm_conf_computing_util_memcopy_gpu_to_cpu that allows
// testing to insert key rotations in between the push end, and the CPU
// decryption.
static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
                                              void *dst_plain,
                                              uvm_gpu_address_t src_gpu_address,
                                              size_t size,
                                              unsigned num_rotations_to_insert)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
    void *src_cipher, *auth_tag;
    uvm_channel_t *channel;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Small GPU > CPU encryption");
    if (status != NV_OK)
        goto out;

    channel = push.channel;
    uvm_conf_computing_log_gpu_encryption(channel, size, dma_buffer->decrypt_iv);
    dma_buffer->key_version[0] = uvm_channel_pool_key_version(channel->pool);

    dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK)
        goto out;

    TEST_NV_CHECK_GOTO(force_key_rotations(channel->pool, num_rotations_to_insert), out);

    // If num_rotations_to_insert is not zero, the current encryption key will
    // be different from the one used during CE encryption.
    src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    status = uvm_conf_computing_cpu_decrypt(channel,
                                            dst_plain,
                                            src_cipher,
                                            dma_buffer->decrypt_iv,
                                            dma_buffer->key_version[0],
                                            size,
                                            auth_tag);

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}

static NV_STATUS test_channel_key_rotation_cpu_decryption(uvm_gpu_t *gpu,
                                                          unsigned num_repetitions,
                                                          unsigned num_rotations_to_insert)
{
    unsigned i;
    uvm_channel_pool_t *gpu_to_cpu_pool;
    NV_STATUS status = NV_OK;
    size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
    NvU8 *plain_cpu = NULL;
    uvm_mem_t *plain_gpu = NULL;
    uvm_gpu_address_t plain_gpu_address;

    if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
        return NV_OK;

    gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));

    plain_cpu = (NvU8 *) uvm_kvmalloc_zero(size);
    if (plain_cpu == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
    TEST_NV_CHECK_GOTO(memset_vidmem(plain_gpu, 1), out);

    plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);

    for (i = 0; i < num_repetitions; i++) {
        unsigned j;

        TEST_NV_CHECK_GOTO(encrypted_memcopy_gpu_to_cpu(gpu,
                                                        plain_cpu,
                                                        plain_gpu_address,
                                                        size,
                                                        num_rotations_to_insert),
                           out);

        for (j = 0; j < size; j++)
            TEST_CHECK_GOTO(plain_cpu[j] == 1, out);

        memset(plain_cpu, 0, size);
    }

out:
    uvm_mem_free(plain_gpu);
    uvm_kvfree(plain_cpu);

    return status;
}

// Test that CPU decryptions can use old keys, i.e., previous versions of the
// keys that are no longer the current key, due to key rotation. Given that
// SEC2 does not expose encryption capabilities, the "decrypt-after-rotation"
// problem is exclusive to CE encryptions.
static NV_STATUS test_channel_key_rotation_decrypt_after_key_rotation(uvm_gpu_t *gpu)
{
    // Instruct encrypted_memcopy_gpu_to_cpu to insert several key rotations
    // between the GPU encryption, and the associated CPU decryption.
    unsigned num_rotations_to_insert = 8;

    TEST_NV_CHECK_RET(test_channel_key_rotation_cpu_decryption(gpu, 1, num_rotations_to_insert));

    return NV_OK;
}

static NV_STATUS test_channel_key_rotation(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    for_each_va_space_gpu(gpu, va_space) {
        if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
            break;

        TEST_NV_CHECK_RET(test_channel_key_rotation_basic(gpu));

        TEST_NV_CHECK_RET(test_channel_key_rotation_interleave(gpu));

        TEST_NV_CHECK_RET(test_channel_key_rotation_decrypt_after_key_rotation(gpu));
    }

    return NV_OK;
}

NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;
@@ -1203,6 +1528,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct file *filp)
    if (status != NV_OK)
        goto done;

    status = test_channel_key_rotation(va_space);
    if (status != NV_OK)
        goto done;

    // The following tests have side effects, they reset the GPU's
    // channel_manager.
    status = test_channel_pushbuffer_extension_base(va_space);
@@ -1338,6 +1667,126 @@ done:
    return status;
}

static NV_STATUS channel_stress_key_rotation_cpu_encryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    int i;
    uvm_channel_pool_t *cpu_to_gpu_pool;
    NV_STATUS status = NV_OK;
    size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
    void *initial_plain_cpu = NULL;
    uvm_mem_t *plain_gpu = NULL;
    uvm_gpu_address_t plain_gpu_address;

    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU);

    cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));

    initial_plain_cpu = uvm_kvmalloc_zero(size);
    if (initial_plain_cpu == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
    plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);

    memset(initial_plain_cpu, 1, size);

    for (i = 0; i < params->iterations; i++) {
        TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
                                                                      plain_gpu_address,
                                                                      initial_plain_cpu,
                                                                      size,
                                                                      NULL,
                                                                      "CPU > GPU"),
                           out);
    }

out:
    uvm_mem_free(plain_gpu);
    uvm_kvfree(initial_plain_cpu);

    return status;
}

static NV_STATUS channel_stress_key_rotation_cpu_decryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    unsigned num_rotations_to_insert = 0;

    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU);

    return test_channel_key_rotation_cpu_decryption(gpu, params->iterations, num_rotations_to_insert);
}

static NV_STATUS channel_stress_key_rotation_rotate(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    NvU32 i;

    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE);

    for (i = 0; i < params->iterations; ++i) {
        NV_STATUS status;
        uvm_channel_pool_t *pool;
        uvm_channel_type_t type;

        if ((i % 3) == 0)
            type = UVM_CHANNEL_TYPE_CPU_TO_GPU;
        else if ((i % 3) == 1)
            type = UVM_CHANNEL_TYPE_GPU_TO_CPU;
        else
            type = UVM_CHANNEL_TYPE_WLC;

        pool = gpu->channel_manager->pool_to_use.default_for_type[type];

        if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
            return NV_ERR_INVALID_STATE;

        status = force_key_rotation(pool);
        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

// The objective of this test is documented in the user-level function.
static NV_STATUS uvm_test_channel_stress_key_rotation(uvm_va_space_t *va_space, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    uvm_test_rng_t rng;
    uvm_gpu_t *gpu;
    NV_STATUS status = NV_OK;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    uvm_test_rng_init(&rng, params->seed);

    uvm_va_space_down_read(va_space);

    // Key rotation should be enabled, or disabled, in all GPUs. Pick a random
    // one.
    gpu = random_va_space_gpu(&rng, va_space);

    if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
        goto out;

    if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU)
        status = channel_stress_key_rotation_cpu_encryption(gpu, params);
    else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU)
        status = channel_stress_key_rotation_cpu_decryption(gpu, params);
    else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE)
        status = channel_stress_key_rotation_rotate(gpu, params);
    else
        status = NV_ERR_INVALID_PARAMETER;

out:
    uvm_va_space_up_read(va_space);

    return status;
}

NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
@@ -1349,6 +1798,8 @@ NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
        return uvm_test_channel_stress_update_channels(va_space, params);
    case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
        return uvm_test_channel_noop_push(va_space, params);
    case UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION:
        return uvm_test_channel_stress_key_rotation(va_space, params);
    default:
        return NV_ERR_INVALID_PARAMETER;
    }

@@ -33,6 +33,15 @@
|
||||
#include "nv_uvm_interface.h"
|
||||
#include "uvm_va_block.h"
|
||||
|
||||
// Amount of encrypted data on a given engine that triggers key rotation. This
|
||||
// is a UVM internal threshold, different from that of RM, and used only during
|
||||
// testing.
|
||||
//
|
||||
// Key rotation is triggered when the total encryption size, or the total
|
||||
// decryption size (whatever comes first) reaches this lower threshold on the
|
||||
// engine.
|
||||
#define UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD (UVM_SIZE_1MB * 8)
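// For reference: with this testing threshold, a rotation becomes pending once
// a pool's running encrypted or decrypted byte count (consulted in
// conf_computing_is_key_rotation_pending_use_stats() below) first exceeds 8MB.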

// The maximum number of secure operations per push is:
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
@@ -352,6 +361,19 @@ error:
    return status;
}

// The production key rotation defaults are such that key rotations rarely
// happen. During UVM testing more frequent rotations are triggered by relying
// on internal encryption usage accounting. When key rotations are triggered by
// UVM, the driver does not rely on channel key rotation notifiers.
//
// TODO: Bug 4612912: UVM should be able to programmatically set the rotation
// lower threshold. This function, and all the metadata associated with it
// (per-pool encryption accounting, for example) can be removed at that point.
static bool key_rotation_is_notifier_driven(void)
{
    return !uvm_enable_builtin_tests;
}

NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
{
    NV_STATUS status;
@@ -394,17 +416,35 @@ void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
    conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
}

void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv)
{
    NV_STATUS status;
    uvm_channel_pool_t *pool;

    if (uvm_channel_is_lcic(channel))
        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
    else
        pool = channel->pool;

    uvm_mutex_lock(&channel->csl.ctx_lock);
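
    // A GPU-side encryption is consumed by a CPU-side decryption of the same
    // data, so it is logged with the DECRYPT CSL operation and accounted as
    // encrypted bytes produced on the engine.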
    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, size);

        // Informing RM of an encryption/decryption should not fail
        UVM_ASSERT(status == NV_OK);

        if (!key_rotation_is_notifier_driven())
            atomic64_add(size, &pool->conf_computing.key_rotation.encrypted);
    }

    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
@@ -428,27 +468,46 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
                                    void *auth_tag_buffer)
{
    NV_STATUS status;
    uvm_channel_pool_t *pool;

    UVM_ASSERT(size);

    if (uvm_channel_is_lcic(channel))
        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
    else
        pool = channel->pool;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
                                      size,
                                      (NvU8 const *) src_plain,
                                      encrypt_iv,
                                      (NvU8 *) dst_cipher,
                                      (NvU8 *) auth_tag_buffer);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);

    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, size);

        // Informing RM of an encryption/decryption should not fail
        UVM_ASSERT(status == NV_OK);
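
        // A CPU-side encryption is consumed by a GPU-side decryption, so the
        // bytes count toward the pool's decrypted total.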
        if (!key_rotation_is_notifier_driven())
            atomic64_add(size, &pool->conf_computing.key_rotation.decrypted);
    }

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                         void *dst_plain,
                                         const void *src_cipher,
                                         const UvmCslIv *src_iv,
                                         NvU32 key_version,
                                         size_t size,
                                         const void *auth_tag_buffer)
{
@@ -469,10 +528,19 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                      size,
                                      (const NvU8 *) src_cipher,
                                      src_iv,
                                      key_version,
                                      (NvU8 *) dst_plain,
                                      NULL,
                                      0,
                                      (const NvU8 *) auth_tag_buffer);

    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, channel %s, GPU %s\n",
                      nvstatusToString(status),
                      channel->name,
                      uvm_gpu_name(uvm_channel_get_gpu(channel)));
    }

    uvm_mutex_unlock(&channel->csl.ctx_lock);

    return status;
@@ -485,6 +553,8 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
                                           NvU8 valid)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;

    // There is no dedicated lock for the CSL context associated with replayable
    // faults. The mutual exclusion required by the RM CSL API is enforced by
@@ -494,36 +564,48 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = nvUvmInterfaceCslDecrypt(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
                                      parent_gpu->fault_buffer_hal->entry_size(parent_gpu),
    status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);

    // Informing RM of an encryption/decryption should not fail
    UVM_ASSERT(status == NV_OK);

    status = nvUvmInterfaceCslDecrypt(csl_context,
                                      fault_entry_size,
                                      (const NvU8 *) src_cipher,
                                      NULL,
                                      NV_U32_MAX,
                                      (NvU8 *) dst_plain,
                                      &valid,
                                      sizeof(valid),
                                      (const NvU8 *) auth_tag_buffer);

    if (status != NV_OK)
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_parent_gpu_name(parent_gpu));

    }

    return status;
}

void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment)
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;

    // See comment in uvm_conf_computing_fault_decrypt
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = nvUvmInterfaceCslIncrementIv(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
                                          UVM_CSL_OPERATION_DECRYPT,
                                          increment,
                                          NULL);
    status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);

    // Informing RM of an encryption/decryption should not fail
    UVM_ASSERT(status == NV_OK);

    status = nvUvmInterfaceCslIncrementIv(csl_context, UVM_CSL_OPERATION_DECRYPT, 1, NULL);

    UVM_ASSERT(status == NV_OK);
}
@@ -625,3 +707,231 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
{
    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, true);
}

void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu)
{
    if (!g_uvm_global.conf_computing_enabled)
        return;

    // Key rotation cannot be enabled on UVM if it is disabled on RM
    if (!gpu->parent->rm_info.gpuConfComputeCaps.bKeyRotationEnabled)
        return;

    gpu->channel_manager->conf_computing.key_rotation_enabled = true;
}

void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
{
    if (!g_uvm_global.conf_computing_enabled)
        return;

    gpu->channel_manager->conf_computing.key_rotation_enabled = false;
}

bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
    return gpu->channel_manager->conf_computing.key_rotation_enabled;
}

bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_enabled(pool->manager->gpu))
        return false;

    // TODO: Bug 4586447: key rotation must be disabled in the SEC2 engine,
    // because currently the encryption key is shared between UVM and RM, but
    // UVM is not able to idle SEC2 channels owned by RM.
    if (uvm_channel_pool_is_sec2(pool))
        return false;

    // Key rotation happens as part of channel reservation, and LCIC channels
    // are never reserved directly. Rotation of keys in LCIC channels happens
    // as the result of key rotation in WLC channels.
    //
    // Return false even if there is nothing fundamental prohibiting direct key
    // rotation on LCIC pools
    if (uvm_channel_pool_is_lcic(pool))
        return false;

    return true;
}

static bool conf_computing_is_key_rotation_pending_use_stats(uvm_channel_pool_t *pool)
{
    NvU64 decrypted, encrypted;

    UVM_ASSERT(!key_rotation_is_notifier_driven());

    decrypted = atomic64_read(&pool->conf_computing.key_rotation.decrypted);

    if (decrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
        return true;

    encrypted = atomic64_read(&pool->conf_computing.key_rotation.encrypted);

    if (encrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
        return true;

    return false;
}

static bool conf_computing_is_key_rotation_pending_use_notifier(uvm_channel_pool_t *pool)
{
    // If key rotation is pending for the pool's engine, then the key rotation
    // notifier in any of the engine channels can be used by UVM to detect the
    // situation. Note that RM doesn't update all the notifiers in a single
    // atomic operation, so it is possible that the channel read by UVM (the
    // first one in the pool) indicates that a key rotation is pending, but
    // another channel in the pool (temporarily) indicates the opposite, or vice
    // versa.
    uvm_channel_t *first_channel = pool->channels;

    UVM_ASSERT(key_rotation_is_notifier_driven());
    UVM_ASSERT(first_channel != NULL);

    return first_channel->channel_info.keyRotationNotifier->status == UVM_KEY_ROTATION_STATUS_PENDING;
}

bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
        return false;

    if (key_rotation_is_notifier_driven())
        return conf_computing_is_key_rotation_pending_use_notifier(pool);
    else
        return conf_computing_is_key_rotation_pending_use_stats(pool);
}

NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool)
{
    NV_STATUS status;

    UVM_ASSERT(uvm_conf_computing_is_key_rotation_enabled_in_pool(pool));
    UVM_ASSERT(pool->conf_computing.key_rotation.csl_contexts != NULL);
    UVM_ASSERT(pool->conf_computing.key_rotation.num_csl_contexts > 0);

    // NV_ERR_STATE_IN_USE indicates that RM was not able to acquire the
    // required locks at this time. This status is not interpreted as an error,
    // but as a sign for UVM to try again later. This is the same "protocol"
    // used in IV rotation.
    status = nvUvmInterfaceCslRotateKey(pool->conf_computing.key_rotation.csl_contexts,
                                        pool->conf_computing.key_rotation.num_csl_contexts);

    if (status == NV_OK) {
        pool->conf_computing.key_rotation.version++;

        if (!key_rotation_is_notifier_driven()) {
            atomic64_set(&pool->conf_computing.key_rotation.decrypted, 0);
            atomic64_set(&pool->conf_computing.key_rotation.encrypted, 0);
        }
    }
    else if (status != NV_ERR_STATE_IN_USE) {
        UVM_DBG_PRINT("nvUvmInterfaceCslRotateKey() failed in engine %u: %s\n",
                      pool->engine_index,
                      nvstatusToString(status));
    }

    return status;
}
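
// A caller that must guarantee a rotation can follow the "try again later"
// protocol described above, for example:
//
//     do {
//         status = uvm_conf_computing_rotate_pool_key(pool);
//     } while (status == NV_ERR_STATE_IN_USE);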
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
                                                     uvm_gpu_address_t dst_gpu_address,
                                                     void *src_plain,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
    void *dst_cipher, *auth_tag;
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    va_start(args, format);
    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
    va_end(args);

    if (status != NV_OK)
        goto out;
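
    // Stage the copy: CPU-encrypt the plain text into the DMA buffer in
    // unprotected sysmem, then have the CE decrypt it into the protected
    // vidmem destination.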
    dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);

    src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}

__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
                                                     void *dst_plain,
                                                     uvm_gpu_address_t src_gpu_address,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
    void *src_cipher, *auth_tag;
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    va_start(args, format);
    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
    va_end(args);

    if (status != NV_OK)
        goto out;
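
    // Record the GPU encryption IV and the pool's current key version up
    // front, so the CPU decryption below uses parameters matching the CE
    // encryption.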
    uvm_conf_computing_log_gpu_encryption(push.channel, size, dma_buffer->decrypt_iv);
    dma_buffer->key_version[0] = uvm_channel_pool_key_version(push.channel->pool);

    dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK)
        goto out;

    src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    status = uvm_conf_computing_cpu_decrypt(push.channel,
                                            dst_plain,
                                            src_cipher,
                                            dma_buffer->decrypt_iv,
                                            dma_buffer->key_version[0],
                                            size,
                                            auth_tag);

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}

@@ -87,9 +87,9 @@ typedef struct
    // a free buffer.
    uvm_tracker_t tracker;

    // When the DMA buffer is used as the destination of a GPU encryption, SEC2
    // writes the authentication tag here. Later when the buffer is decrypted
    // on the CPU the authentication tag is used again (read) for CSL to verify
    // When the DMA buffer is used as the destination of a GPU encryption, the
    // engine (CE or SEC2) writes the authentication tag here. When the buffer
    // is decrypted on the CPU the authentication tag is used by CSL to verify
    // the authenticity. The allocation is big enough for one authentication
    // tag per PAGE_SIZE page in the alloc buffer.
    uvm_mem_t *auth_tag;
@@ -98,7 +98,12 @@ typedef struct
    // to the authentication tag. The allocation is big enough for one IV per
    // PAGE_SIZE page in the alloc buffer. The granularity between the decrypt
    // IV and authentication tag must match.
    UvmCslIv decrypt_iv[(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE)];
    UvmCslIv decrypt_iv[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];

    // When the DMA buffer is used as the destination of a GPU encryption, the
    // key version used during GPU encryption of each PAGE_SIZE page can be
    // saved here, so CPU decryption uses the correct decryption key.
    NvU32 key_version[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];

    // Bitmap of the encrypted pages in the backing allocation
    uvm_page_mask_t encrypted_page_mask;
@@ -147,7 +152,7 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu);
void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu);

// Logs encryption information from the GPU and returns the IV.
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv);
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv);

// Acquires next CPU encryption IV and returns it.
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv);
@@ -167,10 +172,14 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
// CPU side decryption helper. Decrypts data from src_cipher and writes the
// plain text in dst_plain. src_cipher and dst_plain can't overlap. The IV
// obtained from uvm_conf_computing_log_gpu_encryption() needs to be passed in
// src_iv.
//
// The caller must indicate which key to use for decryption by passing the
// appropriate key version number.
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                         void *dst_plain,
                                         const void *src_cipher,
                                         const UvmCslIv *src_iv,
                                         NvU32 key_version,
                                         size_t size,
                                         const void *auth_tag_buffer);

@@ -191,12 +200,12 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
                                           NvU8 valid);

// Increment the CPU-side decrypt IV of the CSL context associated with
// replayable faults. The function is a no-op if the given increment is zero.
// replayable faults.
//
// The IV associated with a fault CSL context is a 64-bit counter.
//
// Locking: this function must be invoked while holding the replayable ISR lock.
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment);
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu);

// Query the number of remaining messages before IV needs to be rotated.
void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
@@ -214,4 +223,71 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
// Check if there are fewer than 'limit' messages available in either direction
// and rotate if not.
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy);

// Rotate the engine key associated with the given channel pool.
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool);

// Returns true if key rotation is allowed in the channel pool.
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool);

// Returns true if key rotation is pending in the channel pool.
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool);

// Enable/disable key rotation in the passed GPU. Note that UVM enablement is
// dependent on RM enablement: key rotation may still be disabled upon calling
// this function, if it is disabled in RM. On the other hand, key rotation can
// be disabled in UVM, even if it is enabled in RM.
//
// Enablement/Disablement affects only kernel key rotation in keys owned by UVM.
// It doesn't affect user key rotation (CUDA, Video...), nor does it affect RM
// kernel key rotation.
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu);
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu);

// Returns true if key rotation is enabled on UVM in the given GPU. Key rotation
// can be enabled on the GPU but disabled on some of the GPU engines (LCEs or
// SEC2), see uvm_conf_computing_is_key_rotation_enabled_in_pool.
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu);
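
// Callers typically check this GPU-level switch first, and then the pool-level
// one (uvm_conf_computing_is_key_rotation_enabled_in_pool()), before forcing a
// rotation, as the key rotation stress test does.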

// Launch a synchronous, encrypted copy between CPU and GPU.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The source CPU buffer pointed by src_plain contains the unencrypted (plain
// text) contents; the function internally performs a CPU-side encryption step
// before launching the GPU-side CE decryption. The source buffer can be in
// protected or unprotected sysmem, while the destination buffer must be in
// protected vidmem.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
                                                     uvm_gpu_address_t dst_gpu_address,
                                                     void *src_plain,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...);

// Launch a synchronous, encrypted copy between GPU and CPU.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The destination CPU buffer pointed by dst_plain receives the unencrypted
// (plain text) contents; the function internally performs a GPU-side CE
// encryption followed by a CPU-side decryption step. The source buffer must be
// in protected vidmem, while the destination buffer can be in protected or
// unprotected sysmem.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
                                                     void *dst_plain,
                                                     uvm_gpu_address_t src_gpu_address,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...);
#endif // __UVM_CONF_COMPUTING_H__

@@ -591,7 +591,7 @@ static void fault_buffer_skip_replayable_entry(uvm_parent_gpu_t *parent_gpu, NvU
    // replayable faults still requires manual adjustment so it is kept in sync
    // with the encryption IV on the GSP-RM's side.
    if (g_uvm_global.conf_computing_enabled)
        uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu, 1);
        uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu);

    parent_gpu->fault_buffer_hal->entry_clear_valid(parent_gpu, index);
}

@@ -60,6 +60,17 @@ struct uvm_gpu_semaphore_pool_page_struct
    // Allocation backing the page
    uvm_rm_mem_t *memory;

    struct {
        // Unprotected sysmem storing encrypted value of semaphores
        uvm_rm_mem_t *encrypted_payload_memory;

        // Unprotected sysmem storing encryption auth tags
        uvm_rm_mem_t *auth_tag_memory;

        // Unprotected sysmem storing plain text notifier values
        uvm_rm_mem_t *notifier_memory;
    } conf_computing;
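
    // With Confidential Computing the semaphore payloads themselves live in
    // vidmem CPR, so the GPU encrypts every payload update into
    // encrypted_payload_memory and bumps the matching notifier; the CPU then
    // decrypts a consistent snapshot (see
    // gpu_semaphore_encrypted_payload_update()).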

    // Pool the page is part of
    uvm_gpu_semaphore_pool_t *pool;

@@ -80,26 +91,6 @@ static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
    return gpu_semaphore_pool_is_secure(semaphore->page->pool);
}

static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
{
    NvU32 offset;
    NvU32 index;

    if (gpu_semaphore_is_secure(semaphore))
        return semaphore->conf_computing.index;

    UVM_ASSERT(semaphore->payload != NULL);
    UVM_ASSERT(semaphore->page != NULL);

    offset = (char*)semaphore->payload - (char*)uvm_rm_mem_get_cpu_va(semaphore->page->memory);
    UVM_ASSERT(offset % UVM_SEMAPHORE_SIZE == 0);

    index = offset / UVM_SEMAPHORE_SIZE;
    UVM_ASSERT(index < UVM_SEMAPHORE_COUNT_PER_PAGE);

    return index;
}

// Use canary values on debug builds to catch semaphore use-after-free. We can
// catch release-after-free by simply setting the payload to a known value at
// free then checking it on alloc or pool free, but catching acquire-after-free
@@ -150,34 +141,83 @@ static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
    return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}

// Secure semaphore pools are allocated in the CPR of vidmem and only mapped to
// the owning GPU as no other processor have access to it.
static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
                                        uvm_gpu_semaphore_pool_page_t *pool_page,
                                        uvm_rm_mem_type_t memory_type)
static void pool_page_free_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
    uvm_rm_mem_free(page->memory);
    page->memory = NULL;

    if (gpu_semaphore_pool_is_secure(page->pool)) {
        uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
        uvm_rm_mem_free(page->conf_computing.auth_tag_memory);
        uvm_rm_mem_free(page->conf_computing.notifier_memory);

        page->conf_computing.encrypted_payload_memory = NULL;
        page->conf_computing.auth_tag_memory = NULL;
        page->conf_computing.notifier_memory = NULL;
    }
    else {
        UVM_ASSERT(!page->conf_computing.encrypted_payload_memory);
        UVM_ASSERT(!page->conf_computing.auth_tag_memory);
        UVM_ASSERT(!page->conf_computing.notifier_memory);
    }
}

static NV_STATUS pool_page_alloc_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_t *pool = page->pool;
    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
    size_t align = 0;
    bool map_all = true;
    align = gpu_semaphore_pool_is_secure(pool) ? UVM_CONF_COMPUTING_BUF_ALIGNMENT : 0;
    map_all = gpu_semaphore_pool_is_secure(pool) ? false : true;

    UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
    status = uvm_rm_mem_alloc(pool->gpu,
                              memory_type,
                              UVM_SEMAPHORE_PAGE_SIZE,
                              UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                              &pool_page->memory);
    if (map_all)
        status = uvm_rm_mem_alloc_and_map_all(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
    else
        status = uvm_rm_mem_alloc(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);

    if (status != NV_OK)
        return status;
        goto error;

    if (!gpu_semaphore_pool_is_secure(pool))
        return NV_OK;

    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_PAGE_SIZE,
                                          UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                                          &page->conf_computing.encrypted_payload_memory);
    if (status != NV_OK)
        goto error;

    BUILD_BUG_ON(UVM_CONF_COMPUTING_AUTH_TAG_SIZE % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                          UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
                                          &page->conf_computing.auth_tag_memory);
    if (status != NV_OK)
        goto error;

    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(NvU32),
                                          0,
                                          &page->conf_computing.notifier_memory);
    if (status != NV_OK)
        goto error;

    return NV_OK;
error:
    pool_page_free_buffers(page);
    return status;
}

static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_page_t *pool_page;
    NvU32 *payloads;
    size_t i;
    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;

    uvm_assert_mutex_locked(&pool->mutex);

@@ -188,24 +228,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)

    pool_page->pool = pool;

    // Whenever the Confidential Computing feature is enabled, engines can
    // access semaphores only in the CPR of vidmem. Mapping to other GPUs is
    // also disabled.
    if (gpu_semaphore_pool_is_secure(pool)) {
        status = pool_alloc_secure_page(pool, pool_page, memory_type);

        if (status != NV_OK)
            goto error;
    }
    else {
        status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
                                              memory_type,
                                              UVM_SEMAPHORE_PAGE_SIZE,
                                              0,
                                              &pool_page->memory);
    status = pool_page_alloc_buffers(pool_page);
    if (status != NV_OK)
        goto error;
    }

    // Verify the GPU can access the semaphore pool.
    UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
@@ -217,7 +242,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
    pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;

    if (semaphore_uses_canary(pool)) {
        payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
        size_t i;
        NvU32 *payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);

        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            payloads[i] = make_canary(0);
    }
@@ -253,7 +280,7 @@ static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)

    pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
    list_del(&page->all_pages_node);
    uvm_rm_mem_free(page->memory);
    pool_page_free_buffers(page);
    uvm_kvfree(page);
}

@@ -273,19 +300,22 @@ NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaph
        goto done;

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
        const NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);

        UVM_ASSERT(semaphore_index <= UVM_SEMAPHORE_COUNT_PER_PAGE);

        if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
            continue;

        if (gpu_semaphore_pool_is_secure(pool)) {
            semaphore->conf_computing.index = semaphore_index;
        }
        else {
            semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
                                          semaphore_index * UVM_SEMAPHORE_SIZE);
        }

        semaphore->page = page;
        semaphore->index = semaphore_index;

        if (gpu_semaphore_pool_is_secure(pool)) {

            // Reset the notifier to avoid a false attack detection when
            // checking for an updated value
            *uvm_gpu_semaphore_get_notifier_cpu_va(semaphore) = semaphore->conf_computing.last_observed_notifier;
        }

        if (semaphore_uses_canary(pool))
            UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
@@ -311,7 +341,6 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_t *pool;
    NvU32 index;

    UVM_ASSERT(semaphore);

@@ -323,7 +352,6 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
        return;

    pool = page->pool;
    index = get_index(semaphore);

    // Write a known value lower than the current payload in an attempt to catch
    // release-after-free and acquire-after-free.
@@ -333,10 +361,9 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
    uvm_mutex_lock(&pool->mutex);

    semaphore->page = NULL;
    semaphore->payload = NULL;

    ++pool->free_semaphores_count;
    __set_bit(index, page->free_semaphores);
    __set_bit(semaphore->index, page->free_semaphores);

    uvm_mutex_unlock(&pool->mutex);
}
@@ -449,18 +476,72 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu

NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
    NvU32 index = get_index(semaphore);
    NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;

    return base_va + UVM_SEMAPHORE_SIZE * index;
    return base_va + semaphore->index * UVM_SEMAPHORE_SIZE;
}

NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *base_va;

    if (gpu_semaphore_is_secure(semaphore))
        return &semaphore->conf_computing.cached_payload;

    base_va = uvm_rm_mem_get_cpu_va(semaphore->page->memory);
    return (NvU32*)(base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *encrypted_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);

    return (NvU32*)(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 encrypted_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.encrypted_payload_memory,
                                                        semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_notifier_t *notifier_base_va =
        uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);

    return notifier_base_va + semaphore->index;
}

uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
                                                       semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(notifier_base_va +
                                               semaphore->index * sizeof(uvm_gpu_semaphore_notifier_t));
}

void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *auth_tag_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.auth_tag_memory);

    return (void*)(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
}

uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 auth_tag_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.auth_tag_memory,
                                                       semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
}

NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
    if (gpu_semaphore_is_secure(semaphore))
        return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);

    return UVM_GPU_READ_ONCE(*semaphore->payload);
    return UVM_GPU_READ_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore));
}

void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
@@ -477,10 +558,7 @@ void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload
    // the GPU correctly even on non-SMP).
    mb();

    if (gpu_semaphore_is_secure(semaphore))
        UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
    else
        UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
    UVM_GPU_WRITE_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore), payload);
}

// This function is intended to catch channels which have been left dangling in
@@ -546,22 +624,11 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
    uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}

static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
static void gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
    // No new value, or the GPU is currently writing the new encrypted material
    // and no change in value would still result in corrupted data.
    return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
}

static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
    UvmCslIv local_iv;
    NvU32 local_payload;
    NvU32 new_sem_value;
    NvU32 gpu_notifier;
    NvU32 last_observed_notifier;
    NvU32 new_gpu_notifier = 0;
    NvU32 iv_index = 0;
    uvm_gpu_semaphore_notifier_t gpu_notifier;
    uvm_gpu_semaphore_notifier_t new_gpu_notifier = 0;

    // A channel can have multiple entries pending and the tracking semaphore
    // update of each entry can race with this function. Since the semaphore
@@ -570,64 +637,72 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
    unsigned tries_left = channel->num_gpfifo_entries;
    NV_STATUS status = NV_OK;
    NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
    UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
    void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
    NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
    NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
    uvm_gpu_semaphore_notifier_t *semaphore_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(uvm_channel_is_ce(channel));

    last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
    gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
    UVM_ASSERT(last_observed_notifier <= gpu_notifier);

    if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
        return;
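
    // Seqlock-style snapshot: the GPU sets the notifier to an odd value before
    // writing new encrypted material and to an even value when done, so a
    // snapshot is consistent only if the notifier is even and unchanged across
    // the reads below.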
    do {
        gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
        gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);

        UVM_ASSERT(gpu_notifier >= semaphore->conf_computing.last_observed_notifier);

        // Odd notifier value means there's an update in progress.
        if (gpu_notifier % 2)
            continue;

        // There's no change since last time
        if (gpu_notifier == semaphore->conf_computing.last_observed_notifier)
            return;

        // Make sure no memory accesses happen before we read the notifier
        smp_mb__after_atomic();

        iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
        memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
        local_payload = UVM_READ_ONCE(*payload_cpu_addr);
        memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
        memcpy(local_auth_tag, uvm_gpu_semaphore_get_auth_tag_cpu_va(semaphore), sizeof(local_auth_tag));
        local_payload = UVM_READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));

        // Make sure the second read of notifier happens after
        // all memory accesses.
        smp_mb__before_atomic();
        new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
        new_gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);
        tries_left--;
    } while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));

    if (!tries_left) {
        status = NV_ERR_INVALID_STATE;
        goto error;
    }
    else {
        NvU32 key_version;
        const NvU32 iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
        NvU32 new_semaphore_value;

        UVM_ASSERT(gpu_notifier == new_gpu_notifier);
        UVM_ASSERT(gpu_notifier % 2 == 0);

        // CPU decryption is guaranteed to use the same key version as the
        // associated GPU encryption, because if there was any key rotation in
        // between, then key rotation waited for all channels to complete before
        // proceeding. The wait implies that the semaphore value matches the
        // last one encrypted on the GPU, so this CPU decryption should happen
        // before the key is rotated.
        key_version = uvm_channel_pool_key_version(channel->pool);

    if (gpu_notifier == new_gpu_notifier) {
        status = uvm_conf_computing_cpu_decrypt(channel,
                                                &new_sem_value,
                                                &new_semaphore_value,
                                                &local_payload,
                                                &local_iv,
                                                sizeof(new_sem_value),
                                                &semaphore->conf_computing.ivs[iv_index],
                                                key_version,
                                                sizeof(new_semaphore_value),
                                                &local_auth_tag);

        if (status != NV_OK)
            goto error;

        uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
        uvm_gpu_semaphore_set_payload(semaphore, new_semaphore_value);
        UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
    }

    return;
        return;
    }

error:
    // Decryption failure is a fatal error, as is running out of tries.
@@ -650,11 +725,11 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
    else
        uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);

    if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
    if (gpu_semaphore_is_secure(&tracking_semaphore->semaphore)) {
        // TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
        // mechanism to all semaphore
        uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
        uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
        gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
    }

    new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
@@ -690,7 +765,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
    UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
                           "GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
                           uvm_gpu_name(tracking_semaphore->semaphore.page->pool->gpu),
                           (NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
                           (NvU64)(uintptr_t)uvm_gpu_semaphore_get_cpu_va(&tracking_semaphore->semaphore),
                           old_value, new_value);

    // Use an atomic write even though the lock is held so that the value can

@@ -29,6 +29,8 @@
#include "uvm_rm_mem.h"
#include "uvm_linux.h"

typedef NvU32 uvm_gpu_semaphore_notifier_t;

// A GPU semaphore is a memory location accessible by the GPUs and the CPU
// that's used for synchronization among them.
// The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
@@ -45,17 +47,15 @@ struct uvm_gpu_semaphore_struct
    // The semaphore pool page the semaphore came from
    uvm_gpu_semaphore_pool_page_t *page;

    // Pointer to the memory location
    NvU32 *payload;
    // Index of the semaphore in semaphore page
    NvU16 index;

    struct {
        NvU16 index;
        NvU32 cached_payload;
        uvm_rm_mem_t *encrypted_payload;
        uvm_rm_mem_t *notifier;
        uvm_rm_mem_t *auth_tag;
        UvmCslIv *ivs;
        NvU32 last_pushed_notifier;
        NvU32 last_observed_notifier;
        NvU32 cached_payload;

        uvm_gpu_semaphore_notifier_t last_pushed_notifier;
        uvm_gpu_semaphore_notifier_t last_observed_notifier;
    } conf_computing;
};

@@ -151,6 +151,17 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu

NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space);

NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore);

NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore);

uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore);

void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore);

// Read the 32-bit payload of the semaphore
// Notably doesn't provide any memory ordering guarantees and needs to be used with
// care. For an example of what needs to be considered see

@@ -284,8 +284,10 @@ static void hmm_va_block_unregister_gpu(uvm_va_block_t *va_block,

    // Reset preferred location and accessed-by of policy nodes if needed.
    uvm_for_each_va_policy_node_in(node, va_block, va_block->start, va_block->end) {
        if (uvm_id_equal(node->policy.preferred_location, gpu->id))
        if (uvm_va_policy_preferred_location_equal(&node->policy, gpu->id, NUMA_NO_NODE)) {
            node->policy.preferred_location = UVM_ID_INVALID;
            node->policy.preferred_nid = NUMA_NO_NODE;
        }

        uvm_processor_mask_clear(&node->policy.accessed_by, gpu->id);
    }

@@ -27,7 +27,7 @@

const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 34);
|
||||
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
|
||||
|
||||
switch (lock_order) {
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
|
||||
@@ -48,7 +48,9 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_PUSH);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION_WLC);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_WLC_PUSH);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_SEC2_PUSH);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);
|
||||
|
||||
@@ -322,6 +322,15 @@
|
||||
// Operations not allowed while holding this lock
|
||||
// - GPU memory allocation which can evict
|
||||
//
|
||||
// - Channel pool key rotation lock
|
||||
// Order: UVM_LOCK_ORDER_KEY_ROTATION
|
||||
// Condition: Confidential Computing is enabled
|
||||
// Mutex per channel pool
|
||||
//
|
||||
// The lock ensures mutual exclusion during key rotation affecting all the
|
||||
// channels in the associated pool. Key rotation in WLC pools is handled
|
||||
// using a separate lock order, see UVM_LOCK_ORDER_KEY_ROTATION_WLC below.
|
||||
//
|
||||
// - CE channel CSL channel pool semaphore
|
||||
// Order: UVM_LOCK_ORDER_CSL_PUSH
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
@@ -338,6 +347,15 @@
|
||||
// Operations allowed while holding this lock
|
||||
// - Pushing work to CE channels (except for WLC channels)
|
||||
//
|
||||
// - WLC channel pool key rotation lock
|
||||
// Order: UVM_LOCK_ORDER_KEY_ROTATION_WLC
|
||||
// Condition: Confidential Computing is enabled
|
||||
// Mutex of WLC channel pool
|
||||
//
|
||||
// The lock has the same purpose as the regular channel pool key rotation
|
||||
// lock. Using a different order lock for WLC channels allows key rotation
|
||||
// on those channels during indirect work submission.
|
||||
//
|
||||
// - WLC CSL channel pool semaphore
|
||||
// Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
@@ -484,7 +502,9 @@ typedef enum
|
||||
UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL,
|
||||
UVM_LOCK_ORDER_CHUNK_MAPPING,
|
||||
UVM_LOCK_ORDER_PAGE_TREE,
UVM_LOCK_ORDER_KEY_ROTATION,
UVM_LOCK_ORDER_CSL_PUSH,
UVM_LOCK_ORDER_KEY_ROTATION_WLC,
UVM_LOCK_ORDER_CSL_WLC_PUSH,
UVM_LOCK_ORDER_CSL_SEC2_PUSH,
UVM_LOCK_ORDER_PUSH,

@@ -39,6 +39,7 @@
#include "uvm_pte_batch.h"
#include "uvm_tlb_batch.h"
#include "nv_uvm_interface.h"
#include "nv_uvm_types.h"

#include "uvm_pushbuffer.h"

@@ -101,11 +102,11 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,

pte_buffer->va_range = va_range;
pte_buffer->gpu = gpu;
pte_buffer->mapping_info.cachingType = map_rm_params->caching_type;
pte_buffer->mapping_info.mappingType = map_rm_params->mapping_type;
pte_buffer->mapping_info.formatType = map_rm_params->format_type;
pte_buffer->mapping_info.elementBits = map_rm_params->element_bits;
pte_buffer->mapping_info.compressionType = map_rm_params->compression_type;
pte_buffer->mapping_info.cachingType = (UvmRmGpuCachingType) map_rm_params->caching_type;
pte_buffer->mapping_info.mappingType = (UvmRmGpuMappingType) map_rm_params->mapping_type;
pte_buffer->mapping_info.formatType = (UvmRmGpuFormatType) map_rm_params->format_type;
pte_buffer->mapping_info.elementBits = (UvmRmGpuFormatElementBits) map_rm_params->element_bits;
pte_buffer->mapping_info.compressionType = (UvmRmGpuCompressionType) map_rm_params->compression_type;
if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL)
pte_buffer->mapping_info.mappingPageSize = page_size;

@@ -589,7 +589,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
skipped_migrate = true;
}
else if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, dest_id) &&
!uvm_id_equal(dest_id, policy->preferred_location)) {
!uvm_va_policy_preferred_location_equal(policy, dest_id, NUMA_NO_NODE)) {
// Don't migrate to a non-faultable GPU that is in UVM-Lite mode,
// unless it's the preferred location
status = NV_ERR_INVALID_DEVICE;

@@ -126,7 +126,7 @@ NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sys
NvU64 remove_key;

for (remove_key = base_key; remove_key < key; ++remove_key)
(void *)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
(void)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);

kmem_cache_free(g_reverse_page_map_cache, new_reverse_map);
status = errno_to_nv_status(ret);

@@ -671,6 +671,9 @@ static NV_STATUS va_block_set_read_duplication_locked(uvm_va_block_t *va_block,

uvm_assert_mutex_locked(&va_block->lock);

// Force CPU page residency to be on the preferred NUMA node.
va_block_context->make_resident.dest_nid = uvm_va_range_get_policy(va_block->va_range)->preferred_nid;

for_each_id_in_mask(src_id, &va_block->resident) {
NV_STATUS status;
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, src_id, NUMA_NO_NODE);

@@ -100,16 +100,8 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas

bool uvm_numa_id_eq(int nid0, int nid1)
{
UVM_ASSERT(nid0 == -1 || nid0 < MAX_NUMNODES);
UVM_ASSERT(nid1 == -1 || nid1 < MAX_NUMNODES);

if ((nid0 == NUMA_NO_NODE || nid1 == NUMA_NO_NODE) && nodes_weight(node_possible_map) == 1) {
if (nid0 == NUMA_NO_NODE)
nid0 = first_node(node_possible_map);

if (nid1 == NUMA_NO_NODE)
nid1 = first_node(node_possible_map);
}
UVM_ASSERT(nid0 >= NUMA_NO_NODE && nid0 < MAX_NUMNODES);
UVM_ASSERT(nid1 >= NUMA_NO_NODE && nid1 < MAX_NUMNODES);

return nid0 == nid1;
}
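The hunk above drops the single-node special case from uvm_numa_id_eq(): NUMA_NO_NODE no longer aliases the only possible node and now compares equal only to itself. A minimal userspace model of the simplified comparison (the MAX_NUMNODES value is illustrative, not the kernel's):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define NUMA_NO_NODE (-1)
    #define MAX_NUMNODES 1024 /* illustrative; the kernel derives this from config */

    static bool numa_id_eq(int nid0, int nid1)
    {
        /* Mirrors the two UVM_ASSERT range checks, then a direct compare */
        assert(nid0 >= NUMA_NO_NODE && nid0 < MAX_NUMNODES);
        assert(nid1 >= NUMA_NO_NODE && nid1 < MAX_NUMNODES);
        return nid0 == nid1;
    }

    int main(void)
    {
        printf("%d\n", numa_id_eq(NUMA_NO_NODE, NUMA_NO_NODE)); /* 1 */
        printf("%d\n", numa_id_eq(NUMA_NO_NODE, 0));            /* 0: no single-node aliasing */
        printf("%d\n", numa_id_eq(3, 3));                       /* 1 */
        return 0;
    }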
@@ -65,9 +65,12 @@ typedef enum
} uvm_push_flag_t;

struct uvm_push_crypto_bundle_struct {
// Initialization vector used to decrypt the push
// Initialization vector used to decrypt the push on the CPU
UvmCslIv iv;

// Key version used to decrypt the push on the CPU
NvU32 key_version;

// Size of the pushbuffer that is encrypted/decrypted
NvU32 push_size;
};

@@ -451,7 +451,6 @@ static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm
static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
NV_STATUS status;
NvU32 auth_tag_offset;
void *auth_tag_cpu_va;
void *push_protected_cpu_va;
void *push_unprotected_cpu_va;
@@ -470,16 +469,15 @@ static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
UVM_ASSERT(!uvm_channel_is_wlc(channel));
UVM_ASSERT(!uvm_channel_is_lcic(channel));

push_protected_cpu_va = (char *)get_base_cpu_va(pushbuffer) + pushbuffer_offset;
push_protected_cpu_va = get_base_cpu_va(pushbuffer) + pushbuffer_offset;
push_unprotected_cpu_va = (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem) + pushbuffer_offset;
auth_tag_offset = push_info_index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
auth_tag_cpu_va = (char *)uvm_rm_mem_get_cpu_va(channel->conf_computing.push_crypto_bundle_auth_tags) +
auth_tag_offset;
auth_tag_cpu_va = uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(channel, push_info_index);

status = uvm_conf_computing_cpu_decrypt(channel,
push_protected_cpu_va,
push_unprotected_cpu_va,
&crypto_bundle->iv,
crypto_bundle->key_version,
crypto_bundle->push_size,
auth_tag_cpu_va);

@@ -558,7 +556,7 @@ NvU64 uvm_pushbuffer_get_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_
if (uvm_channel_is_wlc(push->channel) || uvm_channel_is_lcic(push->channel)) {
// We need to use the same static locations for PB as the fixed
// schedule because that's what the channels are initialized to use.
return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_protected_vidmem, gpu);
return uvm_channel_get_static_pb_protected_vidmem_gpu_va(push->channel);
}
else if (uvm_channel_is_sec2(push->channel)) {
// SEC2 PBs are in unprotected sysmem
@@ -575,7 +573,7 @@ void *uvm_pushbuffer_get_unprotected_cpu_va_for_push(uvm_pushbuffe
if (uvm_channel_is_wlc(push->channel)) {
// Reuse existing WLC static pb for initialization
UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
return push->channel->conf_computing.static_pb_unprotected_sysmem_cpu;
return uvm_channel_get_static_pb_unprotected_sysmem_cpu(push->channel);
}

pushbuffer_base = uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem);
@@ -590,8 +588,8 @@ NvU64 uvm_pushbuffer_get_unprotected_gpu_va_for_push(uvm_pushbuffe
if (uvm_channel_is_wlc(push->channel)) {
// Reuse existing WLC static pb for initialization
UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_unprotected_sysmem,
uvm_push_get_gpu(push));

return uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(push->channel);
}

pushbuffer_base = uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, uvm_push_get_gpu(push));

@@ -322,6 +322,7 @@ static NV_STATUS cpu_decrypt(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
UvmCslIv *decrypt_iv,
NvU32 key_version,
uvm_mem_t *auth_tag_mem,
size_t size,
size_t copy_size)
@@ -338,6 +339,7 @@ static NV_STATUS cpu_decrypt(uvm_channel_t *channel,
dst_plain,
src_cipher,
&decrypt_iv[i],
key_version,
copy_size,
auth_tag_buffer));

@@ -368,7 +370,7 @@ static void gpu_encrypt(uvm_push_t *push,
uvm_gpu_address_t auth_tag_address = uvm_mem_gpu_address_virtual_kernel(auth_tag_mem, gpu);

for (i = 0; i < num_iterations; i++) {
uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);

if (i > 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
@@ -427,6 +429,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
uvm_push_t push;
UvmCslIv *decrypt_iv;
NvU32 key_version;

decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
if (!decrypt_iv)
@@ -456,6 +459,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz

gpu_encrypt(&push, dst_cipher, dst_plain, decrypt_iv, auth_tag_mem, size, copy_size);

// There shouldn't be any key rotation between the end of the push and the
// CPU decryption(s), but it is more robust against test changes to force
// decryption to use the saved key.
key_version = uvm_channel_pool_key_version(push.channel->pool);

TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);

TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher), out);
@@ -465,6 +473,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
dst_plain_cpu,
dst_cipher,
decrypt_iv,
key_version,
auth_tag_mem,
size,
copy_size),
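The test change above snapshots the channel pool's key version right after the GPU encryption is pushed and hands the saved version to the CPU decryption, so a key rotation landing in between cannot break the round trip. A self-contained model of that save-the-version pattern, with a two-slot key store and XOR standing in for the real CSL machinery (all names here are made up):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint32_t version; uint8_t key[2]; } key_store_t;

    static uint8_t get_key(const key_store_t *ks, uint32_t version)
    {
        return ks->key[version & 1]; /* two key slots: old and new */
    }

    int main(void)
    {
        key_store_t ks = { .version = 0, .key = { 0x5a, 0xc3 } };
        uint8_t plain = 0x42;

        /* "GPU" encrypts with the current key... */
        uint8_t cipher = plain ^ get_key(&ks, ks.version);
        /* ...and the consumer snapshots the version before any rotation */
        uint32_t saved_version = ks.version;

        ks.version++; /* a rotation happens in between */

        /* Decrypting with the saved version still recovers the data */
        uint8_t out = cipher ^ get_key(&ks, saved_version);
        printf("ok=%d\n", out == plain); /* ok=1 */
        return 0;
    }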
@@ -124,24 +124,23 @@ static NV_STATUS uvm_test_verify_bh_affinity(uvm_intr_handler_t *isr, int node)
static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAMS *params, struct file *filp)
{
uvm_gpu_t *gpu;
NV_STATUS status;
uvm_rm_user_object_t user_rm_va_space = {
.rm_control_fd = -1,
.user_client = params->client,
.user_object = params->smc_part_ref
};
NV_STATUS status = NV_OK;

if (!UVM_THREAD_AFFINITY_SUPPORTED())
return NV_ERR_NOT_SUPPORTED;

status = uvm_gpu_retain_by_uuid(&params->gpu_uuid, &user_rm_va_space, &gpu);
if (status != NV_OK)
return status;
uvm_mutex_lock(&g_uvm_global.global_lock);

gpu = uvm_gpu_get_by_uuid(&params->gpu_uuid);
if (!gpu) {
status = NV_ERR_INVALID_DEVICE;
goto unlock;
}

// If the GPU is not attached to a NUMA node, there is nothing to do.
if (gpu->parent->closest_cpu_numa_node == NUMA_NO_NODE) {
status = NV_ERR_NOT_SUPPORTED;
goto release;
goto unlock;
}

if (gpu->parent->replayable_faults_supported) {
@@ -150,7 +149,7 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
gpu->parent->closest_cpu_numa_node);
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
if (status != NV_OK)
goto release;
goto unlock;

if (gpu->parent->non_replayable_faults_supported) {
uvm_parent_gpu_non_replayable_faults_isr_lock(gpu->parent);
@@ -158,7 +157,7 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
gpu->parent->closest_cpu_numa_node);
uvm_parent_gpu_non_replayable_faults_isr_unlock(gpu->parent);
if (status != NV_OK)
goto release;
goto unlock;
}

if (gpu->parent->access_counters_supported) {
@@ -168,8 +167,9 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
}
}
release:
uvm_gpu_release(gpu);

unlock:
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
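The rewrite above replaces the retain/release pair on the GPU with a lookup performed entirely under g_uvm_global.global_lock, which is why every error path now funnels to the unlock label. The same discipline, modeled in userspace with a pthread mutex and an illustrative registry:

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    typedef struct { char uuid[16]; int numa_node; } gpu_t;

    static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
    static gpu_t gpu_table[4];
    static int gpu_count;

    /* caller must hold global_lock */
    static gpu_t *gpu_get_by_uuid(const char *uuid)
    {
        for (int i = 0; i < gpu_count; i++)
            if (strcmp(gpu_table[i].uuid, uuid) == 0)
                return &gpu_table[i];
        return NULL;
    }

    int check_affinity(const char *uuid, int *node_out)
    {
        int status = 0;

        pthread_mutex_lock(&global_lock);
        gpu_t *gpu = gpu_get_by_uuid(uuid);
        if (!gpu) {
            status = -1;            /* NV_ERR_INVALID_DEVICE in the original */
            goto unlock;
        }
        *node_out = gpu->numa_node; /* safe: the lock pins the entry */
    unlock:
        pthread_mutex_unlock(&global_lock);
        return status;
    }

    int main(void)
    {
        gpu_table[gpu_count++] = (gpu_t){ "GPU-0", 1 };
        int node = -1;
        printf("status=%d node=%d\n", check_affinity("GPU-0", &node), node);
        return 0;
    }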
@@ -347,20 +347,30 @@ typedef enum
UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH = 0,
UVM_TEST_CHANNEL_STRESS_MODE_UPDATE_CHANNELS,
UVM_TEST_CHANNEL_STRESS_MODE_STREAM,
UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION,
} UVM_TEST_CHANNEL_STRESS_MODE;

typedef enum
{
UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU,
UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU,
UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE,
} UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION;

#define UVM_TEST_CHANNEL_STRESS UVM_TEST_IOCTL_BASE(15)
typedef struct
{
NvU32 mode; // In
NvU32 mode; // In, one of UVM_TEST_CHANNEL_STRESS_MODE

// Number of iterations:
// mode == NOOP_PUSH: number of noop pushes
// mode == UPDATE_CHANNELS: number of updates
// mode == STREAM: number of iterations per stream
// mode == ROTATION: number of operations
NvU32 iterations;

NvU32 num_streams; // In, used only for mode == UVM_TEST_CHANNEL_STRESS_MODE_STREAM
NvU32 num_streams; // In, used only if mode == STREAM
NvU32 key_rotation_operation; // In, used only if mode == ROTATION
NvU32 seed; // In
NvU32 verbose; // In
NV_STATUS rmStatus; // Out
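A hypothetical userspace caller of the extended stress test might fill the params as in the sketch below. The device path, the ioctl request encoding and the struct layout are illustrative stand-ins, not the real UVM test ABI; only the field roles follow the header above:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    struct channel_stress_params {       /* models the UVM_TEST_CHANNEL_STRESS params */
        uint32_t mode;                   /* one of the MODE enum values */
        uint32_t iterations;
        uint32_t num_streams;            /* only read when mode == STREAM */
        uint32_t key_rotation_operation; /* only read when mode == KEY_ROTATION */
        uint32_t seed;
        uint32_t verbose;
        uint32_t rm_status;              /* out */
    };

    #define MODE_KEY_ROTATION 3
    #define OP_ROTATE         2
    /* illustrative request value; the real one comes from the UVM test headers */
    #define CHANNEL_STRESS_IOCTL _IOWR('U', 15, struct channel_stress_params)

    int main(void)
    {
        struct channel_stress_params p = {
            .mode = MODE_KEY_ROTATION,
            .iterations = 100,           /* number of key-rotation operations */
            .key_rotation_operation = OP_ROTATE,
            .seed = 1234,
        };
        int fd = open("/dev/nvidia-uvm", O_RDWR);
        if (fd < 0 || ioctl(fd, CHANNEL_STRESS_IOCTL, &p) != 0)
            perror("channel stress test");
        if (fd >= 0)
            close(fd);
        return 0;
    }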
@@ -1210,8 +1220,6 @@ typedef struct
typedef struct
{
NvProcessorUuid gpu_uuid; // In
NvHandle client; // In
NvHandle smc_part_ref; // In

NV_STATUS rmStatus; // Out
} UVM_TEST_NUMA_CHECK_AFFINITY_PARAMS;

@@ -725,8 +725,9 @@ bool uvm_va_block_cpu_is_region_resident_on(uvm_va_block_t *va_block, int nid, u
}

// Return the preferred NUMA node ID for the block's policy.
// If the preferred node ID is NUMA_NO_NODE, the current NUMA node ID
// is returned.
// If the preferred node ID is NUMA_NO_NODE, the nearest NUMA node ID
// with memory is returned. In most cases, this should be the current
// NUMA node.
static int uvm_va_block_context_get_node(uvm_va_block_context_t *va_block_context)
{
if (va_block_context->make_resident.dest_nid != NUMA_NO_NODE)
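The populate path below resolves the target NUMA node in a fixed order: an explicit make_resident destination wins, then the range policy's preferred node, and otherwise the allocator is left to choose. A minimal standalone model of that resolution order:

    #include <stdio.h>

    #define NUMA_NO_NODE (-1)

    static int resolve_preferred_nid(int dest_nid, int policy_preferred_nid)
    {
        if (dest_nid != NUMA_NO_NODE)
            return dest_nid;             /* caller forced a destination */
        if (policy_preferred_nid != NUMA_NO_NODE)
            return policy_preferred_nid; /* the VA range has a preferred node */
        return NUMA_NO_NODE;             /* let the allocator decide */
    }

    int main(void)
    {
        printf("%d\n", resolve_preferred_nid(2, 0));                       /* 2 */
        printf("%d\n", resolve_preferred_nid(NUMA_NO_NODE, 0));            /* 0 */
        printf("%d\n", resolve_preferred_nid(NUMA_NO_NODE, NUMA_NO_NODE)); /* -1 */
        return 0;
    }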
@@ -2070,6 +2071,7 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
uvm_page_mask_t *allocated_mask;
uvm_cpu_chunk_alloc_flags_t alloc_flags = UVM_CPU_CHUNK_ALLOC_FLAGS_NONE;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
const uvm_va_policy_t *policy = uvm_va_policy_get_region(block, populate_region);
uvm_page_index_t page_index;
uvm_gpu_id_t id;
int preferred_nid = block_context->make_resident.dest_nid;
@@ -2077,6 +2079,10 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
if (block_test && block_test->cpu_chunk_allocation_target_id != NUMA_NO_NODE)
preferred_nid = block_test->cpu_chunk_allocation_target_id;

// If the VA range has a preferred NUMA node, use it.
if (preferred_nid == NUMA_NO_NODE)
preferred_nid = policy->preferred_nid;

// TODO: Bug 4158598: Using NUMA_NO_NODE for staging allocations is sub-optimal.
if (preferred_nid != NUMA_NO_NODE) {
uvm_va_block_cpu_node_state_t *node_state = block_node_state_get(block, preferred_nid);
@@ -2127,13 +2133,12 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
uvm_page_mask_t *node_pages_mask = &block_context->make_resident.node_pages_mask;
uvm_chunk_sizes_mask_t allocation_sizes;

if (uvm_page_mask_test(allocated_mask, page_index)) {
if (uvm_page_mask_test(allocated_mask, page_index) ||
uvm_va_block_cpu_is_page_resident_on(block, preferred_nid, page_index)) {
page_index = uvm_va_block_next_unset_page_in_mask(populate_region, allocated_mask, page_index) - 1;
continue;
}

UVM_ASSERT(!uvm_va_block_cpu_is_page_resident_on(block, preferred_nid, page_index));

allocation_sizes = block_calculate_largest_alloc_size(block,
page_index,
allocated_mask,
@@ -3843,6 +3848,7 @@ static void conf_computing_block_copy_push_gpu_to_cpu(uvm_va_block_t *block,
uvm_gpu_address_t staging_buffer = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
uvm_gpu_address_t auth_tag_buffer = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
uvm_gpu_address_t src_address = block_copy_get_address(block, &copy_state->src, page_index, gpu);
NvU32 key_version = uvm_channel_pool_key_version(push->channel->pool);

UVM_ASSERT(UVM_ID_IS_GPU(copy_state->src.id));
UVM_ASSERT(UVM_ID_IS_CPU(copy_state->dst.id));
@@ -3860,7 +3866,8 @@ static void conf_computing_block_copy_push_gpu_to_cpu(uvm_va_block_t *block,
// crypto-operations and it only guarantees PAGE_SIZE contiguity, all
// encryptions and decryptions must happen on a PAGE_SIZE basis.
for_each_va_block_page_in_region(page_index, region) {
uvm_conf_computing_log_gpu_encryption(push->channel, &dma_buffer->decrypt_iv[page_index]);
uvm_conf_computing_log_gpu_encryption(push->channel, PAGE_SIZE, &dma_buffer->decrypt_iv[page_index]);
dma_buffer->key_version[page_index] = key_version;

// All but the first encryption can be pipelined. The first encryption
// uses the caller's pipelining settings.
@@ -3919,7 +3926,8 @@ static NV_STATUS conf_computing_copy_pages_finish(uvm_va_block_t *block,
status = uvm_conf_computing_cpu_decrypt(push->channel,
cpu_page_address,
staging_buffer,
&dma_buffer->decrypt_iv[page_index],
dma_buffer->decrypt_iv + page_index,
dma_buffer->key_version[page_index],
PAGE_SIZE,
auth_tag_buffer);
kunmap(dst_page);
@@ -4037,7 +4045,7 @@ static NV_STATUS block_copy_pages(uvm_va_block_t *va_block,

UVM_ASSERT(dst_chunk);
UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) >= uvm_va_block_region_size(region));
UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) <= uvm_cpu_chunk_get_size(dst_chunk));
UVM_ASSERT(uvm_va_block_region_size(region) <= uvm_cpu_chunk_get_size(dst_chunk));

// CPU-to-CPU copies using memcpy() don't have any inherent ordering with
// copies using GPU CEs. So, we have to make sure that all previously
@@ -5132,7 +5140,7 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
uvm_page_mask_t *dst_resident_mask;
uvm_page_mask_t *migrated_pages;
uvm_page_mask_t *staged_pages;
uvm_page_mask_t *first_touch_mask;
uvm_page_mask_t *scratch_residency_mask;

// TODO: Bug 3660922: need to implement HMM read duplication support.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
@@ -5151,6 +5159,10 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(!uvm_va_block_is_dead(va_block));

scratch_residency_mask = kmem_cache_alloc(g_uvm_page_mask_cache, NV_UVM_GFP_FLAGS);
if (!scratch_residency_mask)
return NV_ERR_NO_MEMORY;

// For pages that are entering read-duplication we need to unmap remote
// mappings and revoke RW and higher access permissions.
//
@@ -5177,12 +5189,12 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,

status = block_prep_read_duplicate_mapping(va_block, va_block_context, src_id, region, preprocess_page_mask);
if (status != NV_OK)
return status;
goto out;
}

status = block_populate_pages(va_block, va_block_retry, va_block_context, dest_id, region, page_mask);
if (status != NV_OK)
return status;
goto out;

status = block_copy_resident_pages(va_block,
va_block_context,
@@ -5192,22 +5204,17 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
prefetch_page_mask,
UVM_VA_BLOCK_TRANSFER_MODE_COPY);
if (status != NV_OK)
return status;
goto out;

// Pages that weren't resident anywhere else were populated at the
// destination directly. Mark them as resident now, since there were no
// errors from block_copy_resident_pages() above.
// Note that va_block_context->scratch_page_mask is passed to
// block_copy_set_first_touch_residency() which is generally unsafe but in
// this case, block_copy_set_first_touch_residency() copies page_mask
// before scratch_page_mask could be clobbered.
migrated_pages = &va_block_context->make_resident.pages_migrated;
first_touch_mask = &va_block_context->scratch_page_mask;
uvm_page_mask_init_from_region(first_touch_mask, region, page_mask);
uvm_page_mask_andnot(first_touch_mask, first_touch_mask, migrated_pages);
uvm_page_mask_init_from_region(scratch_residency_mask, region, page_mask);
uvm_page_mask_andnot(scratch_residency_mask, scratch_residency_mask, migrated_pages);

if (!uvm_page_mask_empty(first_touch_mask))
block_copy_set_first_touch_residency(va_block, va_block_context, dest_id, region, first_touch_mask);
if (!uvm_page_mask_empty(scratch_residency_mask))
block_copy_set_first_touch_residency(va_block, va_block_context, dest_id, region, scratch_residency_mask);

staged_pages = &va_block_context->make_resident.pages_staged;
if (!UVM_ID_IS_CPU(dest_id) && !uvm_page_mask_empty(staged_pages)) {
@@ -5219,6 +5226,18 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,

if (!uvm_page_mask_empty(migrated_pages)) {
if (UVM_ID_IS_CPU(dest_id)) {
// Check if the CPU is already in the resident set of processors.
// We need to do this since we can't have multiple NUMA nodes with
// resident pages.
// If any of the migrate pages were already resident on the CPU, the
// residency has to be switched to the destination NUMA node.
if (uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) &&
uvm_page_mask_and(scratch_residency_mask,
uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE),
migrated_pages)) {
uvm_va_block_cpu_clear_resident_all_chunks(va_block, va_block_context, scratch_residency_mask);
}

uvm_va_block_cpu_set_resident_all_chunks(va_block, va_block_context, migrated_pages);
}
else {
@@ -5247,7 +5266,9 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
// Check state of all chunks after residency change.
// TODO: Bug 4207783: Check both CPU and GPU chunks.
UVM_ASSERT(block_check_cpu_chunks(va_block));
return NV_OK;
out:
kmem_cache_free(g_uvm_page_mask_cache, scratch_residency_mask);
return status;
}

// Looks up the current CPU mapping state of page from the
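The switch from early returns to goto out above is the usual kernel cleanup idiom: once scratch_residency_mask is allocated, every exit must pass through the label that frees it. A generic standalone sketch of the idiom:

    #include <stdio.h>
    #include <stdlib.h>

    static int step_one(void) { return 0; } /* stand-ins for the real work items */
    static int step_two(void) { return 0; }

    int do_work(void)
    {
        int status = 0;
        unsigned char *scratch = malloc(4096); /* plays the role of the page-mask alloc */

        if (!scratch)
            return -1; /* nothing allocated yet, so a plain return is still safe */

        if (step_one() != 0) {
            status = -1;
            goto out;
        }

        if (step_two() != 0) {
            status = -1;
            goto out;
        }

    out:
        free(scratch); /* single release point, reached on success and failure alike */
        return status;
    }

    int main(void)
    {
        printf("status=%d\n", do_work());
        return 0;
    }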
@@ -5532,13 +5553,15 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
*block->read_duplicated_pages.bitmap);

// Test read_duplicated_pages mask
UVM_ASSERT_MSG((uvm_processor_mask_get_count(resident_processors) <= 1 &&
!uvm_page_mask_test(&block->read_duplicated_pages, page_index)) ||
(uvm_processor_mask_get_count(resident_processors) > 1 &&
uvm_page_mask_test(&block->read_duplicated_pages, page_index)),
UVM_ASSERT_MSG((!uvm_page_mask_test(&block->read_duplicated_pages, page_index) &&
uvm_processor_mask_get_count(resident_processors) <= 1) ||
(uvm_page_mask_test(&block->read_duplicated_pages, page_index) &&
uvm_processor_mask_get_count(resident_processors) >= 1),
"Resident: 0x%lx - Mappings R: 0x%lx W: 0x%lx A: 0x%lx - SWA: 0x%lx - RD: 0x%lx\n",
*resident_processors->bitmap,
*read_mappings->bitmap, *write_mappings->bitmap, *atomic_mappings->bitmap,
*read_mappings->bitmap,
*write_mappings->bitmap,
*atomic_mappings->bitmap,
*va_space->system_wide_atomics_enabled_processors.bitmap,
*block->read_duplicated_pages.bitmap);

@@ -6022,7 +6045,7 @@ static bool block_has_remote_mapping_gpu(uvm_va_block_t *block,
if (uvm_page_mask_empty(mapped_pages))
return false;

return !uvm_id_equal(uvm_va_range_get_policy(block->va_range)->preferred_location, gpu_id);
return !uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(block->va_range), gpu_id, NUMA_NO_NODE);
}

// Remote pages are pages which are mapped but not resident locally
@@ -8365,6 +8388,7 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
uvm_va_block_context_t *block_context,
uvm_gpu_t *gpu,
uvm_processor_id_t resident_id,
int resident_nid,
uvm_page_mask_t *map_page_mask,
uvm_prot_t new_prot,
uvm_tracker_t *out_tracker)
@@ -8374,7 +8398,7 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
uvm_push_t push;
NV_STATUS status;
uvm_page_mask_t *pages_to_map = &block_context->mapping.page_mask;
const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, NUMA_NO_NODE);
const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, resident_nid);
uvm_pte_bits_gpu_t pte_bit;
uvm_pte_bits_gpu_t prot_pte_bit = get_gpu_pte_bit_index(new_prot);
uvm_va_block_new_pte_state_t *new_pte_state = &block_context->mapping.new_pte_state;
@@ -8383,8 +8407,10 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
UVM_ASSERT(map_page_mask);
UVM_ASSERT(uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(resident_id)], gpu->id));

if (uvm_processor_mask_test(block_get_uvm_lite_gpus(va_block), gpu->id))
UVM_ASSERT(uvm_id_equal(resident_id, uvm_va_range_get_policy(va_block->va_range)->preferred_location));
if (uvm_processor_mask_test(block_get_uvm_lite_gpus(va_block), gpu->id)) {
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
UVM_ASSERT(uvm_va_policy_preferred_location_equal(policy, resident_id, policy->preferred_nid));
}

UVM_ASSERT(!uvm_page_mask_and(&block_context->scratch_page_mask,
map_page_mask,
@@ -8486,18 +8512,27 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
return uvm_tracker_add_push_safe(out_tracker, &push);
}

// allowed_nid_mask is only valid if the CPU is set in allowed_mask.
static void map_get_allowed_destinations(uvm_va_block_t *block,
uvm_va_block_context_t *va_block_context,
const uvm_va_policy_t *policy,
uvm_processor_id_t id,
uvm_processor_mask_t *allowed_mask)
uvm_processor_mask_t *allowed_mask,
nodemask_t *allowed_nid_mask)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);

*allowed_nid_mask = node_possible_map;

if (uvm_processor_mask_test(block_get_uvm_lite_gpus(block), id)) {
// UVM-Lite can only map resident pages on the preferred location
uvm_processor_mask_zero(allowed_mask);
uvm_processor_mask_set(allowed_mask, policy->preferred_location);
if (UVM_ID_IS_CPU(policy->preferred_location) &&
!uvm_va_policy_preferred_location_equal(policy, UVM_ID_CPU, NUMA_NO_NODE)) {
nodes_clear(*allowed_nid_mask);
node_set(policy->preferred_nid, *allowed_nid_mask);
}
}
else if ((uvm_va_policy_is_read_duplicate(policy, va_space) ||
(uvm_id_equal(policy->preferred_location, id) &&
@@ -8540,6 +8575,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
uvm_page_mask_t *running_page_mask = &va_block_context->mapping.map_running_page_mask;
NV_STATUS status = NV_OK;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
nodemask_t *allowed_nid_destinations;

va_block_context->mapping.cause = cause;

@@ -8589,10 +8625,20 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
if (!allowed_destinations)
return NV_ERR_NO_MEMORY;

allowed_nid_destinations = uvm_kvmalloc(sizeof(*allowed_nid_destinations));
if (!allowed_nid_destinations) {
uvm_processor_mask_cache_free(allowed_destinations);
return NV_ERR_NO_MEMORY;
}

// Map per resident location so we can more easily detect physically-
// contiguous mappings.
map_get_allowed_destinations(va_block, va_block_context, policy, id, allowed_destinations);

map_get_allowed_destinations(va_block,
va_block_context,
policy,
id,
allowed_destinations,
allowed_nid_destinations);
for_each_closest_id(resident_id, allowed_destinations, id, va_space) {
if (UVM_ID_IS_CPU(id)) {
status = block_map_cpu_to(va_block,
@@ -8603,11 +8649,30 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
new_prot,
out_tracker);
}
else if (UVM_ID_IS_CPU(resident_id)) {
int nid;

// map_get_allowed_destinations() will set the mask of CPU NUMA
// nodes that should be mapped.
for_each_node_mask(nid, *allowed_nid_destinations) {
status = block_map_gpu_to(va_block,
va_block_context,
gpu,
resident_id,
nid,
running_page_mask,
new_prot,
out_tracker);
if (status != NV_OK)
break;
}
}
else {
status = block_map_gpu_to(va_block,
va_block_context,
gpu,
resident_id,
NUMA_NO_NODE,
running_page_mask,
new_prot,
out_tracker);
@@ -8622,6 +8687,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
}

uvm_processor_mask_cache_free(allowed_destinations);
uvm_kvfree(allowed_nid_destinations);

return status;
}
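When the resident processor is the CPU, the loop above now issues one block_map_gpu_to() call per allowed NUMA node. A userspace model of that per-node loop, with a plain 64-bit mask standing in for the kernel's nodemask_t:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t nodemask_t; /* stand-in for the kernel's nodemask_t */

    static int map_gpu_to_node(int nid)
    {
        printf("mapping CPU-resident pages on node %d\n", nid);
        return 0;
    }

    int main(void)
    {
        nodemask_t allowed = (1ull << 0) | (1ull << 2); /* nodes 0 and 2 */
        int status = 0;

        for (int nid = 0; nid < 64; nid++) { /* models for_each_node_mask() */
            if (!(allowed & (1ull << nid)))
                continue;
            status = map_gpu_to_node(nid);
            if (status != 0)
                break;                       /* mirror the early-out on error */
        }
        return status;
    }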
@@ -11175,8 +11241,8 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
// so uvm_va_block_map will be a no-op.
uvm_processor_mask_and(map_uvm_lite_gpus, map_other_processors, block_get_uvm_lite_gpus(va_block));
if (!uvm_processor_mask_empty(map_uvm_lite_gpus) &&
uvm_id_equal(new_residency, preferred_location)) {
for_each_id_in_mask(map_processor_id, map_uvm_lite_gpus) {
uvm_va_policy_preferred_location_equal(policy, new_residency, va_block_context->make_resident.dest_nid)) {
for_each_id_in_mask (map_processor_id, map_uvm_lite_gpus) {
status = uvm_va_block_map(va_block,
va_block_context,
map_processor_id,
@@ -11637,6 +11703,10 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
// For GPU faults, the bottom half is pinned to CPUs closest to their GPU.
// Therefore, in both cases, we can use numa_mem_id() to get the NUMA node
// ID of the faulting processor.
// Note that numa_mem_id() returns the nearest node with memory. In most
// cases, this will be the current NUMA node. However, in the case that the
// current node does not have any memory, we probably want the nearest node
// with memory, anyway.
int current_nid = numa_mem_id();
bool may_read_duplicate = can_read_duplicate(va_block, page_index, policy, thrashing_hint);

@@ -11660,7 +11730,12 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
// If read duplication is enabled and the page is also resident on the CPU,
// keep its current NUMA node residency.
if (may_read_duplicate && uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index))
return block_get_page_node_residency(va_block, page_index);
return NUMA_NO_NODE;

// The new_residency processor is the CPU and the preferred location is not
// the CPU. If the page is resident on the CPU, keep its current residency.
if (uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index))
return NUMA_NO_NODE;

return current_nid;
}
@@ -12564,125 +12639,6 @@ NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
return uvm_hmm_va_block_find_create(va_space, addr, hmm_vma, out_block);
}

// Launch a synchronous, encrypted copy between GPU and CPU.
//
// The copy entails a GPU-side encryption (relying on the Copy Engine), and a
// CPU-side decryption step, such that the destination CPU buffer pointed by
// dst_plain will contain the unencrypted (plain text) contents. The destination
// buffer can be in protected or unprotected sysmem, while the source buffer
// must be in protected vidmem.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
void *dst_plain,
uvm_gpu_address_t src_gpu_address,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...)
{
NV_STATUS status;
UvmCslIv decrypt_iv;
uvm_push_t push;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
void *src_cipher, *auth_tag;
va_list args;

UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
if (status != NV_OK)
return status;

va_start(args, format);
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
va_end(args);

if (status != NV_OK)
goto out;

uvm_conf_computing_log_gpu_encryption(push.channel, &decrypt_iv);

dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

status = uvm_push_end_and_wait(&push);
if (status != NV_OK)
goto out;

src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
status = uvm_conf_computing_cpu_decrypt(push.channel, dst_plain, src_cipher, &decrypt_iv, size, auth_tag);

out:
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
return status;
}

// Launch a synchronous, encrypted copy between CPU and GPU.
//
// The source CPU buffer pointed by src_plain contains the unencrypted (plain
// text) contents; the function internally performs a CPU-side encryption step
// before launching the GPU-side CE decryption. The source buffer can be in
// protected or unprotected sysmem, while the destination buffer must be in
// protected vidmem.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
static NV_STATUS encrypted_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
uvm_gpu_address_t dst_gpu_address,
void *src_plain,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...)
{
NV_STATUS status;
uvm_push_t push;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
void *dst_cipher, *auth_tag;
va_list args;

UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
if (status != NV_OK)
return status;

va_start(args, format);
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
va_end(args);

if (status != NV_OK)
goto out;

dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);

src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

status = uvm_push_end_and_wait(&push);

out:
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
return status;
}

static NV_STATUS va_block_write_cpu_to_gpu(uvm_va_block_t *va_block,
uvm_gpu_t *gpu,
uvm_gpu_address_t dst_gpu_address,
@@ -12695,14 +12651,14 @@ static NV_STATUS va_block_write_cpu_to_gpu(uvm_va_block_t *va_block,
uvm_gpu_address_t src_gpu_address;

if (g_uvm_global.conf_computing_enabled) {
return encrypted_memcopy_cpu_to_gpu(gpu,
dst_gpu_address,
uvm_mem_get_cpu_addr_kernel(src_mem),
size,
&va_block->tracker,
"Encrypted write to [0x%llx, 0x%llx)",
dst,
dst + size);
return uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
dst_gpu_address,
uvm_mem_get_cpu_addr_kernel(src_mem),
size,
&va_block->tracker,
"Encrypted write to [0x%llx, 0x%llx)",
dst,
dst + size);
}

status = uvm_push_begin_acquire(gpu->channel_manager,
@@ -12799,14 +12755,14 @@ static NV_STATUS va_block_read_gpu_to_cpu(uvm_va_block_t *va_block,
uvm_gpu_address_t dst_gpu_address;

if (g_uvm_global.conf_computing_enabled) {
return encrypted_memcopy_gpu_to_cpu(gpu,
uvm_mem_get_cpu_addr_kernel(dst_mem),
src_gpu_address,
size,
&va_block->tracker,
"Encrypted read from [0x%llx, 0x%llx)",
src,
src + size);
return uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
uvm_mem_get_cpu_addr_kernel(dst_mem),
src_gpu_address,
size,
&va_block->tracker,
"Encrypted read from [0x%llx, 0x%llx)",
src,
src + size);
}

status = uvm_push_begin_acquire(gpu->channel_manager,
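The helpers removed above (and their uvm_conf_computing_util_* replacements) follow the same staging-buffer protocol: the Copy Engine encrypts protected vidmem into unprotected sysmem plus an authentication tag, and the CPU verifies the tag before decrypting. A toy standalone model of that flow, with XOR and a byte-sum tag standing in for the real AEAD (every name here is illustrative):

    #include <stdio.h>
    #include <string.h>

    #define KEY 0x5c

    static unsigned char tag_of(const unsigned char *buf, size_t n)
    {
        unsigned char t = 0;
        while (n--)
            t += *buf++;
        return t;
    }

    /* "GPU" side: encrypt protected memory into the staging buffer + tag */
    static void ce_encrypt(unsigned char *staging, unsigned char *tag,
                           const unsigned char *src, size_t n)
    {
        for (size_t i = 0; i < n; i++)
            staging[i] = src[i] ^ KEY;
        *tag = tag_of(staging, n);
    }

    /* "CPU" side: verify the tag, then decrypt into the caller's buffer */
    static int cpu_decrypt(unsigned char *dst, const unsigned char *staging,
                           unsigned char tag, size_t n)
    {
        if (tag_of(staging, n) != tag)
            return -1; /* auth failure */
        for (size_t i = 0; i < n; i++)
            dst[i] = staging[i] ^ KEY;
        return 0;
    }

    int main(void)
    {
        unsigned char vidmem[8] = "secret!", staging[8], plain[8], tag;
        ce_encrypt(staging, &tag, vidmem, sizeof(vidmem));
        if (cpu_decrypt(plain, staging, tag, sizeof(staging)) == 0)
            printf("%s\n", plain); /* "secret!" */
        return 0;
    }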
@@ -105,6 +105,12 @@ bool uvm_va_policy_preferred_location_equal(const uvm_va_policy_t *policy, uvm_p
{
bool equal = uvm_id_equal(policy->preferred_location, proc);

if (!UVM_ID_IS_CPU(policy->preferred_location))
UVM_ASSERT(policy->preferred_nid == NUMA_NO_NODE);

if (!UVM_ID_IS_CPU(proc))
UVM_ASSERT(cpu_numa_id == NUMA_NO_NODE);

if (equal && UVM_ID_IS_CPU(policy->preferred_location))
equal = uvm_numa_id_eq(policy->preferred_nid, cpu_numa_id);

@@ -656,7 +662,7 @@ const uvm_va_policy_t *uvm_va_policy_set_preferred_location(uvm_va_block_t *va_b
// and that the policy is changing.
UVM_ASSERT(node->node.start >= start);
UVM_ASSERT(node->node.end <= end);
UVM_ASSERT(!uvm_id_equal(node->policy.preferred_location, processor_id));
UVM_ASSERT(!uvm_va_policy_preferred_location_equal(&node->policy, processor_id, cpu_node_id));
}

node->policy.preferred_location = processor_id;

@@ -868,9 +868,9 @@ static void uvm_va_range_disable_peer_managed(uvm_va_range_t *va_range, uvm_gpu_
// preferred location. If peer mappings are being disabled to the
// preferred location, then unmap the other GPU.
// Nothing to do otherwise.
if (uvm_id_equal(uvm_va_range_get_policy(va_range)->preferred_location, gpu0->id))
if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu0->id, NUMA_NO_NODE))
uvm_lite_gpu_to_unmap = gpu1;
else if (uvm_id_equal(uvm_va_range_get_policy(va_range)->preferred_location, gpu1->id))
else if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu1->id, NUMA_NO_NODE))
uvm_lite_gpu_to_unmap = gpu0;
else
return;
@@ -951,7 +951,7 @@ static void va_range_unregister_gpu_managed(uvm_va_range_t *va_range, uvm_gpu_t
// Reset preferred location and accessed-by of VA ranges if needed
// Note: ignoring the return code of uvm_va_range_set_preferred_location since this
// will only return on error when setting a preferred location, not on a reset
if (uvm_id_equal(uvm_va_range_get_policy(va_range)->preferred_location, gpu->id))
if (uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), gpu->id, NUMA_NO_NODE))
(void)uvm_va_range_set_preferred_location(va_range, UVM_ID_INVALID, NUMA_NO_NODE, mm, NULL);

uvm_va_range_unset_accessed_by(va_range, gpu->id, NULL);
@@ -1683,7 +1683,7 @@ void uvm_va_range_unset_accessed_by(uvm_va_range_t *va_range,
// If a UVM-Lite GPU is being removed from the accessed_by mask, it will
// also stop being a UVM-Lite GPU unless it's also the preferred location.
if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, processor_id) &&
!uvm_id_equal(uvm_va_range_get_policy(va_range)->preferred_location, processor_id)) {
!uvm_va_policy_preferred_location_equal(uvm_va_range_get_policy(va_range), processor_id, NUMA_NO_NODE)) {
range_unmap(va_range, processor_id, out_tracker);
}
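With the additions above, preferred-location equality has two components: the processor ID and, when that processor is the CPU, the preferred NUMA node, where NUMA_NO_NODE now only matches itself. A minimal standalone model (the ID encoding is illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #define NUMA_NO_NODE (-1)
    #define ID_CPU 0 /* illustrative encoding; GPUs use other IDs */

    typedef struct { int preferred_location; int preferred_nid; } policy_t;

    static bool preferred_location_equal(const policy_t *p, int proc, int cpu_numa_id)
    {
        bool equal = (p->preferred_location == proc);
        if (equal && proc == ID_CPU)
            equal = (p->preferred_nid == cpu_numa_id); /* NUMA_NO_NODE only matches itself */
        return equal;
    }

    int main(void)
    {
        policy_t p = { .preferred_location = ID_CPU, .preferred_nid = 1 };
        printf("%d\n", preferred_location_equal(&p, ID_CPU, 1));            /* 1 */
        printf("%d\n", preferred_location_equal(&p, ID_CPU, NUMA_NO_NODE)); /* 0 */
        return 0;
    }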
kernel-open/nvidia/libspdm_internal_crypt_lib.c (new file, 42 lines)
@@ -0,0 +1,42 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Comments, prototypes and checks taken from DMTF: Copyright 2021-2022 DMTF. All rights reserved.
* License: BSD 3-Clause License. For full text see link: https://github.com/DMTF/libspdm/blob/main/LICENSE.md
*/

#include "os-interface.h"
#include "internal_crypt_lib.h"
#include "library/cryptlib.h"

bool libspdm_check_crypto_backend(void)
{
#ifdef USE_LKCA
nv_printf(NV_DBG_INFO, "libspdm_check_crypto_backend: LKCA wrappers found.\n");
nv_printf(NV_DBG_INFO, "libspdm_check_crypto_backend: LKCA calls may still fail if modules have not been loaded!\n");
return true;
#else
nv_printf(NV_DBG_ERRORS, "libspdm_check_crypto_backend: Error - libspdm expects LKCA but found stubs!\n");
return false;
#endif
}

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;

// Get the NUMA node where the first page of the stack is resident. If

@@ -37,6 +37,10 @@
#include <linux/kernfs.h>
#endif

#if !defined(NV_BUS_TYPE_HAS_IOMMU_OPS)
#include <linux/iommu.h>
#endif

static void
nv_check_and_exclude_gpu(
nvidia_stack_t *sp,
@@ -530,35 +534,21 @@ nv_pci_probe
if (pci_dev->is_virtfn)
{
#if defined(NV_VGPU_KVM_BUILD)
nvl = pci_get_drvdata(pci_dev->physfn);
if (!nvl)

#if defined(NV_BUS_TYPE_HAS_IOMMU_OPS)
if (pci_dev->dev.bus->iommu_ops == NULL)
#else
if ((pci_dev->dev.iommu != NULL) && (pci_dev->dev.iommu->iommu_dev != NULL) &&
(pci_dev->dev.iommu->iommu_dev->ops == NULL))
#endif
{
nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
"since PF is not bound to nvidia driver.\n",
"since IOMMU is not present on the system.\n",
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
goto failed;
}

if (pci_dev->dev.bus->iommu_ops == NULL)
{
nv = NV_STATE_PTR(nvl);
if (rm_is_iommu_needed_for_sriov(sp, nv))
{
nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
"since IOMMU is not present on the system.\n",
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
goto failed;
}
}

if (nvidia_vgpu_vfio_probe(pci_dev) != NV_OK)
{
nv_printf(NV_DBG_ERRORS, "NVRM: Failed to register device to vGPU VFIO module");
goto failed;
}

nv_kmem_cache_free_stack(sp);
return 0;
#else

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -45,6 +45,11 @@ typedef struct gpuObject *gpuObjectHandle;

typedef struct gpuRetainedChannel_struct gpuRetainedChannel;

NV_STATUS calculatePCIELinkRateMBps(NvU32 lanes,
NvU32 pciLinkMaxSpeed,
NvU32 *pcieLinkRate);

NV_STATUS nvGpuOpsCreateSession(struct gpuSession **session);

NV_STATUS nvGpuOpsDestroySession(struct gpuSession *session);
@@ -286,11 +291,11 @@ NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo,
NvBool bEnable);

// Interface used for CCSL

NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx,
gpuChannelHandle channel);
NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslContextUpdate(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslRotateKey(UvmCslContext *contextList[],
NvU32 contextListCount);
NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx,
NvU8 direction);
NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx,
@@ -308,6 +313,7 @@ NV_STATUS nvGpuOpsCcslDecrypt(struct ccslContext_t *ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU32 keyRotationId,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
@@ -323,7 +329,8 @@ NV_STATUS nvGpuOpsIncrementIv(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 increment,
NvU8 *iv);
NV_STATUS nvGpuOpsLogDeviceEncryption(struct ccslContext_t *ctx,
NvU32 bufferSize);
NV_STATUS nvGpuOpsLogEncryption(struct ccslContext_t *ctx,
NvU8 direction,
NvU32 bufferSize);

#endif /* _NV_GPU_OPS_H_*/

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -1516,16 +1516,23 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext)
}
EXPORT_SYMBOL(nvUvmInterfaceDeinitCslContext);

NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext)
NV_STATUS nvUvmInterfaceCslRotateKey(UvmCslContext *contextList[],
NvU32 contextListCount)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
nvidia_stack_t *sp;

status = rm_gpu_ops_ccsl_context_update(sp, uvmCslContext->ctx);
if ((contextList == NULL) || (contextListCount == 0) || (contextList[0] == NULL))
{
return NV_ERR_INVALID_ARGUMENT;
}

sp = contextList[0]->nvidia_stack;
status = rm_gpu_ops_ccsl_rotate_key(sp, contextList, contextListCount);

return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslUpdateContext);
EXPORT_SYMBOL(nvUvmInterfaceCslRotateKey);
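The batched entry point above rotates every context in the list with a single RM call and borrows the nvidia_stack_t of the first context, which is why a NULL or empty list is rejected up front. A userspace model of that argument contract (types and status codes are stand-ins):

    #include <stddef.h>
    #include <stdio.h>

    #define NV_OK 0
    #define NV_ERR_INVALID_ARGUMENT 1

    typedef struct { void *stack; } csl_context_t;

    static int rotate_key(csl_context_t *list[], unsigned count)
    {
        if (list == NULL || count == 0 || list[0] == NULL)
            return NV_ERR_INVALID_ARGUMENT;
        void *sp = list[0]->stack; /* shared stack for the whole batch */
        (void)sp;                  /* the real code hands sp plus the list to RM */
        return NV_OK;
    }

    int main(void)
    {
        csl_context_t ctx = { .stack = (void *)&ctx };
        csl_context_t *one[] = { &ctx };
        printf("%d %d\n", rotate_key(one, 1), rotate_key(NULL, 0)); /* 0 1 */
        return 0;
    }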
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation)
@@ -1562,6 +1569,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU32 keyRotationId,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
@@ -1575,6 +1583,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
bufferSize,
inputBuffer,
(NvU8 *)decryptIv,
keyRotationId,
outputBuffer,
addAuthData,
addAuthDataSize,
@@ -1625,17 +1634,18 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
}
EXPORT_SYMBOL(nvUvmInterfaceCslIncrementIv);

NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
NvU32 bufferSize)
NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU32 bufferSize)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;

status = rm_gpu_ops_ccsl_log_device_encryption(sp, uvmCslContext->ctx, bufferSize);
status = rm_gpu_ops_ccsl_log_encryption(sp, uvmCslContext->ctx, operation, bufferSize);

return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslLogExternalEncryption);
EXPORT_SYMBOL(nvUvmInterfaceCslLogEncryption);

#else // NV_UVM_ENABLE

@@ -41,6 +41,7 @@ NVIDIA_SOURCES += nvidia/libspdm_rsa.c
NVIDIA_SOURCES += nvidia/libspdm_aead_aes_gcm.c
NVIDIA_SOURCES += nvidia/libspdm_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hmac_sha.c
NVIDIA_SOURCES += nvidia/libspdm_internal_crypt_lib.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf_sha.c
NVIDIA_SOURCES += nvidia/libspdm_ec.c
NVIDIA_SOURCES += nvidia/libspdm_x509.c

@@ -161,7 +161,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_enable_atomic_ops_to_root
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vga_tryget
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_platform_has
NV_CONFTEST_FUNCTION_COMPILE_TESTS += seq_read_iter
NV_CONFTEST_FUNCTION_COMPILE_TESTS += unsafe_follow_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += follow_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked
NV_CONFTEST_FUNCTION_COMPILE_TESTS += add_memory_driver_managed
@@ -228,6 +228,7 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_alloc_me
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_free_gscco_mem
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_memory_block_size_bytes
NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_follow_pte

NV_CONFTEST_TYPE_COMPILE_TESTS += dma_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += swiotlb_dma_ops
@@ -251,6 +252,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present
NV_CONFTEST_TYPE_COMPILE_TESTS += bus_type_has_iommu_ops

NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -38,4 +38,4 @@ bool libspdm_aead_aes_gcm_decrypt_prealloc(void *context,
const uint8_t *data_in, size_t data_in_size,
const uint8_t *tag, size_t tag_size,
uint8_t *data_out, size_t *data_out_size);

bool libspdm_check_crypto_backend(void);

@@ -36,10 +36,28 @@ static inline int nv_follow_pfn(struct vm_area_struct *vma,
unsigned long address,
unsigned long *pfn)
{
#if defined(NV_UNSAFE_FOLLOW_PFN_PRESENT)
return unsafe_follow_pfn(vma, address, pfn);
#else
#if defined(NV_FOLLOW_PFN_PRESENT)
return follow_pfn(vma, address, pfn);
#else
#if NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
int status = 0;
spinlock_t *ptl;
pte_t *ptep;

if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
return status;

status = follow_pte(vma, address, &ptep, &ptl);
if (status)
return status;
*pfn = pte_pfn(ptep_get(ptep));

// The lock is acquired inside follow_pte()
pte_unmap_unlock(ptep, ptl);
return 0;
#else // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
return -1;
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
#endif
}
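For comparison with the kernel-side PFN lookup above, userspace can translate a virtual address to a PFN through the documented /proc/self/pagemap interface (64-bit entries; bits 0-54 hold the PFN and bit 63 the present flag; the PFN field reads as zero without CAP_SYS_ADMIN on recent kernels):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        long page_size = sysconf(_SC_PAGESIZE);
        int x = 42; /* some mapped address to translate */
        uint64_t entry;
        off_t off = ((uintptr_t)&x / page_size) * sizeof(entry);

        int fd = open("/proc/self/pagemap", O_RDONLY);
        if (fd < 0 || pread(fd, &entry, sizeof(entry), off) != sizeof(entry)) {
            perror("pagemap");
            return 1;
        }
        if (entry & (1ull << 63)) /* page present */
            printf("pfn=0x%llx\n", (unsigned long long)(entry & ((1ull << 55) - 1)));
        close(fd);
        return 0;
    }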