mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-26 19:19:47 +00:00
550.54.14
This commit is contained in:
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.07\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.54.14\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
|
||||
@@ -621,6 +621,14 @@ typedef enum
|
||||
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
|
||||
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
|
||||
|
||||
/*
|
||||
* For console setup by EFI GOP, the base address is BAR1.
|
||||
* For console setup by VBIOS, the base address is BAR2 + 16MB.
|
||||
*/
|
||||
#define NV_IS_CONSOLE_MAPPED(nv, addr) \
|
||||
(((addr) == (nv)->bars[NV_GPU_BAR_INDEX_FB].cpu_address) || \
|
||||
((addr) == ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000)))
|
||||
|
||||
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->iommus.iso_iommu_present)
|
||||
|
||||
@@ -878,6 +886,8 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
|
||||
NvU32 NV_API_CALL nv_get_os_type(void);
|
||||
|
||||
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
|
||||
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
|
||||
|
||||
struct dma_buf;
|
||||
typedef struct nv_dma_buf nv_dma_buf_t;
|
||||
struct drm_gem_object;
|
||||
|
||||
@@ -956,12 +956,20 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
- This function should not be called when interrupts are disabled.
|
||||
|
||||
Arguments:
|
||||
device[IN] - Device handle associated with the gpu
|
||||
pFaultInfo[IN] - information provided by RM for fault handling.
|
||||
used for obtaining the device handle without locks.
|
||||
bCopyAndFlush[IN] - Instructs RM to perform the flush in the Copy+Flush mode.
|
||||
In this mode, RM will perform a copy of the packets from
|
||||
the HW buffer to UVM's SW buffer as part of performing
|
||||
the flush. This mode gives UVM the opportunity to observe
|
||||
the packets contained within the HW buffer at the time
|
||||
of issuing the call.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
|
||||
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
|
||||
NvBool bCopyAndFlush);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceTogglePrefetchFaults
|
||||
@@ -982,7 +990,8 @@ NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable);
|
||||
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
NvBool bEnable);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceInitAccessCntrInfo
|
||||
|
||||
@@ -700,8 +700,10 @@ typedef struct UvmGpuInfo_tag
|
||||
// local EGM properties
|
||||
// NV_TRUE if EGM is enabled
|
||||
NvBool egmEnabled;
|
||||
|
||||
// Peer ID to reach local EGM when EGM is enabled
|
||||
NvU8 egmPeerId;
|
||||
|
||||
// EGM base address to offset in the GMMU PTE entry for EGM mappings
|
||||
NvU64 egmBaseAddr;
|
||||
} UvmGpuInfo;
|
||||
@@ -712,9 +714,10 @@ typedef struct UvmGpuFbInfo_tag
|
||||
// RM regions that are not registered with PMA either.
|
||||
NvU64 maxAllocatableAddress;
|
||||
|
||||
NvU32 heapSize; // RAM in KB available for user allocations
|
||||
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
|
||||
NvBool bZeroFb; // Zero FB mode enabled.
|
||||
NvU32 heapSize; // RAM in KB available for user allocations
|
||||
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
|
||||
NvBool bZeroFb; // Zero FB mode enabled.
|
||||
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
|
||||
} UvmGpuFbInfo;
|
||||
|
||||
typedef struct UvmGpuEccInfo_tag
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -160,10 +160,9 @@ NvBool NV_API_CALL os_is_vgx_hyper (void);
|
||||
NV_STATUS NV_API_CALL os_inject_vgx_msi (NvU16, NvU64, NvU32);
|
||||
NvBool NV_API_CALL os_is_grid_supported (void);
|
||||
NvU32 NV_API_CALL os_get_grid_csp_support (void);
|
||||
void NV_API_CALL os_get_screen_info (NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64, NvU64);
|
||||
void NV_API_CALL os_bug_check (NvU32, const char *);
|
||||
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
|
||||
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **, void**);
|
||||
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
|
||||
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
|
||||
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
|
||||
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
|
||||
@@ -198,6 +197,8 @@ nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry (nv_cap_t *, const char *,
|
||||
void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
|
||||
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
|
||||
void NV_API_CALL os_nv_cap_close_fd (int);
|
||||
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
|
||||
NvS32 NV_API_CALL os_imex_channel_count (void);
|
||||
|
||||
enum os_pci_req_atomics_type {
|
||||
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
|
||||
@@ -219,6 +220,7 @@ extern NvU8 os_page_shift;
|
||||
extern NvBool os_cc_enabled;
|
||||
extern NvBool os_cc_tdx_enabled;
|
||||
extern NvBool os_dma_buf_enabled;
|
||||
extern NvBool os_imex_channel_is_supported;
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
|
||||
@@ -75,7 +75,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_toggle_prefetch_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
|
||||
|
||||
@@ -96,5 +96,6 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
soc/tegra/bpmp.h \
|
||||
linux/sync_file.h \
|
||||
linux/cc_platform.h \
|
||||
asm/cpufeature.h
|
||||
asm/cpufeature.h \
|
||||
linux/mpi.h
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@
|
||||
#ifndef _UVM_H_
|
||||
#define _UVM_H_
|
||||
|
||||
#define UVM_API_LATEST_REVISION 9
|
||||
#define UVM_API_LATEST_REVISION 11
|
||||
|
||||
#if !defined(UVM_API_REVISION)
|
||||
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
|
||||
@@ -297,7 +297,9 @@ NV_STATUS UvmIsPageableMemoryAccessSupported(NvBool *pageableMemAccess);
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU for which pageable memory access support is queried.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition for which
|
||||
// pageable memory access support is queried.
|
||||
//
|
||||
// pageableMemAccess: (OUTPUT)
|
||||
// Returns true (non-zero) if the GPU represented by gpuUuid supports
|
||||
@@ -327,6 +329,12 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
|
||||
// usage. Calling UvmRegisterGpu multiple times on the same GPU from the same
|
||||
// process results in an error.
|
||||
//
|
||||
// After successfully registering a GPU partition, all subsequent API calls
|
||||
// which take a NvProcessorUuid argument (including UvmGpuMappingAttributes),
|
||||
// must use the GI partition UUID which can be obtained with
|
||||
// NvRmControl(NVC637_CTRL_CMD_GET_UUID). Otherwise, if the GPU is not SMC
|
||||
// capable or SMC enabled, the physical GPU UUID must be used.
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the physical GPU to register.
|
||||
@@ -431,7 +439,8 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to unregister.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to unregister.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
@@ -489,7 +498,8 @@ NV_STATUS UvmUnregisterGpu(const NvProcessorUuid *gpuUuid);
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to register.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to register.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// On Linux: RM ctrl fd, hClient and hVaSpace.
|
||||
@@ -560,7 +570,9 @@ NV_STATUS UvmRegisterGpuVaSpace(const NvProcessorUuid *gpuUuid,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU whose VA space should be unregistered.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition whose VA space
|
||||
// should be unregistered.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
@@ -590,7 +602,7 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
|
||||
//
|
||||
// The two GPUs must be connected via PCIe. An error is returned if the GPUs are
|
||||
// not connected or are connected over an interconnect different than PCIe
|
||||
// (NVLink, for example).
|
||||
// (NVLink or SMC partitions, for example).
|
||||
//
|
||||
// If both GPUs have GPU VA spaces registered for them, the two GPU VA spaces
|
||||
// must support the same set of page sizes for GPU mappings.
|
||||
@@ -603,10 +615,12 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuidA: (INPUT)
|
||||
// UUID of GPU A.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition A.
|
||||
//
|
||||
// gpuUuidB: (INPUT)
|
||||
// UUID of GPU B.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition B.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_NO_MEMORY:
|
||||
@@ -652,10 +666,12 @@ NV_STATUS UvmEnablePeerAccess(const NvProcessorUuid *gpuUuidA,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuidA: (INPUT)
|
||||
// UUID of GPU A.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition A.
|
||||
//
|
||||
// gpuUuidB: (INPUT)
|
||||
// UUID of GPU B.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition B.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
@@ -700,7 +716,9 @@ NV_STATUS UvmDisablePeerAccess(const NvProcessorUuid *gpuUuidA,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU that the channel is associated with.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition that the channel is
|
||||
// associated with.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// On Linux: RM ctrl fd, hClient and hChannel.
|
||||
@@ -1139,11 +1157,14 @@ NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// preferredLocationUuid: (INPUT)
|
||||
// UUID of the preferred location for this VA range.
|
||||
// UUID of the CPU, UUID of the physical GPU if the GPU is not SMC
|
||||
// capable or SMC enabled, or the GPU instance UUID of the partition of
|
||||
// the preferred location for this VA range.
|
||||
//
|
||||
// accessedByUuids: (INPUT)
|
||||
// UUIDs of all processors that should have persistent mappings to this
|
||||
// VA range.
|
||||
// UUID of the CPU, UUID of the physical GPUs if the GPUs are not SMC
|
||||
// capable or SMC enabled, or the GPU instance UUID of the partitions
|
||||
// that should have persistent mappings to this VA range.
|
||||
//
|
||||
// accessedByCount: (INPUT)
|
||||
// Number of elements in the accessedByUuids array.
|
||||
@@ -1421,7 +1442,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// destinationUuid: (INPUT)
|
||||
// UUID of the destination processor to migrate pages to.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
|
||||
// migrate pages to.
|
||||
//
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if the destination processor is
|
||||
@@ -1499,7 +1522,9 @@ NV_STATUS UvmMigrate(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// destinationUuid: (INPUT)
|
||||
// UUID of the destination processor to migrate pages to.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
|
||||
// migrate pages to.
|
||||
//
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if the destination processor is
|
||||
@@ -1576,7 +1601,9 @@ NV_STATUS UvmMigrateAsync(void *base,
|
||||
// Id of the range group whose associated VA ranges have to be migrated.
|
||||
//
|
||||
// destinationUuid: (INPUT)
|
||||
// UUID of the destination processor to migrate pages to.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
|
||||
// migrate pages to.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_OBJECT_NOT_FOUND:
|
||||
@@ -1938,7 +1965,9 @@ NV_STATUS UvmMapExternalAllocation(void *base,
|
||||
//
|
||||
//
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to map the sparse region on.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to map the sparse
|
||||
// region on.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@@ -1995,7 +2024,9 @@ NV_STATUS UvmMapExternalSparse(void *base,
|
||||
// The length of the virtual address range.
|
||||
//
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to unmap the VA range from.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to unmap the VA
|
||||
// range from.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@@ -2062,7 +2093,9 @@ NV_STATUS UvmUnmapExternalAllocation(void *base,
|
||||
// supported by the GPU.
|
||||
//
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to map the dynamic parallelism region on.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to map the
|
||||
// dynamic parallelism region on.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_UVM_ADDRESS_IN_USE:
|
||||
@@ -2293,7 +2326,9 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// preferredLocationUuid: (INPUT)
|
||||
// UUID of the preferred location.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID
|
||||
// preferred location.
|
||||
//
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
|
||||
@@ -2469,8 +2504,9 @@ NV_STATUS UvmUnsetPreferredLocation(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// accessedByUuid: (INPUT)
|
||||
// UUID of the processor that should have pages in the the VA range
|
||||
// mapped when possible.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID
|
||||
// that should have pages in the VA range mapped when possible.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@@ -2538,8 +2574,10 @@ NV_STATUS UvmSetAccessedBy(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// accessedByUuid: (INPUT)
|
||||
// UUID of the processor from which any policies set by
|
||||
// UvmSetAccessedBy should be revoked for the given VA range.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID
|
||||
// from which any policies set by UvmSetAccessedBy should be revoked
|
||||
// for the given VA range.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@@ -2597,7 +2635,9 @@ NV_STATUS UvmUnsetAccessedBy(void *base,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to enable software-assisted system-wide atomics on.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to enable
|
||||
// software-assisted system-wide atomics on.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_NO_MEMORY:
|
||||
@@ -2633,7 +2673,9 @@ NV_STATUS UvmEnableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to disable software-assisted system-wide atomics on.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to disable
|
||||
// software-assisted system-wide atomics on.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
@@ -2862,7 +2904,9 @@ NV_STATUS UvmDebugCountersEnable(UvmDebugSession session,
|
||||
// Name of the counter in that scope.
|
||||
//
|
||||
// gpu: (INPUT)
|
||||
// Gpuid of the scoped GPU. This parameter is ignored in AllGpu scopes.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition of the scoped GPU.
|
||||
// This parameter is ignored in AllGpu scopes.
|
||||
//
|
||||
// pCounterHandle: (OUTPUT)
|
||||
// Handle to the counter address.
|
||||
@@ -2916,7 +2960,7 @@ NV_STATUS UvmDebugGetCounterVal(UvmDebugSession session,
|
||||
// UvmEventQueueCreate
|
||||
//
|
||||
// This call creates an event queue of the given size.
|
||||
// No events are added in the queue till they are enabled by the user.
|
||||
// No events are added in the queue until they are enabled by the user.
|
||||
// Event queue data is visible to the user even after the target process dies
|
||||
// if the session is active and queue is not freed.
|
||||
//
|
||||
@@ -2967,7 +3011,7 @@ NV_STATUS UvmEventQueueCreate(UvmDebugSession sessionHandle,
|
||||
// UvmEventQueueDestroy
|
||||
//
|
||||
// This call frees all interal resources associated with the queue, including
|
||||
// upinning of the memory associated with that queue. Freeing user buffer is
|
||||
// unpinning of the memory associated with that queue. Freeing user buffer is
|
||||
// responsibility of a caller. Event queue might be also destroyed as a side
|
||||
// effect of destroying a session associated with this queue.
|
||||
//
|
||||
@@ -3151,9 +3195,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
|
||||
// UvmEventGetGpuUuidTable
|
||||
//
|
||||
// Each migration event entry contains the gpu index to/from where data is
|
||||
// migrated. This index maps to a corresponding gpu UUID in the gpuUuidTable.
|
||||
// Using indices saves on the size of each event entry. This API provides the
|
||||
// gpuIndex to gpuUuid relation to the user.
|
||||
// migrated. This index maps to a corresponding physical gpu UUID in the
|
||||
// gpuUuidTable. Using indices saves on the size of each event entry. This API
|
||||
// provides the gpuIndex to gpuUuid relation to the user.
|
||||
//
|
||||
// This API does not access the queue state maintained in the user
|
||||
// library and so the user doesn't need to acquire a lock to protect the
|
||||
@@ -3161,9 +3205,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuidTable: (OUTPUT)
|
||||
// The return value is an array of UUIDs. The array index is the
|
||||
// corresponding gpuIndex. There can be at max 32 gpus associated with
|
||||
// UVM, so array size is 32.
|
||||
// The return value is an array of physical GPU UUIDs. The array index
|
||||
// is the corresponding gpuIndex. There can be at max 32 GPUs
|
||||
// associated with UVM, so array size is 32.
|
||||
//
|
||||
// validCount: (OUTPUT)
|
||||
// The system doesn't normally contain 32 GPUs. This field gives the
|
||||
@@ -3222,7 +3266,7 @@ NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
|
||||
UvmEventQueueHandle queueHandle,
|
||||
UvmEventEntry *pBuffer,
|
||||
UvmEventEntry_V1 *pBuffer,
|
||||
NvU64 *nEntries);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -3418,10 +3462,15 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
|
||||
// 4. Destroy event Queue using UvmToolsDestroyEventQueue
|
||||
//
|
||||
|
||||
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
|
||||
// sizeof(UvmToolsEventControlData_V2).
|
||||
NvLength UvmToolsGetEventControlSize(void);
|
||||
|
||||
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
|
||||
// sizeof(UvmEventEntry_V2).
|
||||
NvLength UvmToolsGetEventEntrySize(void);
|
||||
#endif
|
||||
|
||||
NvLength UvmToolsGetNumberOfCounters(void);
|
||||
|
||||
@@ -3436,6 +3485,12 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// version: (INPUT)
|
||||
// Requested version for events or counters.
|
||||
// See UvmEventEntry_V1 and UvmEventEntry_V2.
|
||||
// UvmToolsEventControlData_V2::version records the entry version that
|
||||
// will be generated.
|
||||
//
|
||||
// event_buffer: (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold at least event_buffer_size events. Gets pinned until queue is
|
||||
@@ -3447,10 +3502,9 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
//
|
||||
// event_control (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold UvmToolsEventControlData (although single page-size allocation
|
||||
// should be more than enough). One could call
|
||||
// UvmToolsGetEventControlSize() function to find out current size of
|
||||
// UvmToolsEventControlData. Gets pinned until queue is destroyed.
|
||||
// hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1 or
|
||||
// UvmToolsEventControlData_V2 (although single page-size allocation
|
||||
// should be more than enough). Gets pinned until queue is destroyed.
|
||||
//
|
||||
// queue: (OUTPUT)
|
||||
// Handle to the created queue.
|
||||
@@ -3460,22 +3514,32 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// Session handle does not refer to a valid session
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
|
||||
// One of the parameters: event_buffer, event_buffer_size, event_control
|
||||
// is not valid
|
||||
//
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES:
|
||||
// There could be multiple reasons for this error. One would be that it's
|
||||
// not possible to allocate a queue of requested size. Another would be
|
||||
// that either event_buffer or event_control memory couldn't be pinned
|
||||
// (e.g. because of OS limitation of pinnable memory). Also it could not
|
||||
// have been possible to create UvmToolsEventQueueDescriptor.
|
||||
// There could be multiple reasons for this error. One would be that
|
||||
// it's not possible to allocate a queue of requested size. Another
|
||||
// would be either event_buffer or event_control memory couldn't be
|
||||
// pinned (e.g. because of OS limitation of pinnable memory). Also it
|
||||
// could not have been possible to create UvmToolsEventQueueDescriptor.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
|
||||
void *event_buffer,
|
||||
NvLength event_buffer_size,
|
||||
void *event_control,
|
||||
UvmToolsEventQueueHandle *queue);
|
||||
#else
|
||||
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
void *event_buffer,
|
||||
NvLength event_buffer_size,
|
||||
void *event_control,
|
||||
UvmToolsEventQueueHandle *queue);
|
||||
#endif
|
||||
|
||||
UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);
|
||||
|
||||
@@ -3512,7 +3576,7 @@ NV_STATUS UvmToolsSetNotificationThreshold(UvmToolsEventQueueHandle queue,
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsDestroyEventQueue
|
||||
//
|
||||
// Destroys all internal resources associated with the queue. It unpinns the
|
||||
// Destroys all internal resources associated with the queue. It unpins the
|
||||
// buffers provided in UvmToolsCreateEventQueue. Event Queue is also auto
|
||||
// destroyed when corresponding session gets destroyed.
|
||||
//
|
||||
@@ -3534,7 +3598,7 @@ NV_STATUS UvmToolsDestroyEventQueue(UvmToolsEventQueueHandle queue);
|
||||
// UvmEventQueueEnableEvents
|
||||
//
|
||||
// This call enables a particular event type in the event queue. All events are
|
||||
// disabled by default. Any event type is considered listed if and only if it's
|
||||
// disabled by default. Any event type is considered listed if and only if its
|
||||
// corresponding value is equal to 1 (in other words, bit is set). Disabled
|
||||
// events listed in eventTypeFlags are going to be enabled. Enabled events and
|
||||
// events not listed in eventTypeFlags are not affected by this call.
|
||||
@@ -3567,7 +3631,7 @@ NV_STATUS UvmToolsEventQueueEnableEvents(UvmToolsEventQueueHandle queue,
|
||||
// UvmToolsEventQueueDisableEvents
|
||||
//
|
||||
// This call disables a particular event type in the event queue. Any event type
|
||||
// is considered listed if and only if it's corresponding value is equal to 1
|
||||
// is considered listed if and only if its corresponding value is equal to 1
|
||||
// (in other words, bit is set). Enabled events listed in eventTypeFlags are
|
||||
// going to be disabled. Disabled events and events not listed in eventTypeFlags
|
||||
// are not affected by this call.
|
||||
@@ -3605,7 +3669,7 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
|
||||
//
|
||||
// Counters position follows the layout of the memory that UVM driver decides to
|
||||
// use. To obtain particular counter value, user should perform consecutive
|
||||
// atomic reads at a a given buffer + offset address.
|
||||
// atomic reads at a given buffer + offset address.
|
||||
//
|
||||
// It is not defined what is the initial value of a counter. User should rely on
|
||||
// a difference between each snapshot.
|
||||
@@ -3628,9 +3692,9 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
|
||||
// Provided session is not valid
|
||||
//
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES
|
||||
// There could be multiple reasons for this error. One would be that it's
|
||||
// not possible to allocate counters structure. Another would be that
|
||||
// either event_buffer or event_control memory couldn't be pinned
|
||||
// There could be multiple reasons for this error. One would be that
|
||||
// it's not possible to allocate counters structure. Another would be
|
||||
// that either event_buffer or event_control memory couldn't be pinned
|
||||
// (e.g. because of OS limitation of pinnable memory)
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -3641,12 +3705,12 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsCreateProcessorCounters
|
||||
//
|
||||
// Creates the counters structure for tracking per-process counters.
|
||||
// Creates the counters structure for tracking per-processor counters.
|
||||
// These counters are disabled by default.
|
||||
//
|
||||
// Counters position follows the layout of the memory that UVM driver decides to
|
||||
// use. To obtain particular counter value, user should perform consecutive
|
||||
// atomic reads at a a given buffer + offset address.
|
||||
// atomic reads at a given buffer + offset address.
|
||||
//
|
||||
// It is not defined what is the initial value of a counter. User should rely on
|
||||
// a difference between each snapshot.
|
||||
@@ -3662,7 +3726,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
|
||||
// counters are destroyed.
|
||||
//
|
||||
// processorUuid: (INPUT)
|
||||
// UUID of the resource, for which counters will provide statistic data.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID of
|
||||
// the resource, for which counters will provide statistic data.
|
||||
//
|
||||
// counters: (OUTPUT)
|
||||
// Handle to the created counters.
|
||||
@@ -3672,9 +3738,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
|
||||
// session handle does not refer to a valid tools session
|
||||
//
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES
|
||||
// There could be multiple reasons for this error. One would be that it's
|
||||
// not possible to allocate counters structure. Another would be that
|
||||
// either event_buffer or event_control memory couldn't be pinned
|
||||
// There could be multiple reasons for this error. One would be that
|
||||
// it's not possible to allocate counters structure. Another would be
|
||||
// that either event_buffer or event_control memory couldn't be pinned
|
||||
// (e.g. because of OS limitation of pinnable memory)
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT
|
||||
@@ -3690,7 +3756,7 @@ NV_STATUS UvmToolsCreateProcessorCounters(UvmToolsSessionHandle session,
|
||||
// UvmToolsDestroyCounters
|
||||
//
|
||||
// Destroys all internal resources associated with this counters structure.
|
||||
// It unpinns the buffer provided in UvmToolsCreate*Counters. Counters structure
|
||||
// It unpins the buffer provided in UvmToolsCreate*Counters. Counters structure
|
||||
// also gest destroyed when corresponding session is destroyed.
|
||||
//
|
||||
// Arguments:
|
||||
@@ -3711,7 +3777,7 @@ NV_STATUS UvmToolsDestroyCounters(UvmToolsCountersHandle counters);
|
||||
// UvmToolsEnableCounters
|
||||
//
|
||||
// This call enables certain counter types in the counters structure. Any
|
||||
// counter type is considered listed if and only if it's corresponding value is
|
||||
// counter type is considered listed if and only if its corresponding value is
|
||||
// equal to 1 (in other words, bit is set). Disabled counter types listed in
|
||||
// counterTypeFlags are going to be enabled. Already enabled counter types and
|
||||
// counter types not listed in counterTypeFlags are not affected by this call.
|
||||
@@ -3745,7 +3811,7 @@ NV_STATUS UvmToolsEnableCounters(UvmToolsCountersHandle counters,
|
||||
// UvmToolsDisableCounters
|
||||
//
|
||||
// This call disables certain counter types in the counters structure. Any
|
||||
// counter type is considered listed if and only if it's corresponding value is
|
||||
// counter type is considered listed if and only if its corresponding value is
|
||||
// equal to 1 (in other words, bit is set). Enabled counter types listed in
|
||||
// counterTypeFlags are going to be disabled. Already disabled counter types and
|
||||
// counter types not listed in counterTypeFlags are not affected by this call.
|
||||
@@ -3890,32 +3956,72 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
|
||||
// UvmToolsGetProcessorUuidTable
|
||||
//
|
||||
// Populate a table with the UUIDs of all the currently registered processors
|
||||
// in the target process. When a GPU is registered, it is added to the table.
|
||||
// When a GPU is unregistered, it is removed. As long as a GPU remains registered,
|
||||
// its index in the table does not change. New registrations obtain the first
|
||||
// unused index.
|
||||
// in the target process. When a GPU is registered, it is added to the table.
|
||||
// When a GPU is unregistered, it is removed. As long as a GPU remains
|
||||
// registered, its index in the table does not change.
|
||||
// Note that the index in the table corresponds to the processor ID reported
|
||||
// in UvmEventEntry event records and that the table is not contiguously packed
|
||||
// with non-zero UUIDs even with no GPU unregistrations.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// version: (INPUT)
|
||||
// Requested version for the UUID table returned. The version must
|
||||
// match the requested version of the event queue created with
|
||||
// UvmToolsCreateEventQueue().
|
||||
// See UvmEventEntry_V1 and UvmEventEntry_V2.
|
||||
//
|
||||
// table: (OUTPUT)
|
||||
// Array of processor UUIDs, including the CPU's UUID which is always
|
||||
// at index zero. The srcIndex and dstIndex fields of the
|
||||
// UvmEventMigrationInfo struct index this array. Unused indices will
|
||||
// have a UUID of zero.
|
||||
// have a UUID of zero. Version UvmEventEntry_V1 only uses GPU UUIDs
|
||||
// for the UUID of the physical GPU and only supports a single SMC
|
||||
// partition registered per process. Version UvmEventEntry_V2 supports
|
||||
// multiple SMC partitions registered per process and uses physical GPU
|
||||
// UUIDs if the GPU is not SMC capable or SMC enabled and GPU instance
|
||||
// UUIDs for SMC partitions.
|
||||
// The table pointer can be NULL in which case, the size of the table
|
||||
// needed to hold all the UUIDs is returned in 'count'.
|
||||
//
|
||||
// table_size: (INPUT)
|
||||
// The size of the table in number of array elements. This can be
|
||||
// zero if the table pointer is NULL.
|
||||
//
|
||||
// count: (OUTPUT)
|
||||
// Set by UVM to the number of UUIDs written, including any gaps in
|
||||
// the table due to unregistered GPUs.
|
||||
// On output, it is set by UVM to the number of UUIDs needed to hold
|
||||
// all the UUIDs, including any gaps in the table due to unregistered
|
||||
// GPUs.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// writing to table failed.
|
||||
// writing to table failed or the count pointer was invalid.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
|
||||
// The count pointer is NULL.
|
||||
// See UvmToolsEventQueueVersion.
|
||||
//
|
||||
// NV_WARN_MISMATCHED_TARGET:
|
||||
// The kernel returned a table suitable for UvmEventEntry_V1 events.
|
||||
// (i.e., the kernel is older and doesn't support UvmEventEntry_V2).
|
||||
//
|
||||
// NV_ERR_NO_MEMORY:
|
||||
// Internal memory allocation failed.
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
NvProcessorUuid *table,
|
||||
NvLength *count);
|
||||
#else
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
NvProcessorUuid *table,
|
||||
NvLength table_size,
|
||||
NvLength *count);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsFlushEvents
|
||||
|
||||
@@ -34,16 +34,6 @@
|
||||
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
|
||||
|
||||
// ATS prefetcher uses hmm_range_fault() to query residency information.
|
||||
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
|
||||
// of memory regions while hmm_range_fault() is being called, MMU interval
|
||||
// notifiers are needed.
|
||||
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
|
||||
#define UVM_ATS_PREFETCH_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_ATS_PREFETCH_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
#include <linux/hmm.h>
|
||||
#endif
|
||||
|
||||
@@ -246,7 +246,7 @@ static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma,
|
||||
return uvm_ats_region_from_start_end(start, end);
|
||||
}
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
|
||||
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
|
||||
{
|
||||
@@ -284,12 +284,12 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
int ret;
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
struct hmm_range range;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_va_block_region_t vma_region;
|
||||
@@ -370,6 +370,8 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
mmu_interval_notifier_remove(range.notifier);
|
||||
|
||||
#else
|
||||
uvm_page_mask_zero(residency_mask);
|
||||
#endif
|
||||
|
||||
return status;
|
||||
@@ -403,21 +405,24 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS status;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
// Residency mask needs to be computed even if prefetching is disabled since
|
||||
// the residency information is also needed by access counters servicing in
|
||||
// uvm_ats_service_access_counters()
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return status;
|
||||
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Prefetch the entire region if none of the pages are resident on any node
|
||||
// and if preferred_location is the faulting GPU.
|
||||
if (ats_context->prefetch_state.has_preferred_location &&
|
||||
@@ -637,8 +642,18 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
// Ignoring the return value of ats_compute_prefetch is ok since prefetching
|
||||
// is just an optimization and servicing access counter migrations is still
|
||||
// worthwhile even without any prefetching added. So, let servicing continue
|
||||
// instead of returning early even if the prefetch computation fails.
|
||||
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
|
||||
|
||||
// Remove pages which are already resident at the intended destination from
|
||||
// the accessed_mask.
|
||||
uvm_page_mask_andnot(&ats_context->accessed_mask,
|
||||
&ats_context->accessed_mask,
|
||||
&ats_context->prefetch_state.residency_mask);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
|
||||
NV_STATUS status;
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
|
||||
@@ -318,10 +318,11 @@ int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessor
|
||||
unsigned i;
|
||||
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
|
||||
|
||||
memcpy(buffer, "UVM-GPU-", 8);
|
||||
if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
|
||||
return *buffer = 0;
|
||||
|
||||
memcpy(buffer, "UVM-GPU-", 8);
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
|
||||
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
|
||||
|
||||
@@ -151,22 +151,6 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void fix_memory_info_uuid(uvm_va_space_t *va_space, UvmGpuMemoryInfo *mem_info)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
// TODO: Bug 4351121: RM will return the GI UUID, but
|
||||
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
|
||||
// Match on GI UUID until the UVM user level API has been updated to use
|
||||
// the GI UUID.
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
|
||||
mem_info->uuid = gpu->parent->uuid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@@ -197,11 +181,6 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
|
||||
// physical GPU UUID until the UVM user level has been updated to use
|
||||
// the GI UUID.
|
||||
fix_memory_info_uuid(va_space, &memory_info);
|
||||
|
||||
TEST_CHECK_GOTO(uvm_uuid_eq(&memory_info.uuid, ¶ms->gpu_uuid), done);
|
||||
|
||||
TEST_CHECK_GOTO((memory_info.size == params->size), done);
|
||||
@@ -309,11 +288,6 @@ static NV_STATUS test_get_rm_ptes_multi_gpu(uvm_va_space_t *va_space, UVM_TEST_G
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
|
||||
// physical GPU UUID until the UVM user level has been updated to use
|
||||
// the GI UUID.
|
||||
fix_memory_info_uuid(va_space, &memory_info);
|
||||
|
||||
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
|
||||
|
||||
memset(pte_buffer, 0, sizeof(pte_buffer));
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -314,7 +314,7 @@ static NV_STATUS uvm_suspend(void)
|
||||
// interrupts in the bottom half in the future, the bottom half flush
|
||||
// below will no longer be able to guarantee that all outstanding
|
||||
// notifications have been handled.
|
||||
uvm_gpu_access_counters_set_ignore(gpu, true);
|
||||
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, true);
|
||||
|
||||
uvm_parent_gpu_set_isr_suspended(gpu->parent, true);
|
||||
|
||||
@@ -373,13 +373,13 @@ static NV_STATUS uvm_resume(void)
|
||||
|
||||
// Bring the fault buffer software state back in sync with the
|
||||
// hardware state.
|
||||
uvm_gpu_fault_buffer_resume(gpu->parent);
|
||||
uvm_parent_gpu_fault_buffer_resume(gpu->parent);
|
||||
|
||||
uvm_parent_gpu_set_isr_suspended(gpu->parent, false);
|
||||
|
||||
// Reenable access counter interrupt processing unless notifications
|
||||
// have been set to be suppressed.
|
||||
uvm_gpu_access_counters_set_ignore(gpu, false);
|
||||
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, false);
|
||||
}
|
||||
|
||||
uvm_up_write(&g_uvm_global.pm.lock);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -59,6 +59,7 @@ MODULE_PARM_DESC(uvm_peer_copy, "Choose the addressing mode for peer copying, op
|
||||
|
||||
static void remove_gpu(uvm_gpu_t *gpu);
|
||||
static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
|
||||
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu);
|
||||
static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu);
|
||||
static void destroy_nvlink_peers(uvm_gpu_t *gpu);
|
||||
|
||||
@@ -241,6 +242,8 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
|
||||
gpu->mem_info.max_allocatable_address = fb_info.maxAllocatableAddress;
|
||||
}
|
||||
|
||||
gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -843,11 +846,11 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
if (!uvm_procfs_is_enabled())
|
||||
return NV_OK;
|
||||
|
||||
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), uvm_gpu_uuid(gpu));
|
||||
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->parent->uuid);
|
||||
|
||||
gpu_base_dir_entry = uvm_procfs_get_gpu_base_dir();
|
||||
|
||||
// Create UVM-GPU-${UUID}/${sub_processor_index} directory
|
||||
// Create UVM-GPU-${physical-UUID}/${sub_processor_index} directory
|
||||
snprintf(gpu_dir_name, sizeof(gpu_dir_name), "%u", uvm_id_sub_processor_index(gpu->id));
|
||||
|
||||
gpu->procfs.dir = NV_CREATE_PROC_DIR(gpu_dir_name, gpu->parent->procfs.dir);
|
||||
@@ -855,7 +858,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
// Create symlink from ${gpu_id} to
|
||||
// gpus/UVM-GPU-${UUID}/${sub_processor_index}
|
||||
// UVM-GPU-${physical-UUID}/${sub_processor_index}
|
||||
snprintf(symlink_name, sizeof(symlink_name), "%u", uvm_id_value(gpu->id));
|
||||
snprintf(gpu_dir_name,
|
||||
sizeof(gpu_dir_name),
|
||||
@@ -867,6 +870,16 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
if (gpu->procfs.dir_symlink == NULL)
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
if (gpu->parent->smc.enabled) {
|
||||
// Create symlink from UVM-GPU-${GI-UUID} to
|
||||
// UVM-GPU-${physical-UUID}/${sub_processor_index}
|
||||
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->uuid);
|
||||
|
||||
gpu->procfs.gpu_instance_uuid_symlink = proc_symlink(uuid_text_buffer, gpu_base_dir_entry, gpu_dir_name);
|
||||
if (gpu->procfs.gpu_instance_uuid_symlink == NULL)
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
}
|
||||
|
||||
// GPU peer files are debug only
|
||||
if (!uvm_procfs_is_debug_enabled())
|
||||
return NV_OK;
|
||||
@@ -882,6 +895,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
|
||||
{
|
||||
proc_remove(gpu->procfs.dir_peers);
|
||||
proc_remove(gpu->procfs.gpu_instance_uuid_symlink);
|
||||
proc_remove(gpu->procfs.dir_symlink);
|
||||
proc_remove(gpu->procfs.dir);
|
||||
}
|
||||
@@ -1038,6 +1052,7 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
|
||||
NvU32 num_entries;
|
||||
NvU64 va_size;
|
||||
NvU64 va_per_entry;
|
||||
uvm_mmu_page_table_alloc_t *tree_alloc;
|
||||
|
||||
status = uvm_page_tree_init(gpu,
|
||||
NULL,
|
||||
@@ -1059,20 +1074,30 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
|
||||
// Make sure that RM's part of the VA is aligned to the VA covered by a
|
||||
// single top level PDE.
|
||||
UVM_ASSERT_MSG(gpu->parent->rm_va_base % va_per_entry == 0,
|
||||
"va_base 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_base, va_per_entry);
|
||||
"va_base 0x%llx va_per_entry 0x%llx\n",
|
||||
gpu->parent->rm_va_base,
|
||||
va_per_entry);
|
||||
UVM_ASSERT_MSG(gpu->parent->rm_va_size % va_per_entry == 0,
|
||||
"va_size 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_size, va_per_entry);
|
||||
"va_size 0x%llx va_per_entry 0x%llx\n",
|
||||
gpu->parent->rm_va_size,
|
||||
va_per_entry);
|
||||
|
||||
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
|
||||
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));
|
||||
|
||||
tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
|
||||
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.address, num_entries,
|
||||
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.aperture == UVM_APERTURE_VID,
|
||||
gpu_get_internal_pasid(gpu)));
|
||||
tree_alloc->addr.address,
|
||||
num_entries,
|
||||
tree_alloc->addr.aperture == UVM_APERTURE_VID,
|
||||
gpu_get_internal_pasid(gpu)));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
gpu->rm_address_space_moved_to_page_tree = true;
|
||||
|
||||
return NV_OK;
|
||||
@@ -1212,6 +1237,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
{
|
||||
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
size_t len;
|
||||
NV_STATUS status;
|
||||
|
||||
if (gpu->parent->smc.enabled) {
|
||||
@@ -1229,6 +1256,20 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
uvm_uuid_copy(&gpu->uuid, &gpu_info->uuid);
|
||||
gpu->smc.swizz_id = gpu_info->smcSwizzId;
|
||||
|
||||
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->parent->uuid);
|
||||
snprintf(gpu->name,
|
||||
sizeof(gpu->name),
|
||||
"ID %u: %s",
|
||||
uvm_id_value(gpu->id),
|
||||
uuid_buffer + 4);
|
||||
|
||||
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->uuid);
|
||||
len = strlen(gpu->name);
|
||||
snprintf(gpu->name + len,
|
||||
sizeof(gpu->name) - len,
|
||||
" UVM-GI-%s",
|
||||
uuid_buffer + 8);
|
||||
|
||||
// Initialize the per-GPU procfs dirs as early as possible so that other
|
||||
// parts of the driver can add files in them as part of their per-GPU init.
|
||||
status = init_procfs_dirs(gpu);
|
||||
@@ -1338,7 +1379,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_gpu_t **gpu_out)
|
||||
{
|
||||
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
NV_STATUS status;
|
||||
bool alloc_parent = (parent_gpu == NULL);
uvm_gpu_t *gpu = NULL;
@@ -1364,13 +1404,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
if (alloc_parent)
fill_parent_gpu_info(parent_gpu, gpu_info);

format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
snprintf(gpu->name,
sizeof(gpu->name),
"ID %u: %s",
uvm_id_value(gpu->id),
uuid_buffer);

// After this point all error clean up should be handled by remove_gpu()

if (!gpu_supports_uvm(parent_gpu)) {
@@ -1432,13 +1465,25 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,

uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);

if (alloc_parent) {
if (gpu->parent->smc.enabled) {
status = discover_smc_peers(gpu);
if (status != NV_OK) {
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
atomic64_set(&gpu->retained_count, 0);
goto error;
}
}
else if (alloc_parent) {
status = discover_nvlink_peers(gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));

// Nobody can have retained the GPU yet, since we still hold the global
// lock.
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
atomic64_set(&gpu->retained_count, 0);
goto error;
@@ -1686,7 +1731,7 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)

nv_kthread_q_stop(&parent_gpu->lazy_free_q);

for (sub_processor_index = 0; sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);

uvm_kvfree(parent_gpu);
@@ -1915,32 +1960,25 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
return uvm_parent_gpu_get_by_uuid_locked(gpu_uuid);
}

static uvm_gpu_t *gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid)
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
{
uvm_gpu_id_t gpu_id;

uvm_assert_mutex_locked(&g_uvm_global.global_lock);

for_each_gpu_id(gpu_id) {
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);

if (gpu) {
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid)) {
UVM_ASSERT(!gpu->parent->smc.enabled);
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
return gpu;
}
}
}

return NULL;
}

uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
{
uvm_assert_mutex_locked(&g_uvm_global.global_lock);

return gpu_get_by_uuid_locked(gpu_uuid);
}

uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
static uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
{
uvm_gpu_t *gpu;

@@ -1998,7 +2036,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,

if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based
// if this specific partition has been seen previously. The UUID-based
// look-up above may have succeeded for a different partition with the
// same parent GPU.
if (gpu_info->smcEnabled) {
@@ -2287,7 +2325,7 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
return NV_ERR_OPERATING_SYSTEM;

// Create a symlink from UVM GPU UUID (UVM-GPU-...) to the UVM GPU ID gpuB
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), uvm_gpu_uuid(remote));
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), &remote->uuid);
peer_caps->procfs.peer_symlink_file[local_idx] = proc_symlink(symlink_name,
local->procfs.dir_peers,
gpu_dir_name);
@@ -2297,6 +2335,24 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
return NV_OK;
}

static NV_STATUS init_procfs_peer_files(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
NV_STATUS status;

if (!uvm_procfs_is_debug_enabled())
return NV_OK;

status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
if (status != NV_OK)
return status;

status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
if (status != NV_OK)
return status;

return NV_OK;
}

static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
uvm_gpu_t *gpu1,
const UvmGpuP2PCapsParams *p2p_caps_params,
@@ -2377,16 +2433,41 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
}

if (!uvm_procfs_is_debug_enabled())
return NV_OK;
return init_procfs_peer_files(gpu0, gpu1);
}

status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
if (status != NV_OK)
return status;
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu)
{
NvU32 sub_processor_index;
uvm_gpu_t *other_gpu;
NV_STATUS status;

status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
if (status != NV_OK)
return status;
UVM_ASSERT(gpu);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(gpu->parent->smc.enabled);

for_each_sub_processor_index(sub_processor_index) {
uvm_gpu_peer_t *peer_caps;

other_gpu = gpu->parent->gpus[sub_processor_index];
if (!other_gpu || other_gpu == gpu)
continue;

peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
if (peer_caps->ref_count == 1)
continue;

UVM_ASSERT(peer_caps->ref_count == 0);

memset(peer_caps, 0, sizeof(*peer_caps));
peer_caps->ref_count = 1;

status = init_procfs_peer_files(gpu, other_gpu);
if (status != NV_OK) {
peer_caps->ref_count = 0;
return status;
}
}

return NV_OK;
}
@@ -2489,9 +2570,7 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)

UVM_ASSERT(gpu);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);

if (gpu->parent->smc.enabled)
return NV_OK;
UVM_ASSERT(!gpu->parent->smc.enabled);

for_each_gpu(other_gpu) {
UvmGpuP2PCapsParams p2p_caps_params;
@@ -2592,10 +2671,6 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
UVM_ASSERT(gpu0);
UVM_ASSERT(gpu1);

// P2P is not supported under SMC partitioning
UVM_ASSERT(!gpu0->parent->smc.enabled);
UVM_ASSERT(!gpu1->parent->smc.enabled);

uvm_assert_mutex_locked(&g_uvm_global.global_lock);

peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);
@@ -2638,9 +2713,9 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
// IDs queried from the peer table above which are about to be removed from
// the global table.
if (gpu0->parent->access_counters_supported)
uvm_gpu_access_counter_buffer_flush(gpu0);
uvm_parent_gpu_access_counter_buffer_flush(gpu0->parent);
if (gpu1->parent->access_counters_supported)
uvm_gpu_access_counter_buffer_flush(gpu1);
uvm_parent_gpu_access_counter_buffer_flush(gpu1->parent);

memset(peer_caps, 0, sizeof(*peer_caps));
}
@@ -2668,12 +2743,17 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
static uvm_aperture_t uvm_gpu_peer_caps_aperture(uvm_gpu_peer_t *peer_caps, uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu)
{
size_t peer_index;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);

// Indirect peers are accessed as sysmem addresses
if (peer_caps->is_indirect_peer)
return UVM_APERTURE_SYS;

// MIG instances in the same physical GPU have vidmem addresses
if (local_gpu->parent == remote_gpu->parent)
return UVM_APERTURE_VID;

UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);

if (uvm_id_value(local_gpu->id) < uvm_id_value(remote_gpu->id))
peer_index = 0;
else
@@ -3285,12 +3365,19 @@ NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *fil
.user_client = params->hClient,
.user_object = params->hSmcPartRef,
};
NvProcessorUuid gpu_instance_uuid;
NV_STATUS status;

return uvm_va_space_register_gpu(va_space,
&params->gpu_uuid,
&user_rm_va_space,
&params->numaEnabled,
&params->numaNodeId);
status = uvm_va_space_register_gpu(va_space,
&params->gpu_uuid,
&user_rm_va_space,
&params->numaEnabled,
&params->numaNodeId,
&gpu_instance_uuid);
if (status == NV_OK)
uvm_uuid_copy(&params->gpu_uuid, &gpu_instance_uuid);

return status;
}

NV_STATUS uvm_api_unregister_gpu(UVM_UNREGISTER_GPU_PARAMS *params, struct file *filp)
@@ -3363,10 +3450,10 @@ NV_STATUS uvm_test_set_prefetch_filtering(UVM_TEST_SET_PREFETCH_FILTERING_PARAMS

switch (params->filtering_mode) {
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_ALL:
uvm_gpu_disable_prefetch_faults(gpu->parent);
uvm_parent_gpu_disable_prefetch_faults(gpu->parent);
break;
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_NONE:
uvm_gpu_enable_prefetch_faults(gpu->parent);
uvm_parent_gpu_enable_prefetch_faults(gpu->parent);
break;
default:
status = NV_ERR_INVALID_ARGUMENT;

@@ -618,9 +618,10 @@ struct uvm_gpu_struct

// The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
NvProcessorUuid uuid;

// Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
// Nice printable name in the format:
// ID: 999: GPU-<parent_uuid> UVM-GI-<gi_uuid>.
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
char name[9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH];

// Refcount of the gpu, i.e. how many times it has been retained. This is
// roughly a count of how many times it has been registered with a VA space,
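The doubled buffer above leaves room for the two UUID strings named in the name-format comment. A minimal sketch of how such a name could be assembled, assuming hypothetical local buffers uuid_buf and gi_buf (the exact format string used by add_gpu() may differ):

    char uuid_buf[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
    char gi_buf[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];

    // Parent (physical) UUID and per-partition GPU instance UUID.
    format_uuid_to_buffer(uuid_buf, sizeof(uuid_buf), &gpu->parent->uuid);
    format_uuid_to_buffer(gi_buf, sizeof(gi_buf), &gpu->uuid);

    // A short fixed prefix plus two UUID strings fits in
    // 9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH bytes.
    snprintf(gpu->name, sizeof(gpu->name), "ID %u: %s %s", uvm_id_value(gpu->id), uuid_buf, gi_buf);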
@@ -656,6 +657,10 @@ struct uvm_gpu_struct
// can allocate through PMM (PMA).
NvU64 max_allocatable_address;

// Max supported vidmem page size may be smaller than the max GMMU page
// size, because of the vMMU supported page sizes.
NvU64 max_vidmem_page_size;

struct
{
// True if the platform supports HW coherence and the GPU's memory
@@ -844,6 +849,9 @@ struct uvm_gpu_struct

struct proc_dir_entry *dir_symlink;

// The GPU instance UUID symlink if SMC is enabled.
struct proc_dir_entry *gpu_instance_uuid_symlink;

struct proc_dir_entry *info_file;

struct proc_dir_entry *dir_peers;
@@ -1210,11 +1218,6 @@ static const char *uvm_gpu_name(uvm_gpu_t *gpu)
return gpu->name;
}

static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
{
return &gpu->parent->uuid;
}

static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
{
if (gpu->parent->smc.enabled)
@@ -1234,6 +1237,9 @@ struct uvm_gpu_peer_struct
// - The global lock is held.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be SMC peers and were both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be NVLINK peers and were both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
@@ -1319,17 +1325,17 @@ static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struc
// Note that there is a uvm_gpu_get() function defined in uvm_global.h to break
// a circular dep between global and gpu modules.

// Get a uvm_gpu_t by UUID. This returns NULL if the GPU is not present. This
// is the general purpose call that should be used normally.
// That is, unless a uvm_gpu_t for a specific SMC partition needs to be
// retrieved, in which case uvm_gpu_get_by_parent_and_swizz_id() must be used
// instead.
// Get a uvm_gpu_t by UUID (physical GPU UUID if SMC is not enabled, otherwise
// GPU instance UUID).
// This returns NULL if the GPU is not present.
// This is the general purpose call that should be used normally.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
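A minimal usage sketch for the lookup above, assuming uvm_mutex_lock()/uvm_mutex_unlock() are the usual wrappers for the global lock and that some_uuid is a hypothetical, previously obtained UUID:

    uvm_gpu_t *gpu;

    uvm_mutex_lock(&g_uvm_global.global_lock);

    gpu = uvm_gpu_get_by_uuid(&some_uuid);
    if (gpu)
        UVM_DBG_PRINT("Found GPU %s\n", uvm_gpu_name(gpu));

    uvm_mutex_unlock(&g_uvm_global.global_lock);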

// Get a uvm_parent_gpu_t by UUID. Like uvm_gpu_get_by_uuid(), this function
// returns NULL if the GPU has not been registered.
// Get a uvm_parent_gpu_t by UUID (physical GPU UUID).
// Like uvm_gpu_get_by_uuid(), this function returns NULL if the GPU has not
// been registered.
//
// LOCKING: requires the global lock to be held
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
@@ -1340,13 +1346,6 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);

// Get the uvm_gpu_t for a partition by parent and swizzId. This returns NULL if
// the partition hasn't been registered. This call needs to be used instead of
// uvm_gpu_get_by_uuid() when a specific partition is targeted.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id);

// Retain a gpu by uuid
// Returns the retained uvm_gpu_t in gpu_out on success
//

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -33,7 +33,7 @@
#include "uvm_va_space_mm.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_perf_module.h"
#include "uvm_ats_ibm.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"

#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
@@ -99,7 +99,8 @@ MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
"Number of remote accesses on a region required to trigger a notification."
"Valid values: [1, 65535]");

static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode);
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
uvm_gpu_buffer_flush_mode_t flush_mode);

static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};

@@ -126,7 +127,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va

// Whether access counter migrations are enabled or not. The policy is as
// follows:
// - MIMC migrations are disabled by default on all systems except P9.
// - MIMC migrations are disabled by default on all non-ATS systems.
// - MOMC migrations are disabled by default on all systems
// - Users can override this policy by specifying on/off
static bool is_migration_enabled(uvm_access_counter_type_t type)
@@ -149,7 +150,7 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return false;

if (UVM_ATS_IBM_SUPPORTED())
if (UVM_ATS_SUPPORTED())
return g_uvm_global.ats.supported;

return false;
@@ -281,7 +282,7 @@ get_config_for_type(const uvm_access_counter_buffer_info_t *access_counters, uvm
&(access_counters)->current_config.momc;
}

bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->access_counters_supported);

@@ -340,7 +341,7 @@ static void init_access_counter_types_config(const UvmGpuAccessCntrConfig *confi
UVM_ASSERT(counter_type_config->sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
}

NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
@@ -444,12 +445,12 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
return NV_OK;

fail:
uvm_gpu_deinit_access_counters(parent_gpu);
uvm_parent_gpu_deinit_access_counters(parent_gpu);

return status;
}

void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
{
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
@@ -475,7 +476,7 @@ void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
batch_context->phys.translations = NULL;
}

bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
{
if (!parent_gpu->access_counters_supported)
return false;

@@ -518,7 +519,7 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntr
// taken control of the notify buffer since the GPU was initialized. Then
// flush old notifications. This will update the cached_put pointer.
access_counters->cached_get = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferGet);
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
access_counter_buffer_flush_locked(gpu->parent, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);

access_counters->current_config.threshold = config->threshold;

@@ -537,20 +538,20 @@ error:

// If ownership is yielded as part of reconfiguration, the access counters
// handling refcount may not be 0
static void access_counters_yield_ownership(uvm_gpu_t *gpu)
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;

UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));

// Wait for any pending clear operation befor releasing ownership
status = uvm_tracker_wait(&access_counters->clear_tracker);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());

status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(gpu->parent->rm_device,
status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(parent_gpu->rm_device,
&access_counters->rm_info));
UVM_ASSERT(status == NV_OK);
}
@@ -579,14 +580,14 @@ static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu, UvmGpuAccessCntrConf

// Decrement the refcount of access counter enablement. If this is the last
// reference, disable the HW feature.
static void gpu_access_counters_disable(uvm_gpu_t *gpu)
static void parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(gpu->parent->isr.access_counters.handling_ref_count > 0);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count > 0);

if (--gpu->parent->isr.access_counters.handling_ref_count == 0)
access_counters_yield_ownership(gpu);
if (--parent_gpu->isr.access_counters.handling_ref_count == 0)
access_counters_yield_ownership(parent_gpu);
}

// Invoked during registration of the GPU in the VA space
@@ -598,7 +599,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_spac

uvm_parent_gpu_access_counters_isr_lock(gpu->parent);

if (uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = NV_ERR_INVALID_DEVICE;
}
else {
@@ -616,7 +617,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_spac
// modified to protect from concurrent enablement of access counters in
// another GPU
if (status == NV_OK)
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
}

// If this is the first reference taken on access counters, dropping the
@@ -626,22 +627,24 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_spac
return status;
}

void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu,
uvm_va_space_t *va_space)
{
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(parent_gpu->access_counters_supported);

uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);

if (uvm_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id)) {
gpu_access_counters_disable(gpu);
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
parent_gpu->id)) {
parent_gpu_access_counters_disable(parent_gpu);

// If this is VA space reconfigured access counters, clear the
// ownership to allow for other processes to invoke the reconfiguration
if (gpu->parent->access_counter_buffer_info.reconfiguration_owner == va_space)
gpu->parent->access_counter_buffer_info.reconfiguration_owner = NULL;
if (parent_gpu->access_counter_buffer_info.reconfiguration_owner == va_space)
parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
}

uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}

static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
@@ -660,15 +663,16 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
}

static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode)
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
uvm_gpu_buffer_flush_mode_t flush_mode)
{
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;

UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);

// Read PUT pointer from the GPU if requested
UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
@@ -680,28 +684,28 @@ static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_fl

while (get != put) {
// Wait until valid bit is set
UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin);
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);

gpu->parent->access_counter_buffer_hal->entry_clear_valid(gpu->parent, get);
parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
++get;
if (get == access_counters->max_notifications)
get = 0;
}

write_get(gpu->parent, get);
write_get(parent_gpu, get);
}

void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu)
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(parent_gpu->access_counters_supported);

// Disables access counter interrupts and notification servicing
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);

if (gpu->parent->isr.access_counters.handling_ref_count > 0)
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
if (parent_gpu->isr.access_counters.handling_ref_count > 0)
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);

uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}

static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
@@ -1027,7 +1031,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
if (!iter.migratable)
continue;

thrashing_hint = uvm_perf_thrashing_get_hint(va_block, address, processor);
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, service_context->block_context, address, processor);
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
// If the page is throttling, ignore the access counter
// notification
@@ -1212,7 +1216,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,

service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;
service_context->block_context->mm = mm;

uvm_va_block_context_init(service_context->block_context, mm);

if (uvm_va_block_is_hmm(va_block))
uvm_hmm_migrate_begin_wait(va_block);
@@ -1221,7 +1226,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,

reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);

status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
va_block,
&va_block_retry,
@@ -1506,8 +1512,6 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;

uvm_va_block_context_init(service_context->block_context, mm);

return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
@@ -1519,6 +1523,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,

static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_mask_t *accessed_pages,
const uvm_access_counter_buffer_entry_t *current_entry)
{
@@ -1546,7 +1551,7 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,

page_index = uvm_va_block_cpu_page_index(va_block, addr);

resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, gpu->id);

// resident_id might be invalid or might already be the same as the GPU
// which received the notification if the memory was already migrated before
@@ -1602,6 +1607,7 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
uvm_service_block_context_t *service_context = &batch_context->block_service_context;

UVM_ASSERT(va_block);
UVM_ASSERT(index < batch_context->virt.num_notifications);
@@ -1610,16 +1616,24 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_

uvm_page_mask_zero(accessed_pages);

uvm_va_block_context_init(service_context->block_context, mm);

uvm_mutex_lock(&va_block->lock);

for (i = index; i < batch_context->virt.num_notifications; i++) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
NvU64 address = current_entry->address.address;

if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
else
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end)) {
expand_notification_block(gpu_va_space,
va_block,
batch_context->block_service_context.block_context,
accessed_pages,
current_entry);
}
else {
break;
}
}

*out_index = i;
@@ -1698,6 +1712,9 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
// Atleast one notification should have been processed.
UVM_ASSERT(index < *out_index);

// TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
// location is set
// If no pages were actually migrated, don't clear the access counters.
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
@@ -1985,7 +2002,7 @@ NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_E
if (!gpu)
return NV_ERR_INVALID_DEVICE;

params->enabled = uvm_gpu_access_counters_required(gpu->parent);
params->enabled = uvm_parent_gpu_access_counters_required(gpu->parent);

uvm_gpu_release(gpu);

@@ -2050,11 +2067,11 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
goto exit_isr_unlock;
}

if (!uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = gpu_access_counters_enable(gpu, &config);

if (status == NV_OK)
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
else
goto exit_isr_unlock;
}
@@ -2066,7 +2083,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
// enabled in at least gpu. This inconsistent state is not visible to other
// threads or VA spaces because of the ISR lock, and it is immediately
// rectified by retaking ownership.
access_counters_yield_ownership(gpu);
access_counters_yield_ownership(gpu->parent);
status = access_counters_take_ownership(gpu, &config);

// Retaking ownership failed, so RM owns the interrupt.
@@ -2080,8 +2097,8 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
"Access counters interrupt still owned by RM, other VA spaces may experience failures");
}

uvm_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id);
gpu_access_counters_disable(gpu);
uvm_parent_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
parent_gpu_access_counters_disable(gpu->parent);
goto exit_isr_unlock;
}

@@ -2167,42 +2184,42 @@ exit_release_gpu:
return status;
}

void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore)
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore)
{
bool change_intr_state = false;

if (!gpu->parent->access_counters_supported)
if (!parent_gpu->access_counters_supported)
return;

uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);

if (do_ignore) {
if (gpu->parent->access_counter_buffer_info.notifications_ignored_count++ == 0)
if (parent_gpu->access_counter_buffer_info.notifications_ignored_count++ == 0)
change_intr_state = true;
}
else {
UVM_ASSERT(gpu->parent->access_counter_buffer_info.notifications_ignored_count >= 1);
if (--gpu->parent->access_counter_buffer_info.notifications_ignored_count == 0)
UVM_ASSERT(parent_gpu->access_counter_buffer_info.notifications_ignored_count >= 1);
if (--parent_gpu->access_counter_buffer_info.notifications_ignored_count == 0)
change_intr_state = true;
}

if (change_intr_state) {
// We need to avoid an interrupt storm while ignoring notifications. We
// just disable the interrupt.
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

if (do_ignore)
uvm_parent_gpu_access_counters_intr_disable(gpu->parent);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
else
uvm_parent_gpu_access_counters_intr_enable(gpu->parent);
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);

uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);

if (!do_ignore)
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
}

uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}

NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
@@ -2216,7 +2233,7 @@ NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTER
return NV_ERR_INVALID_DEVICE;

if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_set_ignore(gpu, params->ignore);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, params->ignore);
else
status = NV_ERR_NOT_SUPPORTED;

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,13 +27,13 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"

NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);

void uvm_gpu_service_access_counters(uvm_gpu_t *gpu);

void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);

// Ignore or unignore access counters notifications. Ignoring means that the
// bottom half is a no-op which just leaves notifications in the HW buffer
@@ -46,7 +46,7 @@ void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
//
// When uningoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore);
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
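A minimal usage sketch for the declaration above, mirroring the UVM_TEST_SET_IGNORE_ACCESS_COUNTERS path shown earlier: bracket a test window so notifications stay parked in the HW buffer, then unignore, which re-enables the interrupt and flushes the buffered entries:

    // Park notifications while the experiment runs.
    uvm_parent_gpu_access_counters_set_ignore(gpu->parent, true);

    // ... generate remote accesses here ...

    // Re-enable the interrupt; buffered notifications are flushed on unignore.
    uvm_parent_gpu_access_counters_set_ignore(gpu->parent, false);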

// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
@@ -63,7 +63,7 @@ void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);

// Check whether access counters should be enabled when the given GPU is
// registered on any VA space.
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);

// Functions used to enable/disable access counters on a GPU in the given VA
// space.
@@ -72,12 +72,12 @@ bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_gpu_access_counters_disable
// uvm_parent_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
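A minimal pairing sketch for the two declarations above, as a VA space might use them during GPU registration and unregistration (error handling beyond the enable failure is elided):

    NV_STATUS status;

    status = uvm_gpu_access_counters_enable(gpu, va_space);
    if (status != NV_OK)
        return status;

    // ... GPU is registered and serviced ...

    // Tear-down: the parent-GPU variant drops this VA space's reference and
    // disables the HW feature when the last reference goes away.
    uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);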

NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -100,7 +100,7 @@ static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) != 0)
return 0;

if (!uvm_gpu_replayable_faults_pending(parent_gpu)) {
if (!uvm_parent_gpu_replayable_faults_pending(parent_gpu)) {
up(&parent_gpu->isr.replayable_faults.service_lock.sem);
return 0;
}
@@ -137,7 +137,7 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
// interrupts will be triggered by the gpu and faults may stay
// unserviced. Therefore, if there is a fault in the queue, we schedule
// a bottom half unconditionally.
if (!uvm_gpu_non_replayable_faults_pending(parent_gpu))
if (!uvm_parent_gpu_non_replayable_faults_pending(parent_gpu))
return 0;

nv_kref_get(&parent_gpu->gpu_kref);
@@ -167,7 +167,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
return 0;

if (!uvm_gpu_access_counters_pending(parent_gpu)) {
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
return 0;
}
@@ -295,7 +295,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
uvm_va_block_context_t *block_context;

if (parent_gpu->replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init(parent_gpu);
status = uvm_parent_gpu_fault_buffer_init(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU fault buffer: %s, GPU: %s\n",
nvstatusToString(status),
@@ -361,7 +361,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}

if (parent_gpu->access_counters_supported) {
status = uvm_gpu_init_access_counters(parent_gpu);
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
@@ -423,7 +423,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
// bottom half never take the global lock, since we're holding it here.
//
// Note that it's safe to call nv_kthread_q_stop() even if
// nv_kthread_q_init() failed in uvm_gpu_init_isr().
// nv_kthread_q_init() failed in uvm_parent_gpu_init_isr().
nv_kthread_q_stop(&parent_gpu->isr.bottom_half_q);
nv_kthread_q_stop(&parent_gpu->isr.kill_channel_q);
}
@@ -438,8 +438,8 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
// replayable_faults.disable_intr_ref_count since they must retain the
// GPU across uvm_parent_gpu_replayable_faults_isr_lock/
// uvm_parent_gpu_replayable_faults_isr_unlock. This means the
// uvm_gpu_replayable_faults_disable_intr above could only have raced
// with bottom halves.
// uvm_parent_gpu_replayable_faults_disable_intr above could only have
// raced with bottom halves.
//
// If we cleared replayable_faults.handling before the bottom half got
// to its uvm_parent_gpu_replayable_faults_isr_unlock, when it
@@ -455,13 +455,13 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
uvm_parent_gpu_name(parent_gpu),
parent_gpu->isr.replayable_faults.disable_intr_ref_count);

uvm_gpu_fault_buffer_deinit(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);
}

if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_gpu_deinit_access_counters(parent_gpu);
uvm_parent_gpu_deinit_access_counters(parent_gpu);
block_context =
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -116,8 +116,8 @@


// There is no error handling in this function. The caller is in charge of
// calling uvm_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;

@@ -145,7 +145,7 @@ NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *pare
return NV_OK;
}

void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;

@@ -163,7 +163,7 @@ void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_
non_replayable_faults->fault_cache = NULL;
}

bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvBool has_pending_faults;

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -26,12 +26,12 @@
#include <nvstatus.h>
#include "uvm_forward_decl.h"

bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);

void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu);

NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);

void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);

#endif // __UVM_GPU_NON_REPLAYABLE_FAULTS_H__

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,6 +44,24 @@
// provides some background for understanding replayable faults, non-replayable
// faults, and how UVM services each fault type.

// The HW fault buffer flush mode instructs RM on how to flush the hardware
// replayable fault buffer; it is only used in Confidential Computing.
//
// Unless HW_FAULT_BUFFER_FLUSH_MODE_MOVE is functionally required (because UVM
// needs to inspect the faults currently present in the HW fault buffer) it is
// recommended to use HW_FAULT_BUFFER_FLUSH_MODE_DISCARD for performance
// reasons.
typedef enum
{
// Flush the HW fault buffer, discarding all the resulting faults. UVM never
// gets to see these faults.
HW_FAULT_BUFFER_FLUSH_MODE_DISCARD,

// Flush the HW fault buffer, and move all the resulting faults to the SW
// fault ("shadow") buffer.
HW_FAULT_BUFFER_FLUSH_MODE_MOVE,
} hw_fault_buffer_flush_mode_t;
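As the updated hw_fault_buffer_flush_locked() later in this change shows, the enum is translated into the bCopyAndFlush argument of the RM call; a condensed sketch of that mapping:

    NvBool is_flush_mode_move = (NvBool)(flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);

    // DISCARD drops the HW faults inside GSP-RM; MOVE copies them into the
    // shadow buffer so UVM can still inspect them.
    status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);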

#define UVM_PERF_REENABLE_PREFETCH_FAULTS_LAPSE_MSEC_DEFAULT 1000

// Lapse of time in milliseconds after which prefetch faults can be re-enabled.
@@ -226,7 +244,7 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
batch_context->utlbs = NULL;
}

NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;

@@ -253,7 +271,7 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
goto fail;

if (parent_gpu->non_replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
status = uvm_parent_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
if (status != NV_OK)
goto fail;
}
@@ -261,28 +279,28 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
return NV_OK;

fail:
uvm_gpu_fault_buffer_deinit(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);

return status;
}

// Reinitialize state relevant to replayable fault handling after returning
// from a power management cycle.
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->replayable_faults_supported);

fault_buffer_reinit_replayable_faults(parent_gpu);
}

void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;

uvm_assert_mutex_locked(&g_uvm_global.global_lock);

if (parent_gpu->non_replayable_faults_supported)
uvm_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);

fault_buffer_deinit_replayable_faults(parent_gpu);

@@ -297,7 +315,7 @@ void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
}
}

bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;

@@ -533,25 +551,26 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
parent_gpu->fault_buffer_hal->write_get(parent_gpu, get);
}

static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu)
// In Confidential Computing GSP-RM owns the HW replayable fault buffer.
// Flushing the fault buffer implies flushing both the HW buffer (using a RM
// API), and the SW buffer accessible by UVM ("shadow" buffer).
//
// The HW buffer needs to be flushed first. This is because, once that flush
// completes, any faults that were present in the HW buffer have been moved to
// the shadow buffer, or have been discarded by RM.
static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_fault_buffer_flush_mode_t flush_mode)
{
NV_STATUS status = NV_OK;
NV_STATUS status;
NvBool is_flush_mode_move;

UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
UVM_ASSERT((flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE) || (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_DISCARD));

// When Confidential Computing is enabled, GSP-RM owns the HW replayable
// fault buffer. Flushing the fault buffer implies flushing both the HW
// buffer (using a RM API), and the SW buffer accessible by UVM ("shadow"
// buffer).
//
// The HW buffer needs to be flushed first. This is because, once that
// flush completes, any faults that were present in the HW buffer when
// fault_buffer_flush_locked is called, are now either flushed from the HW
// buffer, or are present in the shadow buffer and are about to be discarded
// too.
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;

// Flush the HW replayable buffer owned by GSP-RM.
status = nvUvmInterfaceFlushReplayableFaultBuffer(parent_gpu->rm_device);
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);

UVM_ASSERT(status == NV_OK);

@@ -595,10 +614,9 @@ static NV_STATUS fault_buffer_flush_locked(uvm_gpu_t *gpu,

// Read PUT pointer from the GPU if requested
if (flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT || flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT) {
status = hw_fault_buffer_flush_locked(parent_gpu);
status = hw_fault_buffer_flush_locked(parent_gpu, HW_FAULT_BUFFER_FLUSH_MODE_DISCARD);
if (status != NV_OK)
return status;

replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
}

@@ -1435,7 +1453,10 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_fault_access_type_to_prot(service_access_type)))
continue;

thrashing_hint = uvm_perf_thrashing_get_hint(va_block, current_entry->fault_address, gpu->id);
thrashing_hint = uvm_perf_thrashing_get_hint(va_block,
block_context->block_context,
current_entry->fault_address,
gpu->id);
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
// Throttling is implemented by sleeping in the fault handler on
// the CPU and by continuing to process faults on other pages on
@@ -1981,7 +2002,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_servic
// in the HW buffer. When GSP owns the HW buffer, we also have to wait for
// GSP to copy all available faults from the HW buffer into the shadow
// buffer.
status = hw_fault_buffer_flush_locked(gpu->parent);
status = hw_fault_buffer_flush_locked(gpu->parent, HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
if (status != NV_OK)
goto done;

@@ -2738,14 +2759,14 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu, uvm_fau
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_gpu_disable_prefetch_faults(parent_gpu);
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;

// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
uvm_gpu_enable_prefetch_faults(parent_gpu);
uvm_parent_gpu_enable_prefetch_faults(parent_gpu);
}
}
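In the hunk above, the timestamp is taken with NV_GETTIME(), which the comparison treats as nanoseconds, so the millisecond module parameter is scaled by 1000 * 1000. A condensed restatement of the check, with hypothetical intermediate names for clarity:

    NvU64 lapse_ns = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
    NvU64 limit_ns = (NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * 1000 * 1000;

    // Re-enable prefetch faults once the configured lapse has passed.
    if (lapse_ns > limit_ns)
        uvm_parent_gpu_enable_prefetch_faults(parent_gpu);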
|
||||
|
||||
@@ -2872,7 +2893,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
|
||||
UVM_DBG_PRINT("Error servicing replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
|
||||
}
|
||||
|
||||
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
|
||||
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
|
||||
@@ -2883,7 +2904,7 @@ void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
}
|
||||
|
||||
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
|
||||
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
|
||||
@@ -2940,7 +2961,7 @@ NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARA
|
||||
|
||||
do {
|
||||
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
|
||||
pending = uvm_gpu_replayable_faults_pending(gpu->parent);
|
||||
pending = uvm_parent_gpu_replayable_faults_pending(gpu->parent);
|
||||
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
|
||||
|
||||
if (!pending)
|
||||
|
||||
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015 NVIDIA Corporation
    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -52,12 +52,12 @@ typedef enum

const char *uvm_perf_fault_replay_policy_string(uvm_perf_fault_replay_policy_t fault_replay);

NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);

void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);

bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);

// Clear valid bit for all remaining unserviced faults in the buffer, set GET to
// PUT, and push a fault replay of type UVM_FAULT_REPLAY_TYPE_START. It does not
@@ -68,8 +68,8 @@ bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu);

// Enable/disable HW support for prefetch-initiated faults
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);

// Service pending replayable faults on the given GPU. This function must be
// only called from the ISR bottom half

@@ -1306,7 +1306,7 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
    uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
    uvm_va_policy_node_t *node;
    uvm_va_block_region_t region;
    uvm_processor_mask_t map_processors;
    uvm_processor_mask_t *map_processors = &block_context->hmm.map_processors_eviction;
    uvm_processor_id_t id;
    NV_STATUS tracker_status;
    NV_STATUS status = NV_OK;
@@ -1333,9 +1333,9 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,

        // Exclude the processors that have been already mapped due to
        // AccessedBy.
        uvm_processor_mask_andnot(&map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
        uvm_processor_mask_andnot(map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);

        for_each_gpu_id_in_mask(id, &map_processors) {
        for_each_gpu_id_in_mask(id, map_processors) {
            uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
            uvm_va_block_gpu_state_t *gpu_state;

@@ -1866,7 +1866,7 @@ static void lock_block_cpu_page(uvm_va_block_t *va_block,
                                unsigned long *dst_pfns,
                                uvm_page_mask_t *same_devmem_page_mask)
{
    uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(src_page), page_index);
    uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_any_chunk_for_page(va_block, page_index);
    uvm_va_block_region_t chunk_region;
    struct page *dst_page;

@@ -2708,7 +2708,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
            // Since there is a CPU resident page, there shouldn't be one
            // anywhere else. TODO: Bug 3660922: Need to handle read
            // duplication at some point.
            UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));
            UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
                                                                    service_context->block_context,
                                                                    page_index));

            // migrate_vma_setup() was able to isolate and lock the page;
            // therefore, it is CPU resident and not mapped.
@@ -2725,8 +2727,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
            // used for GPU to GPU copies. It can't be an evicted page because
            // migrate_vma_setup() would have found a source page.
            if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
                UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));

                UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
                                                                        service_context->block_context,
                                                                        page_index));
                hmm_va_block_cpu_page_unpopulate(va_block, page_index, NULL);
            }
        }

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2013-2019 NVidia Corporation
    Copyright (c) 2013-2023 NVidia Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -320,7 +320,7 @@ typedef struct

typedef struct
{
    NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS];    // IN
    NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // IN
    NvU32           numGpus;                       // IN
    NvU64           serverId NV_ALIGN_BYTES(8);    // OUT
    NV_STATUS       rmStatus;                      // OUT
@@ -344,9 +344,9 @@ typedef struct

typedef struct
{
    NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS];    // OUT
    NvU32           validCount;                    // OUT
    NV_STATUS       rmStatus;                      // OUT
    NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // OUT
    NvU32           validCount;                    // OUT
    NV_STATUS       rmStatus;                      // OUT
} UVM_GET_GPU_UUID_TABLE_PARAMS;

#if defined(WIN32) || defined(WIN64)
@@ -494,7 +494,7 @@ typedef struct
    NvU64                   base      NV_ALIGN_BYTES(8);         // IN
    NvU64                   length    NV_ALIGN_BYTES(8);         // IN
    NvU64                   offset    NV_ALIGN_BYTES(8);         // IN
    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS];      // IN
    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2];   // IN
    NvU64                   gpuAttributesCount NV_ALIGN_BYTES(8); // IN
    NvS32                   rmCtrlFd;                            // IN
    NvU32                   hClient;                             // IN
@@ -552,7 +552,7 @@ typedef struct

typedef struct
{
    NvProcessorUuid gpu_uuid;    // IN
    NvProcessorUuid gpu_uuid;    // IN/OUT
    NvBool          numaEnabled; // OUT
    NvS32           numaNodeId;  // OUT
    NvS32           rmCtrlFd;    // IN
@@ -835,7 +835,14 @@ typedef struct

//
// Initialize any tracker object such as a queue or counter
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters, UvmToolsCreateProcessorCounters
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters,
// UvmToolsCreateProcessorCounters.
// Note that the order of structure elements has the version as the last field.
// This is used to tell whether the kernel supports V2 events or not because
// the V1 UVM_TOOLS_INIT_EVENT_TRACKER ioctl would not read or update that
// field but V2 will. This is needed because it is possible to create an event
// queue before CUDA is initialized which means UvmSetDriverVersion() hasn't
// been called yet and the kernel version is unknown.
//
#define UVM_TOOLS_INIT_EVENT_TRACKER UVM_IOCTL_BASE(56)
typedef struct
@@ -847,6 +854,8 @@ typedef struct
    NvU32     allProcessors;    // IN
    NvU32     uvmFd;            // IN
    NV_STATUS rmStatus;         // OUT
    NvU32     requestedVersion; // IN
    NvU32     grantedVersion;   // OUT
} UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS;

//
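The requestedVersion/grantedVersion pair added above is how a tools client discovers, at event-queue creation time, whether the kernel understands V2 events. The following is only an illustrative sketch of that negotiation, not code from this commit: uvm_tools_ioctl() stands in for whatever ioctl wrapper the client uses, and the enum values are the UvmToolsEventQueueVersion constants introduced further down in this change.

// Hypothetical user-space sketch (assumes uvm_ioctl.h is included and that
// uvm_tools_ioctl() wraps the driver's ioctl entry point).
static int negotiate_event_version(int uvm_fd, UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params)
{
    // Pre-seed grantedVersion: a V1-only kernel never reads or writes this
    // field (it lies past the end of the V1 structure), so it stays at V1.
    params->grantedVersion   = UvmToolsEventQueueVersion_V1;
    params->requestedVersion = UvmToolsEventQueueVersion_V2;

    if (uvm_tools_ioctl(uvm_fd, UVM_TOOLS_INIT_EVENT_TRACKER, params) != 0)
        return -1;

    // Fall back to the V1 event layout when the kernel left the field untouched.
    return (params->grantedVersion == UvmToolsEventQueueVersion_V2) ? 2 : 1;
}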
@@ -927,6 +936,12 @@ typedef struct

//
// UvmToolsGetProcessorUuidTable
// Note that tablePtr != 0 and count == 0 means that tablePtr is assumed to be
// an array of size UVM_MAX_PROCESSORS_V1 and that only UvmEventEntry_V1
// processor IDs (physical GPU UUIDs) will be reported.
// tablePtr == 0 and count == 0 can be used to query how many processors are
// present in order to dynamically allocate the correct size array since the
// total number of processors is returned in 'count'.
//
#define UVM_TOOLS_GET_PROCESSOR_UUID_TABLE UVM_IOCTL_BASE(64)
typedef struct
@@ -934,6 +949,7 @@ typedef struct
    NvU64     tablePtr NV_ALIGN_BYTES(8); // IN
    NvU32     count;                      // IN/OUT
    NV_STATUS rmStatus;                   // OUT
    NvU32     version;                    // OUT
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;


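The comment above describes a two-call pattern: query the processor count first, then fetch the table into a buffer of exactly that size. A minimal sketch of that pattern is shown below; it is an assumption-laden illustration, not driver code — uvm_tools_ioctl() is a hypothetical wrapper, error reporting via rmStatus is glossed over, and the usual libc headers (stdlib.h, stdint.h) are assumed.

// Hypothetical sketch of the query-then-fetch convention described above.
static NvProcessorUuid *query_processor_uuids(int uvm_fd, NvU32 *out_count)
{
    UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS params = {0};
    NvProcessorUuid *table;

    // First call: tablePtr == 0, count == 0 asks how many processors exist.
    if (uvm_tools_ioctl(uvm_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params) != 0)
        return NULL;

    table = calloc(params.count, sizeof(*table));
    if (!table)
        return NULL;

    // Second call: fetch the table into the correctly sized buffer.
    params.tablePtr = (NvU64)(uintptr_t)table;
    if (uvm_tools_ioctl(uvm_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params) != 0) {
        free(table);
        return NULL;
    }

    *out_count = params.count;
    return table;
}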
@@ -979,7 +995,7 @@ typedef struct
{
    NvU64                   base   NV_ALIGN_BYTES(8);            // IN
    NvU64                   length NV_ALIGN_BYTES(8);            // IN
    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS];      // IN
    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2];   // IN
    NvU64                   gpuAttributesCount NV_ALIGN_BYTES(8); // IN
    NV_STATUS               rmStatus;                            // OUT
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;

@@ -114,6 +114,16 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define UVM_IS_CONFIG_HMM() 0
#endif

// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
#else
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
#endif

// Various issues prevent us from using mmu_notifiers in older kernels. These
// include:
//  - ->release being called under RCU instead of SRCU: fixed by commit

@@ -633,8 +633,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
                                          uvm_gpu_t *mapping_gpu,
                                          const UvmGpuMemoryInfo *mem_info)
{
    uvm_gpu_t *owning_gpu = NULL;
    uvm_gpu_t *gpu;
    uvm_gpu_t *owning_gpu;

    if (mem_info->egm)
        UVM_ASSERT(mem_info->sysmem);
@@ -653,16 +652,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
    // registered.
    // This also checks for if EGM owning GPU is registered.

    // TODO: Bug 4351121: RM will return the GI UUID, but
    // uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
    // Match on GI UUID until the UVM user level API has been updated to use
    // the GI UUID.
    for_each_va_space_gpu(gpu, va_space) {
        if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
            owning_gpu = gpu;
            break;
        }
    }
    owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
    if (!owning_gpu)
        return NV_ERR_INVALID_DEVICE;

@@ -954,6 +944,12 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
        goto error;
    }

    // Check for the maximum page size for the mapping of vidmem allocations,
    // the vMMU segment size may limit the range of page sizes.
    if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
        (mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
        mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;

    mem_info.pageSize = mapping_page_size;

    status = uvm_va_range_map_rm_allocation(va_range, mapping_gpu, &mem_info, map_rm_params, ext_gpu_map, out_tracker);
@@ -989,7 +985,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
    if (uvm_api_range_invalid_4k(params->base, params->length))
        return NV_ERR_INVALID_ADDRESS;

    if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
    if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
        return NV_ERR_INVALID_ARGUMENT;

    uvm_va_space_down_read_rm(va_space);

@@ -86,7 +86,7 @@ static NV_STATUS block_migrate_map_mapped_pages(uvm_va_block_t *va_block,

    // Only map those pages that are not already mapped on destination
    for_each_va_block_unset_page_in_region_mask(page_index, pages_mapped_on_destination, region) {
        prot = uvm_va_block_page_compute_highest_permission(va_block, dest_id, page_index);
        prot = uvm_va_block_page_compute_highest_permission(va_block, va_block_context, dest_id, page_index);
        if (prot == UVM_PROT_NONE)
            continue;


@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation
    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -149,6 +149,26 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
    return NV_OK;
}

// The aperture may filter the biggest page size:
// - UVM_APERTURE_VID       biggest page size on vidmem mappings
// - UVM_APERTURE_SYS       biggest page size on sysmem mappings
// - UVM_APERTURE_PEER_0-7  biggest page size on peer mappings
static NvU32 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
{
    UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);

    // There may be scenarios where the GMMU must use a subset of the supported
    // page sizes, e.g., to comply with the vMMU supported page sizes due to
    // segmentation sizes.
    if (aperture == UVM_APERTURE_VID) {
        UVM_ASSERT(tree->gpu->mem_info.max_vidmem_page_size <= NV_U32_MAX);
        return (NvU32) tree->gpu->mem_info.max_vidmem_page_size;
    }
    else {
        return 1 << __fls(tree->hal->page_sizes());
    }
}

static NV_STATUS phys_mem_allocate_vidmem(uvm_page_tree_t *tree,
                                          NvLength size,
                                          uvm_pmm_alloc_flags_t pmm_flags,
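As the comment above explains, the aperture argument decides whether the vMMU/segmentation clamp applies. A short usage sketch, purely illustrative and built only from names visible in this diff, makes the asymmetry explicit:

// Hypothetical usage sketch (not part of this commit):
// NvU32 vid_ps = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_VID);
// NvU32 sys_ps = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
// vid_ps is clamped to gpu->mem_info.max_vidmem_page_size (the vMMU segment limit),
// while sys_ps is simply the largest page size reported by the GMMU HAL.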
@@ -856,7 +876,7 @@ static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
    if (!page_tree_ats_init_required(tree))
        return NV_OK;

    page_size = uvm_mmu_biggest_page_size(tree);
    page_size = mmu_biggest_page_size(tree, UVM_APERTURE_VID);

    uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);

@@ -1090,6 +1110,8 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
    tree->gpu_va_space = gpu_va_space;
    tree->big_page_size = big_page_size;

    UVM_ASSERT(gpu->mem_info.max_vidmem_page_size & tree->hal->page_sizes());

    page_tree_set_location(tree, location);

    uvm_tracker_init(&tree->tracker);
@@ -2301,7 +2323,7 @@ NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)

    UVM_ASSERT(!uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));

    page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
    page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_VID);
    size = UVM_ALIGN_UP(gpu->mem_info.max_allocatable_address + 1, page_size);

    UVM_ASSERT(page_size);
@@ -2338,9 +2360,9 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
    if (gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_VIRTUAL || peer->mem_info.size == 0)
        return NV_OK;

    page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
    size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
    aperture = uvm_gpu_peer_aperture(gpu, peer);
    page_size = mmu_biggest_page_size(&gpu->address_space_tree, aperture);
    size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
    peer_mapping = uvm_gpu_get_peer_mapping(gpu, peer->id);
    phys_offset = 0ULL;

@@ -2783,7 +2805,7 @@ static NV_STATUS create_dynamic_sysmem_mapping(uvm_gpu_t *gpu)
    // sysmem mappings with 128K entries.
    UVM_ASSERT(is_power_of_2(mapping_size));
    UVM_ASSERT(mapping_size >= UVM_SIZE_1GB);
    UVM_ASSERT(mapping_size >= uvm_mmu_biggest_page_size(&gpu->address_space_tree));
    UVM_ASSERT(mapping_size >= mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS));
    UVM_ASSERT(mapping_size <= flat_sysmem_va_size);

    flat_sysmem_va_size = UVM_ALIGN_UP(flat_sysmem_va_size, mapping_size);
@@ -2828,7 +2850,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
        if (sysmem_mapping->range_vec == NULL) {
            uvm_gpu_address_t virtual_address = uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, curr_pa);
            NvU64 phys_offset = curr_pa;
            NvU32 page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
            NvU32 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
            uvm_pmm_alloc_flags_t pmm_flags;

            // No eviction is requested when allocating the page tree storage,

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation
    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -612,6 +612,9 @@ static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
    return uvm_mmu_page_tree_entries(tree, depth, page_size) * page_size;
}

// Page sizes supported by the GPU. Use uvm_mmu_biggest_page_size() to retrieve
// the largest page size supported in a given system, which considers the GMMU
// and vMMU page sizes and segment sizes.
static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)
{
    UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%x\n", page_size);
@@ -642,11 +645,6 @@ static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_pa
    return page_size;
}

static NvU32 uvm_mmu_biggest_page_size(uvm_page_tree_t *tree)
{
    return 1 << __fls(tree->hal->page_sizes());
}

static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU32 page_size)
{
    return tree->hal->entry_size(tree->hal->page_table_depth(page_size));

@@ -1442,6 +1442,7 @@ static bool preferred_location_is_thrashing(uvm_processor_id_t preferred_locatio

static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thrashing_info_t *va_space_thrashing,
                                                                  uvm_va_block_t *va_block,
                                                                  uvm_va_block_context_t *va_block_context,
                                                                  uvm_page_index_t page_index,
                                                                  page_thrashing_info_t *page_thrashing,
                                                                  uvm_processor_id_t requester)
@@ -1460,7 +1461,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras

    hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;

    closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, requester);
    closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, requester);
    if (uvm_va_block_is_hmm(va_block)) {
        // HMM pages always start out resident on the CPU but may not be
        // recorded in the va_block state because hmm_range_fault() or
@@ -1601,6 +1602,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
// that case we keep the page pinned while applying the same algorithm as in
// Phase1.
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
                                                      uvm_va_block_context_t *va_block_context,
                                                      NvU64 address,
                                                      uvm_processor_id_t requester)
{
@@ -1713,6 +1715,7 @@ uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
    else {
        hint = get_hint_for_migration_thrashing(va_space_thrashing,
                                                va_block,
                                                va_block_context,
                                                page_index,
                                                page_thrashing,
                                                requester);

@@ -74,7 +74,9 @@ typedef struct
} uvm_perf_thrashing_hint_t;

// Obtain a hint to prevent thrashing on the page with given address
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block, NvU64 address,
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
                                                      uvm_va_block_context_t *va_block_context,
                                                      NvU64 address,
                                                      uvm_processor_id_t requester);

// Obtain a pointer to a mask with the processors that are thrashing on the

@@ -1408,8 +1408,6 @@ uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
|
||||
uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(accessing_gpu, gpu);
|
||||
uvm_gpu_identity_mapping_t *gpu_peer_mapping;
|
||||
|
||||
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
|
||||
|
||||
if (peer_caps->is_indirect_peer ||
|
||||
(accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_PHYSICAL)) {
|
||||
// Indirect peers are accessed as sysmem addresses, so they don't need
|
||||
|
||||
@@ -1082,6 +1082,7 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
|
||||
{
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
uvm_va_block_context_t *va_block_context = NULL;
|
||||
NvU32 num_blocks;
|
||||
NvU32 index = 0;
|
||||
uvm_gpu_phys_address_t phys_addr = {0};
|
||||
@@ -1099,9 +1100,12 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
|
||||
}
|
||||
TEST_CHECK_RET(va_block);
|
||||
|
||||
va_block_context = uvm_va_block_context_alloc(NULL);
|
||||
TEST_CHECK_RET(va_block_context);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, 0, gpu->id), gpu->id);
|
||||
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, va_block_context, 0, gpu->id), gpu->id);
|
||||
if (is_resident) {
|
||||
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
|
||||
phys_addr.address = UVM_ALIGN_DOWN(phys_addr.address, UVM_VA_BLOCK_SIZE);
|
||||
@@ -1109,6 +1113,8 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
uvm_va_block_context_free(va_block_context);
|
||||
|
||||
TEST_CHECK_RET(is_resident);
|
||||
|
||||
// Perform the lookup for the whole root chunk
|
||||
|
||||
@@ -25,6 +25,8 @@
#include "uvm_processors.h"

static struct kmem_cache *g_uvm_processor_mask_cache __read_mostly;
const uvm_processor_mask_t g_uvm_processor_mask_cpu = { .bitmap = { 1 << UVM_PARENT_ID_CPU_VALUE }};
const uvm_processor_mask_t g_uvm_processor_mask_empty = { };

NV_STATUS uvm_processor_mask_cache_init(void)
{

@@ -522,6 +522,9 @@ UVM_PROCESSOR_MASK(uvm_processor_mask_t, \
                   uvm_processor_id_t, \
                   uvm_id_from_value)

extern const uvm_processor_mask_t g_uvm_processor_mask_cpu;
extern const uvm_processor_mask_t g_uvm_processor_mask_empty;

// Like uvm_processor_mask_subset() but ignores the CPU in the subset mask.
// Returns whether the GPUs in subset are a subset of the GPUs in mask.
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,
@@ -567,6 +570,10 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas
          (uvm_id_value(i) < uvm_id_value(uvm_gpu_id_from_parent_gpu_id(id)) + UVM_PARENT_ID_MAX_SUB_PROCESSORS); \
          i = uvm_gpu_id_next(i))

// Helper to iterate over all sub processor indexes.
#define for_each_sub_processor_index(i) \
    for (i = 0; i < UVM_PARENT_ID_MAX_SUB_PROCESSORS; i++)

// Helper to iterate over all valid processor ids.
#define for_each_id(i) for (i = UVM_ID_CPU; UVM_ID_IS_VALID(i); i = uvm_id_next(i))

@@ -41,15 +41,11 @@
static NV_STATUS uvm_test_get_gpu_ref_count(UVM_TEST_GET_GPU_REF_COUNT_PARAMS *params, struct file *filp)
{
    NvU64 retained_count = 0;
    uvm_parent_gpu_t *parent_gpu;
    uvm_gpu_t *gpu = NULL;

    uvm_mutex_lock(&g_uvm_global.global_lock);

    parent_gpu = uvm_parent_gpu_get_by_uuid(&params->gpu_uuid);
    if (parent_gpu)
        gpu = uvm_gpu_get_by_parent_and_swizz_id(parent_gpu, params->swizz_id);

    gpu = uvm_gpu_get_by_uuid(&params->gpu_uuid);
    if (gpu != NULL)
        retained_count = uvm_gpu_retained_count(gpu);

@@ -40,7 +40,6 @@ typedef struct
{
    // In params
    NvProcessorUuid gpu_uuid;
    NvU32           swizz_id;
    // Out params
    NvU64           ref_count NV_ALIGN_BYTES(8);
    NV_STATUS       rmStatus;
@@ -192,7 +191,7 @@ typedef struct
    NvU32           read_duplication;                   // Out (UVM_TEST_READ_DUPLICATION_POLICY)
    NvProcessorUuid preferred_location;                 // Out
    NvS32           preferred_cpu_nid;                  // Out
    NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS];    // Out
    NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS_V2]; // Out
    NvU32           accessed_by_count;                  // Out
    NvU32           type;                               // Out (UVM_TEST_VA_RANGE_TYPE)
    union
@@ -505,7 +504,12 @@ typedef struct
typedef struct
{
    // In params
    UvmEventEntry entry;        // contains only NvUxx types
    union
    {
        UvmEventEntry_V1 entry_v1; // contains only NvUxx types
        UvmEventEntry_V2 entry_v2; // contains only NvUxx types
    };
    NvU32 version;
    NvU32 count;

    // Out param
@@ -620,7 +624,7 @@ typedef struct

    // Array of processors which have a resident copy of the page containing
    // lookup_address.
    NvProcessorUuid resident_on[UVM_MAX_PROCESSORS];    // Out
    NvProcessorUuid resident_on[UVM_MAX_PROCESSORS_V2]; // Out
    NvU32           resident_on_count;                  // Out

    // If the memory is resident on the CPU, the NUMA node on which the page
@@ -631,24 +635,24 @@ typedef struct
    // system-page-sized portion of this allocation which contains
    // lookup_address is guaranteed to be resident on the corresponding
    // processor.
    NvU32 resident_physical_size[UVM_MAX_PROCESSORS];    // Out
    NvU32 resident_physical_size[UVM_MAX_PROCESSORS_V2]; // Out

    // The physical address of the physical allocation backing lookup_address.
    NvU64 resident_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8);    // Out
    NvU64 resident_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out

    // Array of processors which have a virtual mapping covering lookup_address.
    NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS];                            // Out
    NvU32           mapping_type[UVM_MAX_PROCESSORS];                         // Out
    NvU64           mapping_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8);    // Out
    NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS_V2];                         // Out
    NvU32           mapping_type[UVM_MAX_PROCESSORS_V2];                      // Out
    NvU64           mapping_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
    NvU32           mapped_on_count;                                          // Out

    // The size of the virtual mapping covering lookup_address on each
    // mapped_on processor.
    NvU32 page_size[UVM_MAX_PROCESSORS];    // Out
    NvU32 page_size[UVM_MAX_PROCESSORS_V2]; // Out

    // Array of processors which have physical memory populated that would back
    // lookup_address if it was resident.
    NvProcessorUuid populated_on[UVM_MAX_PROCESSORS];    // Out
    NvProcessorUuid populated_on[UVM_MAX_PROCESSORS_V2]; // Out
    NvU32           populated_on_count;                  // Out

    NV_STATUS rmStatus;                                  // Out

File diff suppressed because it is too large
@@ -52,8 +52,19 @@ typedef enum

typedef unsigned long long UvmStream;

#define UVM_MAX_GPUS         NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS   (UVM_MAX_GPUS + 1)
// The maximum number of GPUs changed when multiple MIG instances per
// uvm_parent_gpu_t were added. See UvmEventQueueCreate().
#define UVM_MAX_GPUS_V1       NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS_V1 (UVM_MAX_GPUS_V1 + 1)
#define UVM_MAX_GPUS_V2       (NV_MAX_DEVICES * NV_MAX_SUBDEVICES)
#define UVM_MAX_PROCESSORS_V2 (UVM_MAX_GPUS_V2 + 1)

// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
#define UVM_MAX_GPUS       UVM_MAX_GPUS_V1
#define UVM_MAX_PROCESSORS UVM_MAX_PROCESSORS_V1

#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS_V2 + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))

#define UVM_INIT_FLAGS_DISABLE_HMM                ((NvU64)0x1)
#define UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE ((NvU64)0x2)
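For orientation only: the actual values of NV_MAX_DEVICES and NV_MAX_SUBDEVICES come from the RM SDK headers, not from this diff. The sketch below works through the sizing under the assumption that those constants are 32 and 8 respectively, and the static assert holds regardless of the real values because UVM_PROCESSOR_MASK_SIZE rounds up by construction.

// Hypothetical sizing sketch; real constants live in the RM SDK headers.
// UVM_MAX_GPUS_V2         = 32 * 8        = 256 GPUs (assumed values)
// UVM_MAX_PROCESSORS_V2   = 256 + 1       = 257 processors (CPU + GPUs)
// UVM_PROCESSOR_MASK_SIZE = (257 + 63)/64 = 5 NvU64 words per processor mask
_Static_assert(UVM_PROCESSOR_MASK_SIZE * sizeof(NvU64) * 8 >= UVM_MAX_PROCESSORS_V2,
               "processor mask must cover every V2 processor index");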
@@ -152,6 +163,8 @@ typedef enum {

typedef struct
{
    // UUID of the physical GPU if the GPU is not SMC capable or SMC enabled,
    // or the GPU instance UUID of the partition.
    NvProcessorUuid gpuUuid;
    NvU32           gpuMappingType; // UvmGpuMappingType
    NvU32           gpuCachingType; // UvmGpuCachingType
@@ -410,7 +423,29 @@ typedef struct
    NvU32 pid;       // process id causing the fault
    NvU32 threadId;  // thread id causing the fault
    NvU64 pc;        // address of the instruction causing the fault
} UvmEventCpuFaultInfo;
} UvmEventCpuFaultInfo_V1;

typedef struct
{
    //
    // eventType has to be 1st argument of this structure. Setting eventType to
    // UvmEventTypeMemoryViolation helps to identify event data in a queue.
    //
    NvU8 eventType;
    NvU8 accessType; // read/write violation (UvmEventMemoryAccessType)
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets.
    //
    NvU16 padding16Bits;
    NvS32 nid;       // NUMA node ID of faulting CPU
    NvU64 address;   // faulting address
    NvU64 timeStamp; // cpu time when the fault occurred
    NvU32 pid;       // process id causing the fault
    NvU32 threadId;  // thread id causing the fault
    NvU64 pc;        // address of the instruction causing the fault
} UvmEventCpuFaultInfo_V2;

typedef enum
{
@@ -567,7 +602,49 @@ typedef struct
                             // on the gpu
    NvU64 endTimeStampGpu;   // time stamp when the migration finished
                             // on the gpu
} UvmEventMigrationInfo;
} UvmEventMigrationInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure. Setting eventType
    // to UvmEventTypeMigration helps to identify event data in a queue.
    //
    NvU8 eventType;
    //
    // Cause that triggered the migration
    //
    NvU8 migrationCause;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU16 padding16Bits;
    //
    // Indices are used for the source and destination of migration instead of
    // using gpu uuid/cpu id. This reduces the size of each event. The index to
    // gpuUuid relation can be obtained from UvmToolsGetProcessorUuidTable.
    // Currently we do not distinguish between CPUs so they all use index 0.
    //
    NvU16 srcIndex;          // source CPU/GPU index
    NvU16 dstIndex;          // destination CPU/GPU index
    NvS32 srcNid;            // source CPU NUMA node ID
    NvS32 dstNid;            // destination CPU NUMA node ID
    NvU64 address;           // base virtual addr used for migration
    NvU64 migratedBytes;     // number of bytes migrated
    NvU64 beginTimeStamp;    // cpu time stamp when the memory transfer
                             // was queued on the gpu
    NvU64 endTimeStamp;      // cpu time stamp when the memory transfer
                             // finalization was communicated to the cpu
                             // For asynchronous operations this field
                             // will be zero
    NvU64 rangeGroupId;      // range group tied with this migration
    NvU64 beginTimeStampGpu; // time stamp when the migration started
                             // on the gpu
    NvU64 endTimeStampGpu;   // time stamp when the migration finished
                             // on the gpu
} UvmEventMigrationInfo_V2;

typedef enum
{
@@ -633,7 +710,64 @@ typedef struct
    //
    NvU8  padding8Bits;
    NvU16 padding16Bits;
} UvmEventGpuFaultInfo;
} UvmEventGpuFaultInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeGpuFault helps to identify event data in
    // a queue.
    //
    NvU8 eventType;
    NvU8 faultType;      // type of gpu fault, refer UvmEventFaultType
    NvU8 accessType;     // memory access type, refer UvmEventMemoryAccessType
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8 padding8Bits_1;
    union
    {
        NvU16 gpcId;     // If this is a replayable fault, this field contains
                         // the physical GPC index where the fault was
                         // triggered

        NvU16 channelId; // If this is a non-replayable fault, this field
                         // contains the id of the channel that launched the
                         // operation that caused the fault.
                         //
                         // TODO: Bug 3283289: this field is ambiguous for
                         // Ampere+ GPUs, but it is never consumed by clients.
    };
    NvU16 clientId;      // Id of the MMU client that triggered the fault. This
                         // is the value provided by HW and is architecture-
                         // specific. There are separate client ids for
                         // different client types (See dev_fault.h).
    NvU64 address;       // virtual address at which gpu faulted
    NvU64 timeStamp;     // time stamp when the cpu started processing the
                         // fault
    NvU64 timeStampGpu;  // gpu time stamp when the fault entry was written
                         // in the fault buffer
    NvU32 batchId;       // Per-GPU unique id to identify the faults serviced
                         // in batch before:
                         // - Issuing a replay for replayable faults
                         // - Re-scheduling the channel for non-replayable
                         //   faults.
    NvU8 clientType;     // Volta+ GPUs can fault on clients other than GR.
                         // UvmEventFaultClientTypeGpc indicates replayable
                         // fault, while UvmEventFaultClientTypeHub indicates
                         // non-replayable fault.

    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8  padding8Bits_2;
    NvU16 gpuIndex;      // GPU that experienced the fault
} UvmEventGpuFaultInfo_V2;

//------------------------------------------------------------------------------
// This info is provided when a gpu fault is replayed (for replayable faults)
@@ -666,7 +800,25 @@ typedef struct
                        // accesses is queued on the gpu
    NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
                        // executing on the gpu
} UvmEventGpuFaultReplayInfo;
} UvmEventGpuFaultReplayInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeGpuFaultReplay helps to identify event
    // data in a queue.
    //
    NvU8  eventType;
    NvU8  clientType;   // See clientType in UvmEventGpuFaultInfo
    NvU16 gpuIndex;     // GPU that experienced the fault
    NvU32 batchId;      // Per-GPU unique id to identify the faults that
                        // have been serviced in batch
    NvU64 timeStamp;    // cpu time when the replay of the faulting memory
                        // accesses is queued on the gpu
    NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
                        // executing on the gpu
} UvmEventGpuFaultReplayInfo_V2;

//------------------------------------------------------------------------------
// This info is provided per fatal fault
@@ -689,7 +841,26 @@ typedef struct
    NvU16 padding16bits;
    NvU64 address;   // virtual address at which the processor faulted
    NvU64 timeStamp; // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo;
} UvmEventFatalFaultInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeFatalFault helps to identify event data
    // in a queue.
    //
    NvU8 eventType;
    NvU8 faultType;       // type of gpu fault, refer UvmEventFaultType. Only
                          // valid if processorIndex is a GPU
    NvU8 accessType;      // memory access type, refer UvmEventMemoryAccessType
    NvU8 reason;          // reason why the fault is fatal, refer
                          // UvmEventFatalReason
    NvU16 processorIndex; // processor that experienced the fault
    NvU16 padding16bits;
    NvU64 address;        // virtual address at which the processor faulted
    NvU64 timeStamp;      // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo_V2;

typedef struct
{
@@ -718,7 +889,38 @@ typedef struct
                     // participate in read-duplicate this is time stamp
                     // when all the operations have been pushed to all
                     // the processors.
} UvmEventReadDuplicateInfo;
} UvmEventReadDuplicateInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeReadDuplicate helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8  padding8bits;
    NvU16 padding16bits;
    NvU32 padding32bits;
    NvU64 address;   // virtual address of the memory region that is
                     // read-duplicated
    NvU64 size;      // size in bytes of the memory region that is
                     // read-duplicated
    NvU64 timeStamp; // cpu time stamp when the memory region becomes
                     // read-duplicate. Since many processors can
                     // participate in read-duplicate this is time stamp
                     // when all the operations have been pushed to all
                     // the processors.
    NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
                     // mask that specifies in which processors this
                     // memory region is read-duplicated. This is last
                     // so UVM_PROCESSOR_MASK_SIZE can grow.
} UvmEventReadDuplicateInfo_V2;

typedef struct
{
@@ -728,13 +930,13 @@ typedef struct
    // identify event data in a queue.
    //
    NvU8 eventType;
    NvU8 residentIndex; // index of the cpu/gpu that now contains the only
                        // valid copy of the memory region
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8 residentIndex; // index of the cpu/gpu that now contains the only
                        // valid copy of the memory region
    NvU16 padding16bits;
    NvU32 padding32bits;
    NvU64 address;      // virtual address of the memory region that is
@@ -746,8 +948,34 @@ typedef struct
                        // participate in read-duplicate this is time stamp
                        // when all the operations have been pushed to all
                        // the processors.
} UvmEventReadDuplicateInvalidateInfo;
} UvmEventReadDuplicateInvalidateInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeReadDuplicateInvalidate helps to
    // identify event data in a queue.
    //
    NvU8  eventType;
    NvU8  padding8bits;
    NvU16 residentIndex;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU32 padding32bits;
    NvU64 address;   // virtual address of the memory region that is
                     // read-duplicated
    NvU64 size;      // size of the memory region that is
                     // read-duplicated
    NvU64 timeStamp; // cpu time stamp when the memory region is no
                     // longer read-duplicate. Since many processors can
                     // participate in read-duplicate this is time stamp
                     // when all the operations have been pushed to all
                     // the processors.
} UvmEventReadDuplicateInvalidateInfo_V2;

typedef struct
{
@@ -770,7 +998,30 @@ typedef struct
                     // changed
    NvU64 timeStamp; // cpu time stamp when the new page size is
                     // queued on the gpu
} UvmEventPageSizeChangeInfo;
} UvmEventPageSizeChangeInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypePageSizeChange helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8  padding8bits;
    NvU16 processorIndex; // cpu/gpu processor index for which the page size
                          // changed
    NvU32 size;           // new page size
    NvU64 address;        // virtual address of the page whose size has
                          // changed
    NvU64 timeStamp;      // cpu time stamp when the new page size is
                          // queued on the gpu
} UvmEventPageSizeChangeInfo_V2;

typedef struct
{
@@ -794,7 +1045,33 @@ typedef struct
                     // thrashing
    NvU64 size;      // size of the memory region that is thrashing
    NvU64 timeStamp; // cpu time stamp when thrashing is detected
} UvmEventThrashingDetectedInfo;
} UvmEventThrashingDetectedInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeThrashingDetected helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8  padding8bits;
    NvU16 padding16bits;
    NvU32 padding32bits;
    NvU64 address;   // virtual address of the memory region that is
                     // thrashing
    NvU64 size;      // size of the memory region that is thrashing
    NvU64 timeStamp; // cpu time stamp when thrashing is detected
    NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
                     // mask that specifies which processors are
                     // fighting for this memory region. This is last
                     // so UVM_PROCESSOR_MASK_SIZE can grow.
} UvmEventThrashingDetectedInfo_V2;

typedef struct
{
@@ -815,7 +1092,28 @@ typedef struct
    NvU64 address;   // address of the page whose servicing is being
                     // throttled
    NvU64 timeStamp; // cpu start time stamp for the throttling operation
} UvmEventThrottlingStartInfo;
} UvmEventThrottlingStartInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeThrottlingStart helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8  padding8bits;
    NvU16 padding16bits[2];
    NvU16 processorIndex; // index of the cpu/gpu that was throttled
    NvU64 address;        // address of the page whose servicing is being
                          // throttled
    NvU64 timeStamp;      // cpu start time stamp for the throttling operation
} UvmEventThrottlingStartInfo_V2;

typedef struct
{
@@ -836,7 +1134,28 @@ typedef struct
    NvU64 address;   // address of the page whose servicing is being
                     // throttled
    NvU64 timeStamp; // cpu end time stamp for the throttling operation
} UvmEventThrottlingEndInfo;
} UvmEventThrottlingEndInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeThrottlingEnd helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8  padding8bits;
    NvU16 padding16bits[2];
    NvU16 processorIndex; // index of the cpu/gpu that was throttled
    NvU64 address;        // address of the page whose servicing is being
                          // throttled
    NvU64 timeStamp;      // cpu end time stamp for the throttling operation
} UvmEventThrottlingEndInfo_V2;

typedef enum
{
@@ -892,7 +1211,36 @@ typedef struct
    NvU64 timeStampGpu; // time stamp when the new mapping is effective in
                        // the processor specified by srcIndex. If srcIndex
                        // is a cpu, this field will be zero.
} UvmEventMapRemoteInfo;
} UvmEventMapRemoteInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeMapRemote helps to identify event data
    // in a queue.
    //
    NvU8 eventType;
    NvU8 mapRemoteCause; // field to type UvmEventMapRemoteCause that tells
                         // the cause for the page to be mapped remotely
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU16 padding16bits;
    NvU16 srcIndex;      // index of the cpu/gpu being remapped
    NvU16 dstIndex;      // index of the cpu/gpu memory that contains the
                         // memory region data
    NvU64 address;       // virtual address of the memory region that is
                         // thrashing
    NvU64 size;          // size of the memory region that is thrashing
    NvU64 timeStamp;     // cpu time stamp when all the required operations
                         // have been pushed to the processor
    NvU64 timeStampGpu;  // time stamp when the new mapping is effective in
                         // the processor specified by srcIndex. If srcIndex
                         // is a cpu, this field will be zero.
} UvmEventMapRemoteInfo_V2;

typedef struct
{
@@ -918,7 +1266,33 @@ typedef struct
    NvU64 addressIn;  // virtual address that caused the eviction
    NvU64 size;       // size of the memory region that being evicted
    NvU64 timeStamp;  // cpu time stamp when eviction starts on the cpu
} UvmEventEvictionInfo;
} UvmEventEvictionInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeEviction helps to identify event data
    // in a queue.
    //
    NvU8 eventType;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8  padding8bits;
    NvU16 padding16bits;
    NvU16 srcIndex;   // index of the cpu/gpu from which data is being
                      // evicted
    NvU16 dstIndex;   // index of the cpu/gpu memory to which data is
                      // going to be stored
    NvU64 addressOut; // virtual address of the memory region that is
                      // being evicted
    NvU64 addressIn;  // virtual address that caused the eviction
    NvU64 size;       // size of the memory region that being evicted
    NvU64 timeStamp;  // cpu time stamp when eviction starts on the cpu
} UvmEventEvictionInfo_V2;

// TODO: Bug 1870362: [uvm] Provide virtual address and processor index in
//       AccessCounter events
@@ -978,7 +1352,44 @@ typedef struct
    NvU32 bank;
    NvU64 address;
    NvU64 instancePtr;
} UvmEventTestAccessCounterInfo;
} UvmEventTestAccessCounterInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeAccessCounter helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    // See uvm_access_counter_buffer_entry_t for details
    NvU8 aperture;
    NvU8 instancePtrAperture;
    NvU8 isVirtual;
    NvU8 isFromCpu;
    NvU8 veId;

    // The physical access counter notification was triggered on a managed
    // memory region. This is not set for virtual access counter notifications.
    NvU8 physOnManaged;

    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8  padding8bits;
    NvU16 srcIndex;      // index of the gpu that received the access counter
                         // notification
    NvU16 padding16bits;
    NvU32 value;
    NvU32 subGranularity;
    NvU32 tag;
    NvU32 bank;
    NvU32 padding32bits;
    NvU64 address;
    NvU64 instancePtr;
} UvmEventTestAccessCounterInfo_V2;

typedef struct
{
@@ -998,30 +1409,64 @@ typedef struct
            NvU8 eventType;
            UvmEventMigrationInfo_Lite migration_Lite;

            UvmEventCpuFaultInfo cpuFault;
            UvmEventMigrationInfo migration;
            UvmEventGpuFaultInfo gpuFault;
            UvmEventGpuFaultReplayInfo gpuFaultReplay;
            UvmEventFatalFaultInfo fatalFault;
            UvmEventReadDuplicateInfo readDuplicate;
            UvmEventReadDuplicateInvalidateInfo readDuplicateInvalidate;
            UvmEventPageSizeChangeInfo pageSizeChange;
            UvmEventThrashingDetectedInfo thrashing;
            UvmEventThrottlingStartInfo throttlingStart;
            UvmEventThrottlingEndInfo throttlingEnd;
            UvmEventMapRemoteInfo mapRemote;
            UvmEventEvictionInfo eviction;
            UvmEventCpuFaultInfo_V1 cpuFault;
            UvmEventMigrationInfo_V1 migration;
            UvmEventGpuFaultInfo_V1 gpuFault;
            UvmEventGpuFaultReplayInfo_V1 gpuFaultReplay;
            UvmEventFatalFaultInfo_V1 fatalFault;
            UvmEventReadDuplicateInfo_V1 readDuplicate;
            UvmEventReadDuplicateInvalidateInfo_V1 readDuplicateInvalidate;
            UvmEventPageSizeChangeInfo_V1 pageSizeChange;
            UvmEventThrashingDetectedInfo_V1 thrashing;
            UvmEventThrottlingStartInfo_V1 throttlingStart;
            UvmEventThrottlingEndInfo_V1 throttlingEnd;
            UvmEventMapRemoteInfo_V1 mapRemote;
            UvmEventEvictionInfo_V1 eviction;
        } eventData;

        union
        {
            NvU8 eventType;

            UvmEventTestAccessCounterInfo accessCounter;
            UvmEventTestAccessCounterInfo_V1 accessCounter;
            UvmEventTestSplitInvalidateInfo splitInvalidate;
        } testEventData;
    };
} UvmEventEntry;
} UvmEventEntry_V1;

typedef struct
{
    union
    {
        union
        {
            NvU8 eventType;
            UvmEventMigrationInfo_Lite migration_Lite;

            UvmEventCpuFaultInfo_V2 cpuFault;
            UvmEventMigrationInfo_V2 migration;
            UvmEventGpuFaultInfo_V2 gpuFault;
            UvmEventGpuFaultReplayInfo_V2 gpuFaultReplay;
            UvmEventFatalFaultInfo_V2 fatalFault;
            UvmEventReadDuplicateInfo_V2 readDuplicate;
            UvmEventReadDuplicateInvalidateInfo_V2 readDuplicateInvalidate;
            UvmEventPageSizeChangeInfo_V2 pageSizeChange;
            UvmEventThrashingDetectedInfo_V2 thrashing;
            UvmEventThrottlingStartInfo_V2 throttlingStart;
            UvmEventThrottlingEndInfo_V2 throttlingEnd;
            UvmEventMapRemoteInfo_V2 mapRemote;
            UvmEventEvictionInfo_V2 eviction;
        } eventData;

        union
        {
            NvU8 eventType;

            UvmEventTestAccessCounterInfo_V2 accessCounter;
            UvmEventTestSplitInvalidateInfo splitInvalidate;
        } testEventData;
    };
} UvmEventEntry_V2;

//------------------------------------------------------------------------------
// Type of time stamp used in the event entry:
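Because the V1 and V2 entries differ in size and layout, anything that walks the shared event buffer has to pick the entry type from the version it negotiated (or from the version recorded in the V2 control block added below). A minimal, hypothetical helper, not taken from this commit, might look like this; the UvmToolsEventQueueVersion constants it uses are introduced a few hunks further down.

// Hypothetical sketch: choose the per-entry stride before walking the queue.
static size_t uvm_event_entry_size(NvU32 version)
{
    return (version == UvmToolsEventQueueVersion_V2) ? sizeof(UvmEventEntry_V2)
                                                     : sizeof(UvmEventEntry_V1);
}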
@@ -1060,7 +1505,12 @@ typedef enum
    UvmDebugAccessTypeWrite = 1,
} UvmDebugAccessType;

typedef struct UvmEventControlData_tag {
typedef enum {
    UvmToolsEventQueueVersion_V1 = 1,
    UvmToolsEventQueueVersion_V2 = 2,
} UvmToolsEventQueueVersion;

typedef struct UvmEventControlData_V1_tag {
    // entries between get_ahead and get_behind are currently being read
    volatile NvU32 get_ahead;
    volatile NvU32 get_behind;
@@ -1070,7 +1520,30 @@ typedef struct UvmEventControlData_tag {

    // counter of dropped events
    NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData;
} UvmToolsEventControlData_V1;

typedef struct UvmEventControlData_V2_tag {
    // entries between get_ahead and get_behind are currently being read
    volatile NvU32 get_ahead;
    volatile NvU32 get_behind;

    // entries between put_ahead and put_behind are currently being written
    volatile NvU32 put_ahead;
    volatile NvU32 put_behind;

    // The version values are limited to UvmToolsEventQueueVersion and
    // initialized by UvmToolsCreateEventQueue().
    NvU32 version;
    NvU32 padding32Bits;

    // counter of dropped events
    NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData_V2;

// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
typedef UvmToolsEventControlData_V1 UvmToolsEventControlData;
typedef UvmEventEntry_V1 UvmEventEntry;

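Since the V2 control block records the queue version chosen at creation time, a tool that maps an existing queue can derive the entry layout from the shared mapping itself instead of tracking the negotiated value separately. A hypothetical check (ctrl is assumed to point at the mapped control block; nothing here comes from this commit):

// Hypothetical sketch: derive the entry layout from a mapped V2 control block.
static NvBool queue_uses_v2_entries(const UvmToolsEventControlData_V2 *ctrl)
{
    return ctrl->version == UvmToolsEventQueueVersion_V2;
}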
//------------------------------------------------------------------------------
// UVM Tools forward types (handles) definitions

File diff suppressed because it is too large
@@ -706,11 +706,6 @@ void uvm_va_block_context_free(uvm_va_block_context_t *va_block_context);
|
||||
// mm is used to initialize the value of va_block_context->mm. NULL is allowed.
|
||||
void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context, struct mm_struct *mm);
|
||||
|
||||
// Return the preferred NUMA node ID for the block's policy.
|
||||
// If the preferred node ID is NUMA_NO_NODE, the current NUMA node ID
|
||||
// is returned.
|
||||
int uvm_va_block_context_get_node(uvm_va_block_context_t *va_block_context);
|
||||
|
||||
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
|
||||
// and page masks could simplify the below APIs and their implementations
|
||||
// at the cost of having to scan the whole mask for small regions.
|
||||
@@ -1546,7 +1541,11 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
|
||||
// The [src, src + size) range has to fit within a single PAGE_SIZE page.
|
||||
//
|
||||
// LOCKING: The caller must hold the va_block lock
|
||||
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block, uvm_mem_t *dst, NvU64 src, size_t size);
|
||||
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_mem_t *dst,
|
||||
NvU64 src,
|
||||
size_t size);
|
||||
|
||||
// Initialize va block retry tracking
|
||||
void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);
|
||||
@@ -2090,11 +2089,14 @@ void uvm_va_block_page_resident_processors(uvm_va_block_t *va_block,
|
||||
|
||||
// Count how many processors have a copy of the given page resident in their
|
||||
// memory.
|
||||
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block, uvm_page_index_t page_index);
|
||||
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Get the processor with a resident copy of a page closest to the given
|
||||
// processor.
|
||||
uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t processor);
|
||||
|
||||
@@ -2127,6 +2129,11 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
|
||||
int nid,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Return the CPU chunk for the given page_index from the first available NUMA
|
||||
// node from the va_block. Should only be called for HMM va_blocks.
|
||||
// Locking: The va_block lock must be held.
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_get_any_chunk_for_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);
|
||||
|
||||
// Return the struct page * from the chunk corresponding to the given page_index
|
||||
// Locking: The va_block lock must be held.
|
||||
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
|
||||
@@ -2241,6 +2248,7 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
|
||||
// Return the maximum mapping protection for processor_id that will not require
|
||||
// any permission revocation on the rest of processors.
|
||||
uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
|
||||
@@ -175,6 +175,14 @@ typedef struct
|
||||
// Scratch node mask. This follows the same rules as scratch_page_mask;
|
||||
nodemask_t scratch_node_mask;
|
||||
|
||||
// Available as scratch space for the internal APIs. This is like a caller-
|
||||
// save register: it shouldn't be used across function calls which also take
|
||||
// this va_block_context.
|
||||
uvm_processor_mask_t scratch_processor_mask;
|
||||
|
||||
// Temporary mask in block_add_eviction_mappings().
|
||||
uvm_processor_mask_t map_processors_eviction;
|
||||
|
||||
// State used by uvm_va_block_make_resident
|
||||
struct uvm_make_resident_context_struct
|
||||
{
|
||||
@@ -233,6 +241,16 @@ typedef struct
|
||||
// are removed as the operation progresses.
|
||||
uvm_page_mask_t revoke_running_page_mask;
|
||||
|
||||
// Mask used by block_gpu_split_2m and block_gpu_split_big to track
|
||||
// splitting of big PTEs but they are never called concurrently. This
|
||||
// mask can be used concurrently with other page masks.
|
||||
uvm_page_mask_t big_split_page_mask;
|
||||
|
||||
// Mask used by block_unmap_gpu to track non_uvm_lite_gpus which have
|
||||
// this block mapped. This mask can be used concurrently with other page
|
||||
// masks.
|
||||
uvm_processor_mask_t non_uvm_lite_gpus;
|
||||
|
||||
uvm_page_mask_t page_mask;
|
||||
uvm_page_mask_t filtered_page_mask;
|
||||
uvm_page_mask_t migratable_mask;
|
||||
@@ -276,6 +294,10 @@ typedef struct
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
|
||||
// Temporary mask used in uvm_hmm_block_add_eviction_mappings().
|
||||
uvm_processor_mask_t map_processors_eviction;
|
||||
|
||||
// Used for migrate_vma_*() to migrate pages to/from GPU/CPU.
|
||||
struct migrate_vma migrate_vma_args;
|
||||
#endif
|
||||
|
||||
@@ -1799,7 +1799,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
|
||||
|
||||
if (uvm_api_range_invalid(params->base, params->length))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
if (params->gpuAttributesCount > UVM_MAX_GPUS)
|
||||
if (params->gpuAttributesCount > UVM_MAX_GPUS_V2)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -86,11 +86,13 @@ static void init_tools_data(uvm_va_space_t *va_space)
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(va_space->tools.counters); i++)
|
||||
INIT_LIST_HEAD(va_space->tools.counters + i);
|
||||
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues); i++)
|
||||
INIT_LIST_HEAD(va_space->tools.queues + i);
|
||||
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v1); i++)
|
||||
INIT_LIST_HEAD(va_space->tools.queues_v1 + i);
|
||||
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v2); i++)
|
||||
INIT_LIST_HEAD(va_space->tools.queues_v2 + i);
|
||||
}
|
||||
|
||||
static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
|
||||
static NV_STATUS register_gpu_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_gpu_t *other_gpu;
|
||||
|
||||
@@ -104,7 +106,7 @@ static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *
|
||||
|
||||
peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
|
||||
|
||||
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1) {
|
||||
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1 || gpu->parent == other_gpu->parent) {
|
||||
NV_STATUS status = enable_peers(va_space, gpu, other_gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@@ -324,10 +326,16 @@ static void unregister_gpu(uvm_va_space_t *va_space,
|
||||
}
|
||||
}
|
||||
|
||||
if (gpu->parent->isr.replayable_faults.handling)
|
||||
if (gpu->parent->isr.replayable_faults.handling) {
|
||||
UVM_ASSERT(uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
|
||||
uvm_processor_mask_clear(&va_space->faultable_processors, gpu->id);
|
||||
|
||||
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
|
||||
UVM_ASSERT(uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
|
||||
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
|
||||
uvm_processor_mask_clear(&va_space->non_faultable_processors, gpu->id);
|
||||
}
|
||||
|
||||
processor_mask_array_clear(va_space->can_access, gpu->id, gpu->id);
|
||||
processor_mask_array_clear(va_space->can_access, gpu->id, UVM_ID_CPU);
|
||||
@@ -514,7 +522,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
|
||||
|
||||
if (gpu->parent->access_counters_supported)
|
||||
uvm_gpu_access_counters_disable(gpu, va_space);
|
||||
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
|
||||
}
|
||||
|
||||
// Check that all CPU/GPU affinity masks are empty
|
||||
@@ -604,7 +612,7 @@ uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProces
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid))
|
||||
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
|
||||
return gpu;
|
||||
}
|
||||
|
||||
@@ -663,7 +671,8 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_rm_user_object_t *user_rm_device,
|
||||
NvBool *numa_enabled,
|
||||
NvS32 *numa_node_id)
|
||||
NvS32 *numa_node_id,
|
||||
NvProcessorUuid *uuid_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_range_t *va_range;
|
||||
@@ -675,13 +684,15 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
uvm_uuid_copy(uuid_out, &gpu->uuid);
|
||||
|
||||
// Enabling access counters requires taking the ISR lock, so it is done
|
||||
// without holding the (deeper order) VA space lock. Enabling the counters
|
||||
// after dropping the VA space lock would create a window of time in which
|
||||
// another thread could see the GPU as registered, but access counters would
|
||||
// be disabled. Therefore, the counters are enabled before taking the VA
|
||||
// space lock.
|
||||
if (uvm_gpu_access_counters_required(gpu->parent)) {
|
||||
if (uvm_parent_gpu_access_counters_required(gpu->parent)) {
|
||||
status = uvm_gpu_access_counters_enable(gpu, va_space);
|
||||
if (status != NV_OK) {
|
||||
uvm_gpu_release(gpu);
|
||||
@@ -726,10 +737,17 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
|
||||
|
||||
if (gpu->parent->isr.replayable_faults.handling) {
|
||||
UVM_ASSERT(!uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
|
||||
uvm_processor_mask_set(&va_space->faultable_processors, gpu->id);
|
||||
|
||||
UVM_ASSERT(!uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
|
||||
// System-wide atomics are enabled by default
|
||||
uvm_processor_mask_set(&va_space->system_wide_atomics_enabled_processors, gpu->id);
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(!uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
|
||||
uvm_processor_mask_set(&va_space->non_faultable_processors, gpu->id);
|
||||
}
|
||||
|
||||
// All GPUs have native atomics on their own memory
|
||||
processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id);
|
||||
@@ -785,7 +803,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
}
|
||||
}
|
||||
|
||||
status = register_gpu_nvlink_peers(va_space, gpu);
|
||||
status = register_gpu_peers(va_space, gpu);
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
|
||||
@@ -822,9 +840,9 @@ done:
|
||||
if (status != NV_OK) {
|
||||
// There is no risk of disabling access counters on a previously
|
||||
// registered GPU: the enablement step would have failed before even
|
||||
// discovering that the GPU is already registed.
|
||||
if (uvm_gpu_access_counters_required(gpu->parent))
|
||||
uvm_gpu_access_counters_disable(gpu, va_space);
|
||||
// discovering that the GPU is already registered.
|
||||
if (uvm_parent_gpu_access_counters_required(gpu->parent))
|
||||
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
|
||||
|
||||
uvm_gpu_release(gpu);
|
||||
}
|
||||
@@ -876,15 +894,16 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
|
||||
// it from the VA space until we're done.
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
|
||||
// If uvm_gpu_access_counters_required(gpu->parent) is true, a concurrent
|
||||
// registration could enable access counters after they are disabled here.
|
||||
// If uvm_parent_gpu_access_counters_required(gpu->parent) is true, a
|
||||
// concurrent registration could enable access counters after they are
|
||||
// disabled here.
|
||||
// The concurrent registration will fail later on if it acquires the VA
|
||||
// space lock before the unregistration does (because the GPU is still
|
||||
// registered) and undo the access counters enablement, or succeed if it
|
||||
// acquires the VA space lock after the unregistration does. Both outcomes
|
||||
// result in valid states.
|
||||
if (gpu->parent->access_counters_supported)
|
||||
uvm_gpu_access_counters_disable(gpu, va_space);
|
||||
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
|
||||
|
||||
// mmap_lock is needed to establish CPU mappings to any pages evicted from
|
||||
// the GPU if accessed by CPU is set for them.
|
||||
@@ -1040,6 +1059,10 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
|
||||
processor_mask_array_set(va_space->indirect_peers, gpu1->id, gpu0->id);
|
||||
}
|
||||
}
|
||||
else if (gpu0->parent == gpu1->parent) {
|
||||
processor_mask_array_set(va_space->has_native_atomics, gpu0->id, gpu1->id);
|
||||
processor_mask_array_set(va_space->has_native_atomics, gpu1->id, gpu0->id);
|
||||
}
|
||||
|
||||
UVM_ASSERT(va_space_check_processors_masks(va_space));
|
||||
__set_bit(table_index, va_space->enabled_peers);
|
||||
@@ -1091,6 +1114,7 @@ static NV_STATUS retain_pcie_peers_from_uuids(uvm_va_space_t *va_space,
|
||||
static bool uvm_va_space_pcie_peer_enabled(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
{
|
||||
return !processor_mask_array_test(va_space->has_nvlink, gpu0->id, gpu1->id) &&
|
||||
gpu0->parent != gpu1->parent &&
|
||||
uvm_va_space_peer_enabled(va_space, gpu0, gpu1);
|
||||
}
|
||||
|
||||
|
||||
@@ -163,6 +163,10 @@ struct uvm_va_space_struct
|
||||
// faults.
|
||||
uvm_processor_mask_t faultable_processors;
|
||||
|
||||
// Mask of processors registered with the va space that don't support
|
||||
// faulting.
|
||||
uvm_processor_mask_t non_faultable_processors;
|
||||
|
||||
// This is a count of non fault capable processors with a GPU VA space
|
||||
// registered.
|
||||
NvU32 num_non_faultable_gpu_va_spaces;
|
||||
@@ -261,8 +265,8 @@ struct uvm_va_space_struct
|
||||
// Mask of processors that are participating in system-wide atomics
|
||||
uvm_processor_mask_t system_wide_atomics_enabled_processors;
|
||||
|
||||
// Mask of GPUs where access counters are enabled on this VA space
|
||||
uvm_processor_mask_t access_counters_enabled_processors;
|
||||
// Mask of physical GPUs where access counters are enabled on this VA space
|
||||
uvm_parent_processor_mask_t access_counters_enabled_processors;
|
||||
|
||||
// Array with information regarding CPU/GPU NUMA affinity. There is one
|
||||
// entry per CPU NUMA node. Entries in the array are populated sequentially
|
||||
@@ -308,7 +312,8 @@ struct uvm_va_space_struct
|
||||
|
||||
// Lists of counters listening for events on this VA space
|
||||
struct list_head counters[UVM_TOTAL_COUNTERS];
|
||||
struct list_head queues[UvmEventNumTypesAll];
|
||||
struct list_head queues_v1[UvmEventNumTypesAll];
|
||||
struct list_head queues_v2[UvmEventNumTypesAll];
|
||||
|
||||
// Node for this va_space in global subscribers list
|
||||
struct list_head node;
|
||||
@@ -399,7 +404,7 @@ static void uvm_va_space_processor_uuid(uvm_va_space_t *va_space, NvProcessorUui
|
||||
else {
|
||||
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
|
||||
UVM_ASSERT(gpu);
|
||||
memcpy(uuid, uvm_gpu_uuid(gpu), sizeof(*uuid));
|
||||
memcpy(uuid, &gpu->uuid, sizeof(*uuid));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -472,9 +477,9 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space);
|
||||
uvm_mutex_unlock(&(__va_space)->serialize_writers_lock); \
|
||||
} while (0)
|
||||
|
||||
// Get a registered gpu by uuid. This restricts the search for GPUs, to those that
|
||||
// have been registered with a va_space. This returns NULL if the GPU is not present, or not
|
||||
// registered with the va_space.
|
||||
// Get a registered gpu by uuid. This restricts the search for GPUs, to those
|
||||
// that have been registered with a va_space. This returns NULL if the GPU is
|
||||
// not present, or not registered with the va_space.
|
||||
//
|
||||
// LOCKING: The VA space lock must be held.
|
||||
uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);
|
||||
@@ -501,13 +506,19 @@ bool uvm_va_space_can_read_duplicate(uvm_va_space_t *va_space, uvm_gpu_t *changi
|
||||
// Register a gpu in the va space
|
||||
// Note that each gpu can be only registered once in a va space
|
||||
//
|
||||
// The input gpu_uuid is for the physical GPU. The user_rm_va_space argument
|
||||
// identifies the SMC partition if provided and SMC is enabled.
|
||||
//
|
||||
// This call returns whether the GPU memory is a NUMA node in the kernel and the
|
||||
// corresponding node id.
|
||||
// It also returns the GI UUID (if gpu_uuid is a SMC partition) or a copy of
|
||||
// gpu_uuid if the GPU is not SMC capable or SMC is not enabled.
|
||||
NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_rm_user_object_t *user_rm_va_space,
|
||||
NvBool *numa_enabled,
|
||||
NvS32 *numa_node_id);
|
||||
NvS32 *numa_node_id,
|
||||
NvProcessorUuid *uuid_out);
|
||||
|
||||
// Unregister a gpu from the va space
|
||||
NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);
|
||||
|
||||
@@ -280,7 +280,9 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
|
||||
}
|
||||
}
|
||||
|
||||
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
|
||||
if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
|
||||
uvm_va_space_pageable_mem_access_supported(va_space)) {
|
||||
|
||||
#if UVM_CAN_USE_MMU_NOTIFIERS()
|
||||
// Initialize MMU interval notifiers for this process. This allows
|
||||
// mmu_interval_notifier_insert() to be called without holding the
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -53,6 +53,7 @@
|
||||
(defined(CONFIG_CRYPTO_HMAC) || defined(CONFIG_CRYPTO_HMAC_MODULE)) && \
|
||||
(defined(CONFIG_CRYPTO_ECDH) || defined(CONFIG_CRYPTO_ECDH_MODULE)) && \
|
||||
(defined(CONFIG_CRYPTO_ECDSA) || defined(CONFIG_CRYPTO_ECDSA_MODULE)) && \
|
||||
(defined(CONFIG_CRYPTO_RSA) || defined(CONFIG_CRYPTO_RSA_MODULE)) && \
|
||||
(defined(CONFIG_X509_CERTIFICATE_PARSER) || defined(CONFIG_X509_CERTIFICATE_PARSER_MODULE))
|
||||
#define NV_CONFIG_CRYPTO_PRESENT 1
|
||||
#endif
|
||||
@@ -151,4 +152,17 @@ bool lkca_ec_compute_key(void *ec_context, const uint8_t *peer_public,
|
||||
bool lkca_ecdsa_verify(void *ec_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
const uint8_t *signature, size_t sig_size);
|
||||
|
||||
bool lkca_rsa_verify(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
const uint8_t *signature, size_t sig_size);
|
||||
|
||||
bool lkca_rsa_pkcs1_sign(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
uint8_t *signature, size_t *sig_size);
|
||||
|
||||
bool lkca_rsa_pss_sign(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
uint8_t *signature, size_t *sig_size);
|
||||
|
||||
#endif
|
||||
|
||||
kernel-open/nvidia/libspdm_rsa.c (new file, 611 lines)
@@ -0,0 +1,611 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "internal_crypt_lib.h"
|
||||
#include "library/cryptlib.h"
|
||||
|
||||
#ifdef USE_LKCA
|
||||
#include <linux/module.h>
|
||||
#include <linux/mpi.h>
|
||||
#include <linux/random.h>
|
||||
|
||||
#include <crypto/akcipher.h>
|
||||
#include <crypto/internal/rsa.h>
|
||||
|
||||
/* ------------------------ Macros & Defines ------------------------------- */
|
||||
#define GET_MOST_SIGNIFICANT_BIT(keySize) (keySize > 0 ? ((keySize - 1) & 7) : 0)
|
||||
#define GET_ENC_MESSAGE_SIZE_BYTE(keySize) (keySize + 7) >> 3;
|
||||
#define PKCS1_MGF1_COUNTER_SIZE_BYTE (4)
|
||||
#define RSA_PSS_PADDING_ZEROS_SIZE_BYTE (8)
|
||||
#define RSA_PSS_TRAILER_FIELD (0xbc)
|
||||
#define SHIFT_RIGHT_AND_GET_BYTE(val, x) ((val >> x) & 0xFF)
|
||||
#define BITS_TO_BYTES(b) (b >> 3)
|
||||
|
||||
static const unsigned char zeroes[RSA_PSS_PADDING_ZEROS_SIZE_BYTE] = { 0 };
|
||||
|
||||
struct rsa_ctx
|
||||
{
|
||||
struct rsa_key key;
|
||||
bool pub_key_set;
|
||||
bool priv_key_set;
|
||||
int size;
|
||||
};
|
||||
#endif // #ifdef USE_LKCA
|
||||
|
||||
/*!
|
||||
* Creating and initializing a RSA context.
|
||||
*
|
||||
* @return : A void pointer points to a RSA context
|
||||
*
|
||||
*/
|
||||
void *libspdm_rsa_new
|
||||
(
|
||||
void
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return NULL;
|
||||
#else
|
||||
struct rsa_ctx *ctx;
|
||||
|
||||
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
|
||||
|
||||
if (ctx == NULL)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
|
||||
ctx->pub_key_set = false;
|
||||
ctx->priv_key_set = false;
|
||||
|
||||
return ctx;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*!
|
||||
* To free a RSA context.
|
||||
*
|
||||
* @param rsa_context : A RSA context pointer
|
||||
*
|
||||
*/
|
||||
void libspdm_rsa_free
|
||||
(
|
||||
void *rsa_context
|
||||
)
|
||||
{
|
||||
#ifdef USE_LKCA
|
||||
struct rsa_ctx *ctx = rsa_context;
|
||||
|
||||
if (ctx != NULL)
|
||||
{
|
||||
if (ctx->key.n) kfree(ctx->key.n);
|
||||
if (ctx->key.e) kfree(ctx->key.e);
|
||||
if (ctx->key.d) kfree(ctx->key.d);
|
||||
if (ctx->key.q) kfree(ctx->key.q);
|
||||
if (ctx->key.p) kfree(ctx->key.p);
|
||||
if (ctx->key.dq) kfree(ctx->key.dq);
|
||||
if (ctx->key.dp) kfree(ctx->key.dp);
|
||||
if (ctx->key.qinv) kfree(ctx->key.qinv);
|
||||
kfree(ctx);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define rsa_set_key_case(a, a_sz, A) \
|
||||
case A: \
|
||||
{ \
|
||||
if (ctx->key.a) { \
|
||||
kfree(ctx->key.a); \
|
||||
} \
|
||||
ctx->key.a = shadow_num; \
|
||||
ctx->key.a_sz = bn_size; \
|
||||
break; \
|
||||
}
|
||||
/*!
|
||||
* To set key into RSA context.
|
||||
*
|
||||
* @param rsa_context : A RSA context pointer
|
||||
* @param key_tag : Indicate key tag for RSA key
|
||||
* @param big_number : A big number buffer holding the RSA key
* @param bn_size : The size of big_number
|
||||
*
|
||||
* @Return : True if OK; otherwise return False
|
||||
*/
|
||||
bool libspdm_rsa_set_key
|
||||
(
|
||||
void *rsa_context,
|
||||
const libspdm_rsa_key_tag_t key_tag,
|
||||
const uint8_t *big_number,
|
||||
size_t bn_size
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return false;
|
||||
#else
|
||||
struct rsa_ctx *ctx = rsa_context;
|
||||
uint8_t *shadow_num;
|
||||
|
||||
if (ctx == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Quick sanity check if tag is valid
|
||||
switch (key_tag)
|
||||
{
|
||||
case LIBSPDM_RSA_KEY_N:
|
||||
case LIBSPDM_RSA_KEY_E:
|
||||
case LIBSPDM_RSA_KEY_D:
|
||||
case LIBSPDM_RSA_KEY_Q:
|
||||
case LIBSPDM_RSA_KEY_P:
|
||||
case LIBSPDM_RSA_KEY_DP:
|
||||
case LIBSPDM_RSA_KEY_DQ:
|
||||
case LIBSPDM_RSA_KEY_Q_INV:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (big_number != NULL)
|
||||
{
|
||||
shadow_num = kmalloc(bn_size, GFP_KERNEL);
|
||||
if (shadow_num == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
memcpy(shadow_num, big_number, bn_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
shadow_num = NULL;
|
||||
bn_size = 0;
|
||||
}
|
||||
|
||||
switch (key_tag)
|
||||
{
|
||||
rsa_set_key_case(n, n_sz, LIBSPDM_RSA_KEY_N)
|
||||
rsa_set_key_case(e, e_sz, LIBSPDM_RSA_KEY_E)
|
||||
rsa_set_key_case(d, d_sz, LIBSPDM_RSA_KEY_D)
|
||||
rsa_set_key_case(q, q_sz, LIBSPDM_RSA_KEY_Q)
|
||||
rsa_set_key_case(p, p_sz, LIBSPDM_RSA_KEY_P)
|
||||
rsa_set_key_case(dq, dq_sz, LIBSPDM_RSA_KEY_DQ)
|
||||
rsa_set_key_case(dp, dp_sz, LIBSPDM_RSA_KEY_DP)
|
||||
rsa_set_key_case(qinv, qinv_sz, LIBSPDM_RSA_KEY_Q_INV)
|
||||
default:
|
||||
// We can't get here ever
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*!
|
||||
* Perform PKCS1 MGF1 operation.
|
||||
*
|
||||
* @param mask : A mask pointer to store return data
|
||||
* @param maskedDB_length : Indicate mask data block length
|
||||
* @param seed : A seed pointer to store random values
|
||||
* @param seed_length : The seed length
|
||||
* @param hash_nid : The hash NID
|
||||
*
|
||||
* @Return : True if OK; otherwise return False
|
||||
*/
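/*
 * Reference sketch (not from the original sources): the MGF1 mask built by
 * the function below is, in effect,
 *
 *   mask = Hash(seed || C(0)) || Hash(seed || C(1)) || ...
 *
 * truncated to maskedDB_length bytes, where C(i) is the 4-byte big-endian
 * encoding of the counter and Hash is SHA-384 in this implementation.
 */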
|
||||
static bool NV_PKCS1_MGF1
|
||||
(
|
||||
uint8_t *mask,
|
||||
size_t maskedDB_length,
|
||||
const uint8_t *seed,
|
||||
size_t seed_length,
|
||||
size_t hash_nid
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return false;
|
||||
#else
|
||||
size_t mdLength;
|
||||
size_t counter;
|
||||
size_t outLength;
|
||||
uint8_t counterBuf[4];
|
||||
void *sha384_ctx = NULL;
|
||||
uint8_t hash_value[LIBSPDM_SHA384_DIGEST_SIZE];
|
||||
bool status = false;
|
||||
|
||||
if (mask == NULL || seed == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Only support SHA384 for MGF1 now.
|
||||
if (hash_nid == LIBSPDM_CRYPTO_NID_SHA384)
|
||||
{
|
||||
mdLength = LIBSPDM_SHA384_DIGEST_SIZE;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
sha384_ctx = libspdm_sha384_new();
|
||||
|
||||
if (sha384_ctx == NULL)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_new() failed \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (counter = 0, outLength = 0; outLength < maskedDB_length; counter++)
|
||||
{
|
||||
counterBuf[0] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 24);
|
||||
counterBuf[1] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 16);
|
||||
counterBuf[2] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 8);
|
||||
counterBuf[3] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 0);
|
||||
|
||||
status = libspdm_sha384_init(sha384_ctx);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_init() failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_update(sha384_ctx, seed, seed_length);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_update() failed(seed) !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_update(sha384_ctx, counterBuf, 4);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_update() failed(counterBuf) !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
if (outLength + mdLength <= maskedDB_length)
|
||||
{
|
||||
status = libspdm_sha384_final(sha384_ctx, mask + outLength);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_final() failed (<= maskedDB_length) !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
outLength += mdLength;
|
||||
}
|
||||
else
|
||||
{
|
||||
status = libspdm_sha384_final(sha384_ctx, hash_value);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_final() failed(> maskedDB_length) !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
memcpy(mask + outLength, hash_value, maskedDB_length - outLength);
|
||||
outLength = maskedDB_length;
|
||||
}
|
||||
}
|
||||
status = true;
|
||||
|
||||
_error_exit:
|
||||
libspdm_sha384_free(sha384_ctx);
|
||||
return status;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
0xbc : Trailer Field
|
||||
+-----------+
|
||||
| M |
|
||||
+-----------+
|
||||
|
|
||||
V
|
||||
Hash
|
||||
|
|
||||
V
|
||||
+--------+----------+----------+
|
||||
M' = |Padding1| mHash | salt |
|
||||
+--------+----------+----------+
|
||||
|--------------|---------------|
|
||||
|
|
||||
+--------+----------+ V
|
||||
DB = |Padding2| salt | Hash
|
||||
+--------+----------+ |
|
||||
| |
|
||||
V |
|
||||
xor <--- MGF <---|
|
||||
| |
|
||||
| |
|
||||
V V
|
||||
+-------------------+----------+----+
|
||||
EM = | maskedDB | H |0xbc|
|
||||
+-------------------+----------+----+
|
||||
|
||||
salt : The random number, we hardcode its size as hash size here.
|
||||
M' : The concatenation of padding1 + message hash + salt
|
||||
MGF : Mask generation function.
|
||||
A mask generation function takes an octet string of variable length
|
||||
and a desired output length as input, and outputs an octet string of
|
||||
the desired length
|
||||
MGF1 is a Mask Generation Function based on a hash function.
|
||||
|
||||
Padding1 : 8 zeros
|
||||
Padding2 : 0x01
|
||||
|
||||
The detail spec is at https://datatracker.ietf.org/doc/html/rfc2437
|
||||
*/
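/*
 * A compact restatement of the layout above (a sketch for reference only,
 * assuming SHA-384 and a salt length equal to the hash length, which is
 * what this file enforces):
 *
 *   emLength        = modulus bits / 8 (BITS_TO_BYTES), reduced by one after
 *                     emitting a leading zero byte when msBits == 0
 *   maskedDB_length = emLength - hash_size - 1
 *   H               = Hash(eight zero bytes || mHash || salt)
 *   DB              = zero padding || 0x01 || salt
 *   EM              = (DB xor MGF1(H, maskedDB_length)) || H || 0xbc
 *
 * The signature is then EM^d mod n, computed below with the kernel MPI
 * helpers (mpi_powm).
 */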
|
||||
|
||||
/*!
|
||||
* Set keys and call PKCS1_MGF1 to generate signature.
|
||||
*
|
||||
* @param rsa_context : A RSA context pointer
|
||||
* @param hash_nid : The hash NID
|
||||
* @param message_hash : The pointer to message hash
|
||||
* @param signature : The pointer is used to store generated signature
|
||||
* @param sig_size : For input, a pointer store signature buffer size.
|
||||
* For output, a pointer store generate signature size.
|
||||
* @param salt_Length : The salt length for RSA-PSS algorithm
|
||||
*
|
||||
* @Return : True if OK; otherwise return False
|
||||
*/
|
||||
static bool nvRsaPaddingAddPkcs1PssMgf1
|
||||
(
|
||||
void *rsa_context,
|
||||
size_t hash_nid,
|
||||
const uint8_t *message_hash,
|
||||
size_t hash_size,
|
||||
uint8_t *signature,
|
||||
size_t *sig_size,
|
||||
int salt_length
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return false;
|
||||
#else
|
||||
bool status = false;
|
||||
struct rsa_ctx *ctx = rsa_context;
|
||||
void *sha384_ctx = NULL;
|
||||
uint32_t keySize;
|
||||
uint32_t msBits;
|
||||
size_t emLength;
|
||||
uint8_t saltBuf[64];
|
||||
size_t maskedDB_length;
|
||||
size_t i;
|
||||
uint8_t *tmp_H;
|
||||
uint8_t *tmp_P;
|
||||
int rc;
|
||||
unsigned int ret_data_size;
|
||||
MPI mpi_n = NULL;
|
||||
MPI mpi_d = NULL;
|
||||
MPI mpi_c = mpi_alloc(0);
|
||||
MPI mpi_p = mpi_alloc(0);
|
||||
|
||||
// read modulus to BN struct
|
||||
mpi_n = mpi_read_raw_data(ctx->key.n, ctx->key.n_sz);
|
||||
if (mpi_n == NULL)
|
||||
{
|
||||
pr_err("%s : mpi_n create failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
// read private exponent to BN struct
|
||||
mpi_d = mpi_read_raw_data(ctx->key.d, ctx->key.d_sz);
|
||||
if (mpi_d == NULL)
|
||||
{
|
||||
pr_err("%s : mpi_d create failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
keySize = mpi_n->nbits;
|
||||
msBits = GET_MOST_SIGNIFICANT_BIT(keySize);
|
||||
emLength = BITS_TO_BYTES(keySize);
|
||||
|
||||
if (msBits == 0)
|
||||
{
|
||||
*signature++ = 0;
|
||||
emLength--;
|
||||
}
|
||||
|
||||
if (emLength < hash_size + 2)
|
||||
{
|
||||
pr_err("%s : emLength < hash_size + 2 !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
// Now, we only support salt_length == LIBSPDM_SHA384_DIGEST_SIZE
|
||||
if (salt_length != LIBSPDM_SHA384_DIGEST_SIZE ||
|
||||
hash_nid != LIBSPDM_CRYPTO_NID_SHA384)
|
||||
{
|
||||
pr_err("%s : Invalid salt_length (%x) \n", __FUNCTION__, salt_length);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
get_random_bytes(saltBuf, salt_length);
|
||||
|
||||
maskedDB_length = emLength - hash_size - 1;
|
||||
tmp_H = signature + maskedDB_length;
|
||||
sha384_ctx = libspdm_sha384_new();
|
||||
|
||||
if (sha384_ctx == NULL)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_new() failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_init(sha384_ctx);
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_init() failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_update(sha384_ctx, zeroes, sizeof(zeroes));
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_update() with zeros failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_update(sha384_ctx, message_hash, hash_size);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_update() with message_hash failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
if (salt_length)
|
||||
{
|
||||
status = libspdm_sha384_update(sha384_ctx, saltBuf, salt_length);
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_update() with saltBuf failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
}
|
||||
|
||||
status = libspdm_sha384_final(sha384_ctx, tmp_H);
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_final() with tmp_H failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
/* Generate dbMask in place then perform XOR on it */
|
||||
status = NV_PKCS1_MGF1(signature, maskedDB_length, tmp_H, hash_size, hash_nid);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : NV_PKCS1_MGF1() failed \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
tmp_P = signature;
|
||||
tmp_P += emLength - salt_length - hash_size - 2;
|
||||
*tmp_P++ ^= 0x1;
|
||||
|
||||
if (salt_length > 0)
|
||||
{
|
||||
for (i = 0; i < salt_length; i++)
|
||||
{
|
||||
*tmp_P++ ^= saltBuf[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (msBits)
|
||||
{
|
||||
signature[0] &= 0xFF >> (8 - msBits);
|
||||
}
|
||||
|
||||
/* H is already in place so just set final 0xbc */
|
||||
signature[emLength - 1] = RSA_PSS_TRAILER_FIELD;
|
||||
|
||||
// read signature to BN struct
|
||||
mpi_p = mpi_read_raw_data(signature, emLength);
|
||||
if (mpi_p == NULL)
|
||||
{
|
||||
pr_err("%s : mpi_p() create failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
// Starting RSA encryption with the private key over the signature.
|
||||
rc = mpi_powm(mpi_c, mpi_p, mpi_d, mpi_n);
|
||||
if (rc != 0)
|
||||
{
|
||||
pr_err("%s : mpi_powm() failed \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
rc = mpi_read_buffer(mpi_c, signature, *sig_size, &ret_data_size, NULL);
|
||||
if (rc != 0)
|
||||
{
|
||||
pr_err("%s : mpi_read_buffer() failed \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
if (ret_data_size > *sig_size)
|
||||
{
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
*sig_size = ret_data_size;
|
||||
status = true;
|
||||
|
||||
_error_exit:
|
||||
|
||||
mpi_free(mpi_n);
|
||||
mpi_free(mpi_d);
|
||||
mpi_free(mpi_c);
|
||||
mpi_free(mpi_p);
|
||||
|
||||
libspdm_sha384_free(sha384_ctx);
|
||||
|
||||
return status;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/*!
|
||||
* Perform the RSA-PSS signature signing process with the LKCA library.
|
||||
*
|
||||
* @param rsa_context : A RSA context pointer
|
||||
* @param hash_nid : The hash NID
|
||||
* @param message_hash : The pointer to message hash
|
||||
* @param signature : The pointer is used to store generated signature
|
||||
* @param sig_size : For input, a pointer store signature buffer size.
|
||||
* For output, a pointer store generate signature size.
|
||||
*
|
||||
* @Return : True if OK; otherwise return False
|
||||
*/
|
||||
bool lkca_rsa_pss_sign
|
||||
(
|
||||
void *rsa_context,
|
||||
size_t hash_nid,
|
||||
const uint8_t *message_hash,
|
||||
size_t hash_size,
|
||||
uint8_t *signature,
|
||||
size_t *sig_size
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return true;
|
||||
#else
|
||||
return nvRsaPaddingAddPkcs1PssMgf1(rsa_context,
|
||||
hash_nid,
|
||||
message_hash,
|
||||
hash_size,
|
||||
signature,
|
||||
sig_size,
|
||||
LIBSPDM_SHA384_DIGEST_SIZE);
|
||||
#endif
|
||||
}
|
||||
|
||||
kernel-open/nvidia/libspdm_rsa_ext.c (new file, 85 lines)
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Comments, prototypes and checks taken from DMTF: Copyright 2021-2022 DMTF. All rights reserved.
|
||||
* License: BSD 3-Clause License. For full text see link: https://github.com/DMTF/libspdm/blob/main/LICENSE.md
|
||||
*/
|
||||
|
||||
/** @file
|
||||
* RSA Asymmetric Cipher Wrapper Implementation.
|
||||
*
|
||||
* This file implements following APIs which provide more capabilities for RSA:
|
||||
* 1) rsa_pss_sign
|
||||
*
|
||||
* RFC 8017 - PKCS #1: RSA Cryptography Specifications version 2.2
|
||||
**/
|
||||
|
||||
#include "internal_crypt_lib.h"
|
||||
#include "library/cryptlib.h"
|
||||
|
||||
/**
|
||||
* Carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme.
|
||||
*
|
||||
* This function carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme defined in
|
||||
* RSA PKCS#1 v2.2.
|
||||
*
|
||||
* The salt length is the same as the digest length.
|
||||
*
|
||||
* If the signature buffer is too small to hold the contents of signature, false
|
||||
* is returned and sig_size is set to the required buffer size to obtain the signature.
|
||||
*
|
||||
* If rsa_context is NULL, then return false.
|
||||
* If message_hash is NULL, then return false.
|
||||
* hash_size must match hash_nid. The NID could be SHA256, SHA384, SHA512, SHA3_256, SHA3_384, or SHA3_512.
|
||||
* If sig_size is large enough but signature is NULL, then return false.
|
||||
*
|
||||
* @param[in] rsa_context Pointer to RSA context for signature generation.
|
||||
* @param[in] hash_nid hash NID
|
||||
* @param[in] message_hash Pointer to octet message hash to be signed.
|
||||
* @param[in] hash_size size of the message hash in bytes.
|
||||
* @param[out] signature Pointer to buffer to receive RSA-SSA PSS signature.
|
||||
* @param[in, out] sig_size On input, the size of signature buffer in bytes.
|
||||
* On output, the size of data returned in signature buffer in bytes.
|
||||
*
|
||||
* @retval true signature successfully generated in RSA-SSA PSS.
|
||||
* @retval false signature generation failed.
|
||||
* @retval false sig_size is too small.
|
||||
*
|
||||
**/
|
||||
bool libspdm_rsa_pss_sign(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
uint8_t *signature, size_t *sig_size)
|
||||
{
|
||||
return lkca_rsa_pss_sign(rsa_context, hash_nid, message_hash, hash_size,
|
||||
signature, sig_size);
|
||||
}
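/*
 * Illustrative caller sketch (not part of the original file; "digest", "n",
 * "e" and "d" are placeholder buffers, everything else is defined in
 * libspdm_rsa.c or the libspdm headers used here):
 *
 *   uint8_t sig[512];
 *   size_t  sig_size = sizeof(sig);
 *   void   *rsa      = libspdm_rsa_new();
 *
 *   // Load the modulus and exponents before signing.
 *   libspdm_rsa_set_key(rsa, LIBSPDM_RSA_KEY_N, n, n_size);
 *   libspdm_rsa_set_key(rsa, LIBSPDM_RSA_KEY_E, e, e_size);
 *   libspdm_rsa_set_key(rsa, LIBSPDM_RSA_KEY_D, d, d_size);
 *
 *   if (libspdm_rsa_pss_sign(rsa, LIBSPDM_CRYPTO_NID_SHA384,
 *                            digest, LIBSPDM_SHA384_DIGEST_SIZE,
 *                            sig, &sig_size))
 *   {
 *       // sig[0 .. sig_size) now holds the RSA-SSA PSS signature.
 *   }
 *
 *   libspdm_rsa_free(rsa);
 */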
|
||||
//
|
||||
// In RM, we only need the signing process, so we stub out the verification
// function. Verification is needed in GSP code only,
|
||||
//
|
||||
bool libspdm_rsa_pss_verify(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
const uint8_t *signature, size_t sig_size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
kernel-open/nvidia/nv-caps-imex.c (new file, 153 lines)
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nv-linux.h"
|
||||
|
||||
extern int NVreg_ImexChannelCount;
|
||||
|
||||
static int nv_caps_imex_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_caps_imex_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct file_operations g_nv_caps_imex_fops =
|
||||
{
|
||||
.owner = THIS_MODULE,
|
||||
.open = nv_caps_imex_open,
|
||||
.release = nv_caps_imex_release
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
NvBool initialized;
|
||||
struct cdev cdev;
|
||||
dev_t devno;
|
||||
} g_nv_caps_imex;
|
||||
|
||||
int NV_API_CALL nv_caps_imex_channel_get(int fd)
|
||||
{
|
||||
#if NV_FILESYSTEM_ACCESS_AVAILABLE
|
||||
struct file *file;
|
||||
struct inode *inode;
|
||||
int channel = -1;
|
||||
|
||||
file = fget(fd);
|
||||
if (file == NULL)
|
||||
{
|
||||
return channel;
|
||||
}
|
||||
|
||||
inode = NV_FILE_INODE(file);
|
||||
if (inode == NULL)
|
||||
{
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Make sure the fd belongs to the nv-caps-imex-drv */
|
||||
if (file->f_op != &g_nv_caps_imex_fops)
|
||||
{
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* minor number is same as channel */
|
||||
channel = MINOR(inode->i_rdev);
|
||||
|
||||
out:
|
||||
fput(file);
|
||||
|
||||
return channel;
|
||||
#else
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
int NV_API_CALL nv_caps_imex_channel_count(void)
|
||||
{
|
||||
return NVreg_ImexChannelCount;
|
||||
}
|
||||
|
||||
int NV_API_CALL nv_caps_imex_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (g_nv_caps_imex.initialized)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "nv-caps-imex is already initialized.\n");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
if (NVreg_ImexChannelCount == 0)
|
||||
{
|
||||
nv_printf(NV_DBG_INFO, "nv-caps-imex is disabled.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
rc = alloc_chrdev_region(&g_nv_caps_imex.devno, 0,
|
||||
NVreg_ImexChannelCount,
|
||||
"nvidia-caps-imex-channels");
|
||||
if (rc < 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to create cdev.\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
cdev_init(&g_nv_caps_imex.cdev, &g_nv_caps_imex_fops);
|
||||
|
||||
g_nv_caps_imex.cdev.owner = THIS_MODULE;
|
||||
|
||||
rc = cdev_add(&g_nv_caps_imex.cdev, g_nv_caps_imex.devno,
|
||||
NVreg_ImexChannelCount);
|
||||
if (rc < 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to add cdev.\n");
|
||||
goto cdev_add_fail;
|
||||
}
|
||||
|
||||
g_nv_caps_imex.initialized = NV_TRUE;
|
||||
|
||||
return 0;
|
||||
|
||||
cdev_add_fail:
|
||||
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
void NV_API_CALL nv_caps_imex_exit(void)
|
||||
{
|
||||
if (!g_nv_caps_imex.initialized)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
cdev_del(&g_nv_caps_imex.cdev);
|
||||
|
||||
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
|
||||
|
||||
g_nv_caps_imex.initialized = NV_FALSE;
|
||||
}
|
||||
kernel-open/nvidia/nv-caps-imex.h (new file, 34 lines)
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NV_CAPS_IMEX_H_
|
||||
#define _NV_CAPS_IMEX_H_
|
||||
|
||||
#include <nv-kernel-interface-api.h>
|
||||
|
||||
int NV_API_CALL nv_caps_imex_init(void);
|
||||
void NV_API_CALL nv_caps_imex_exit(void);
|
||||
int NV_API_CALL nv_caps_imex_channel_get(int fd);
|
||||
int NV_API_CALL nv_caps_imex_channel_count(void);
|
||||
|
||||
#endif /* _NV_CAPS_IMEX_H_ */
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -577,12 +577,9 @@ int nvidia_mmap_helper(
|
||||
//
|
||||
// This path is similar to the sysmem mapping code.
|
||||
// TODO: Refactor is needed as part of bug#2001704.
|
||||
// Use pfn_valid to determine whether the physical address has
|
||||
// backing struct page. This is used to isolate P8 from P9.
|
||||
//
|
||||
if ((nv_get_numa_status(nvl) == NV_NUMA_STATUS_ONLINE) &&
|
||||
!IS_REG_OFFSET(nv, access_start, access_len) &&
|
||||
(pfn_valid(PFN_DOWN(mmap_start))))
|
||||
!IS_REG_OFFSET(nv, access_start, access_len))
|
||||
{
|
||||
ret = nvidia_mmap_numa(vma, mmap_context);
|
||||
if (ret)
|
||||
|
||||
@@ -839,6 +839,45 @@
|
||||
#define __NV_ENABLE_NONBLOCKING_OPEN EnableNonblockingOpen
|
||||
#define NV_ENABLE_NONBLOCKING_OPEN NV_REG_STRING(__NV_ENABLE_NONBLOCKING_OPEN)
|
||||
|
||||
/*
|
||||
* Option: NVreg_ImexChannelCount
|
||||
*
|
||||
* Description:
|
||||
*
|
||||
* This option allows users to specify the number of IMEX (import/export)
|
||||
* channels. Within an IMEX domain, the channels allow sharing memory
|
||||
* securely in a multi-user environment using the CUDA driver's fabric handle
|
||||
* based APIs.
|
||||
*
|
||||
* An IMEX domain is either an OS instance or a group of securely
|
||||
* connected OS instances using the NVIDIA IMEX daemon. The option must
|
||||
* be set to the same value on each OS instance within the IMEX domain.
|
||||
*
|
||||
* An IMEX channel is a logical entity that is represented by a /dev node.
|
||||
* The IMEX channels are global resources within the IMEX domain. When
|
||||
* exporter and importer CUDA processes have been granted access to the
|
||||
* same IMEX channel, they can securely share memory.
|
||||
*
|
||||
* Note that the NVIDIA driver will not attempt to create the /dev nodes. Thus,
|
||||
* the related CUDA APIs will fail with an insufficient permission error until
|
||||
* the /dev nodes are set up. The creation of these /dev nodes,
|
||||
* /dev/nvidia-caps-imex-channels/channelN, must be handled by the
|
||||
* administrator, where N is the minor number. The major number can be
|
||||
* queried from /proc/devices.
|
||||
*
|
||||
* nvidia-modprobe CLI support is available to set up the /dev nodes.
|
||||
* NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
|
||||
* and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
|
||||
*
|
||||
* Possible values:
|
||||
* 0 - Disable IMEX using CUDA driver's fabric handles.
|
||||
* N - N IMEX channels will be enabled in the driver to facilitate N
|
||||
* concurrent users. Default value is 2048 channels, and the current
|
||||
* maximum value is 20-bit, same as Linux dev_t's minor number limit.
|
||||
*/
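/*
 * Usage illustration only (the values below are examples, not defaults
 * beyond what is documented above): the option is set like any other
 * NVIDIA module parameter, e.g. in a modprobe configuration file:
 *
 *   options nvidia NVreg_ImexChannelCount=32
 *
 * A channel node can then be created by the administrator with the major
 * number listed for "nvidia-caps-imex-channels" in /proc/devices:
 *
 *   mknod /dev/nvidia-caps-imex-channels/channel0 c <major> 0
 */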
|
||||
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
|
||||
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
|
||||
|
||||
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
|
||||
|
||||
/*
|
||||
@@ -887,6 +926,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_TEMPORARY_FILE_PATH, NULL);
|
||||
NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
|
||||
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
|
||||
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
|
||||
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
|
||||
|
||||
/*
|
||||
*----------------registry database definition----------------------
|
||||
@@ -933,6 +973,7 @@ nv_parm_t nv_parms[] = {
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_DBG_BREAKPOINT),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -55,6 +55,7 @@
|
||||
#include "nv-kthread-q.h"
|
||||
#include "nv-pat.h"
|
||||
#include "nv-dmabuf.h"
|
||||
#include "nv-caps-imex.h"
|
||||
|
||||
#if !defined(CONFIG_RETPOLINE)
|
||||
#include "nv-retpoline.h"
|
||||
@@ -825,11 +826,18 @@ static int __init nvidia_init_module(void)
|
||||
goto procfs_exit;
|
||||
}
|
||||
|
||||
rc = nv_caps_imex_init();
|
||||
if (rc < 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize IMEX channels.\n");
|
||||
goto caps_root_exit;
|
||||
}
|
||||
|
||||
rc = nv_module_init(&sp);
|
||||
if (rc < 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize module.\n");
|
||||
goto caps_root_exit;
|
||||
goto caps_imex_exit;
|
||||
}
|
||||
|
||||
count = nvos_count_devices();
|
||||
@@ -941,6 +949,9 @@ drivers_exit:
|
||||
module_exit:
|
||||
nv_module_exit(sp);
|
||||
|
||||
caps_imex_exit:
|
||||
nv_caps_imex_exit();
|
||||
|
||||
caps_root_exit:
|
||||
nv_caps_root_exit();
|
||||
|
||||
@@ -967,6 +978,8 @@ static void __exit nvidia_exit_module(void)
|
||||
|
||||
nv_module_exit(sp);
|
||||
|
||||
nv_caps_imex_exit();
|
||||
|
||||
nv_caps_root_exit();
|
||||
|
||||
nv_procfs_exit();
|
||||
@@ -2040,7 +2053,7 @@ nvidia_close_callback(
|
||||
{
|
||||
nv_linux_state_t *nvl;
|
||||
nv_state_t *nv;
|
||||
nvidia_stack_t *sp;
|
||||
nvidia_stack_t *sp = nvlfp->sp;
|
||||
NvBool bRemove = NV_FALSE;
|
||||
|
||||
nvl = nvlfp->nvptr;
|
||||
@@ -2052,12 +2065,11 @@ nvidia_close_callback(
|
||||
*/
|
||||
|
||||
nv_free_file_private(nvlfp);
|
||||
nv_kmem_cache_free_stack(nvlfp->sp);
|
||||
nv_kmem_cache_free_stack(sp);
|
||||
return;
|
||||
}
|
||||
|
||||
nv = NV_STATE_PTR(nvl);
|
||||
sp = nvlfp->sp;
|
||||
|
||||
rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);
|
||||
|
||||
@@ -6050,6 +6062,131 @@ failed:
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
void NV_API_CALL nv_get_screen_info(
|
||||
nv_state_t *nv,
|
||||
NvU64 *pPhysicalAddress,
|
||||
NvU32 *pFbWidth,
|
||||
NvU32 *pFbHeight,
|
||||
NvU32 *pFbDepth,
|
||||
NvU32 *pFbPitch,
|
||||
NvU64 *pFbSize
|
||||
)
|
||||
{
|
||||
*pPhysicalAddress = 0;
|
||||
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = *pFbSize = 0;
|
||||
|
||||
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
|
||||
if (num_registered_fb > 0)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_registered_fb; i++)
|
||||
{
|
||||
if (!registered_fb[i])
|
||||
continue;
|
||||
|
||||
/* Make sure base address is mapped to GPU BAR */
|
||||
if (NV_IS_CONSOLE_MAPPED(nv, registered_fb[i]->fix.smem_start))
|
||||
{
|
||||
*pPhysicalAddress = registered_fb[i]->fix.smem_start;
|
                *pFbWidth = registered_fb[i]->var.xres;
                *pFbHeight = registered_fb[i]->var.yres;
                *pFbDepth = registered_fb[i]->var.bits_per_pixel;
                *pFbPitch = registered_fb[i]->fix.line_length;
                *pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
                return;
            }
        }
    }
#endif

    /*
     * If the screen info is not found in the registered FBs then fall back
     * to the screen_info structure.
     *
     * The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
     * generic framebuffers so the new generic system-framebuffer drivers can
     * be used instead. DRM_SIMPLEDRM drives the generic system-framebuffer
     * device created by SYSFB_SIMPLEFB.
     *
     * SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
     * information required by nv_get_screen_info(), therefore we need to
     * fall back to the screen_info structure.
     *
     * After commit b8466fe82b79 ("efi: move screen_info into efi init code")
     * in v6.7, 'screen_info' is exported as a GPL-licensed symbol for ARM64.
     */

#if NV_CHECK_EXPORT_SYMBOL(screen_info)
    /*
     * If there is no framebuffer console, return a size of 0.
     *
     * orig_video_isVGA is set to 1 during early Linux kernel
     * initialization, and then will be set to a value, such as
     * VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI, if an fbdev console is used.
     */
    if (screen_info.orig_video_isVGA > 1)
    {
        NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
        physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif

        /* Make sure the base address is mapped to a GPU BAR */
        if (NV_IS_CONSOLE_MAPPED(nv, physAddr))
        {
            *pPhysicalAddress = physAddr;
            *pFbWidth = screen_info.lfb_width;
            *pFbHeight = screen_info.lfb_height;
            *pFbDepth = screen_info.lfb_depth;
            *pFbPitch = screen_info.lfb_linelength;
            *pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
        }
    }
#else
    {
        nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
        struct pci_dev *pci_dev = nvl->pci_dev;
        int i;

        if (pci_dev == NULL)
            return;

        BUILD_BUG_ON(NV_GPU_BAR_INDEX_IMEM != NV_GPU_BAR_INDEX_FB + 1);
        for (i = NV_GPU_BAR_INDEX_FB; i <= NV_GPU_BAR_INDEX_IMEM; i++)
        {
            int bar_index = nv_bar_index_to_os_bar_index(pci_dev, i);
            struct resource *gpu_bar_res = &pci_dev->resource[bar_index];
            struct resource *res = gpu_bar_res->child;

            /*
             * The console resource will become a child resource of the
             * pci-dev resource. Check whether the child resource's start
             * address matches the expected console start address.
             */
            if ((res != NULL) &&
                NV_IS_CONSOLE_MAPPED(nv, res->start))
            {
                NvU32 res_name_len = strlen(res->name);

                /*
                 * The resource name ends with 'fb' (efifb, vesafb, etc.).
                 * For simple-framebuffer, the resource name is 'BOOTFB'.
                 * Confirm that the resource name ends with either 'fb' or 'FB'.
                 */
                if ((res_name_len > 2) &&
                    !strcasecmp((res->name + res_name_len - 2), "fb"))
                {
                    *pPhysicalAddress = res->start;
                    *pFbSize = resource_size(res);
                    return;
                }
            }
        }
    }
#endif
}


module_init(nvidia_init_module);
module_exit(nvidia_exit_module);

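The two checks above, assembling a 64-bit framebuffer base from the low and high halves of screen_info and accepting a child resource only when its name ends in "fb" or "FB", are easy to exercise on their own. The standalone sketch below is an editor's illustration rather than driver code: the file name, helper names, and sample values are invented, and it only mirrors the address assembly and suffix test shown above.

/* illustrative_console_check.c : build with: cc illustrative_console_check.c */
#include <stdio.h>
#include <string.h>
#include <strings.h>   /* strcasecmp */
#include <stdint.h>

/* Assemble a 64-bit base the same way physAddr is built above. */
static uint64_t lfb_base_64(uint32_t lfb_base, uint32_t ext_lfb_base)
{
    return (uint64_t)lfb_base | ((uint64_t)ext_lfb_base << 32);
}

/* Accept names ending in "fb"/"FB", e.g. "efifb", "vesafb", "BOOTFB". */
static int name_is_framebuffer(const char *name)
{
    size_t len = strlen(name);
    return (len > 2) && !strcasecmp(name + len - 2, "fb");
}

int main(void)
{
    const char *names[] = { "efifb", "vesafb", "BOOTFB", "nvkms" };
    size_t i;

    /* Hypothetical values: low half 0xE0000000, high half 0x4. */
    printf("base = 0x%llx\n",
           (unsigned long long)lfb_base_64(0xE0000000u, 0x4u));

    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
        printf("%-7s -> %s\n", names[i],
               name_is_framebuffer(names[i]) ? "console fb" : "not a console fb");
    return 0;
}
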
@@ -279,9 +279,11 @@ NV_STATUS nvGpuOpsPagingChannelPushStream(UvmGpuPagingChannel *channel,
                                          char *methodStream,
                                          NvU32 methodStreamSize);

NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);
NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(gpuFaultInfo *pFaultInfo,
                                             NvBool bCopyAndFlush);

NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo, NvBool bEnable);
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo,
                                       NvBool bEnable);

// Interface used for CCSL

@@ -985,24 +985,30 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
}
EXPORT_SYMBOL(nvUvmInterfaceGetNonReplayableFaults);

NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device)
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
                                                   NvBool bCopyAndFlush)
{
    nvidia_stack_t *sp = nvUvmGetSafeStack();
    NV_STATUS status;

    status = rm_gpu_ops_flush_replayable_fault_buffer(sp, (gpuDeviceHandle)device);
    status = rm_gpu_ops_flush_replayable_fault_buffer(sp,
                                                      pFaultInfo,
                                                      bCopyAndFlush);

    nvUvmFreeSafeStack(sp);
    return status;
}
EXPORT_SYMBOL(nvUvmInterfaceFlushReplayableFaultBuffer);

NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable)
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
                                             NvBool bEnable)
{
    nvidia_stack_t *sp = nvUvmGetSafeStack();
    NV_STATUS status;

    status = rm_gpu_ops_toggle_prefetch_faults(sp, pFaultInfo, bEnable);
    status = rm_gpu_ops_toggle_prefetch_faults(sp,
                                               pFaultInfo,
                                               bEnable);

    nvUvmFreeSafeStack(sp);
    return status;

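Both wrappers above follow the same shape: obtain a scratch stack with nvUvmGetSafeStack(), forward the arguments to the corresponding rm_gpu_ops_* call, release the stack with nvUvmFreeSafeStack(), and return the callee's status unchanged. The userspace sketch below is only an analogue of that shape; every name in it is invented for illustration and it is not how the driver is built or tested.

/* wrapper_pattern_demo.c : build with: cc wrapper_pattern_demo.c */
#include <stdio.h>
#include <stdlib.h>

typedef int demo_status_t;
#define DEMO_OK 0

typedef struct { unsigned char bytes[256]; } demo_stack_t;

static demo_stack_t *demo_get_stack(void)             { return malloc(sizeof(demo_stack_t)); }
static void          demo_free_stack(demo_stack_t *sp) { free(sp); }

/* Stand-in for a backend call that needs the scratch stack. */
static demo_status_t demo_backend_op(demo_stack_t *sp, int arg1, int arg2)
{
    (void)sp;
    printf("backend op with %d, %d\n", arg1, arg2);
    return DEMO_OK;
}

/* Same shape as the wrappers above: acquire, call, release, return status. */
static demo_status_t demo_interface_op(int arg1, int arg2)
{
    demo_stack_t *sp = demo_get_stack();
    demo_status_t status;

    if (sp == NULL)
        return -1;

    status = demo_backend_op(sp, arg1, arg2);

    demo_free_stack(sp);
    return status;
}

int main(void)
{
    printf("status = %d\n", demo_interface_op(1, 2));
    return 0;
}
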
@@ -30,18 +30,21 @@ NVIDIA_SOURCES += nvidia/nv-report-err.c
NVIDIA_SOURCES += nvidia/nv-rsync.c
NVIDIA_SOURCES += nvidia/nv-msi.c
NVIDIA_SOURCES += nvidia/nv-caps.c
NVIDIA_SOURCES += nvidia/nv-caps-imex.c
NVIDIA_SOURCES += nvidia/nv_uvm_interface.c
NVIDIA_SOURCES += nvidia/libspdm_aead.c
NVIDIA_SOURCES += nvidia/libspdm_ecc.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf.c
NVIDIA_SOURCES += nvidia/libspdm_rand.c
NVIDIA_SOURCES += nvidia/libspdm_shash.c
NVIDIA_SOURCES += nvidia/libspdm_rsa.c
NVIDIA_SOURCES += nvidia/libspdm_aead_aes_gcm.c
NVIDIA_SOURCES += nvidia/libspdm_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hmac_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf_sha.c
NVIDIA_SOURCES += nvidia/libspdm_ec.c
NVIDIA_SOURCES += nvidia/libspdm_x509.c
NVIDIA_SOURCES += nvidia/libspdm_rsa_ext.c
NVIDIA_SOURCES += nvidia/nvlink_linux.c
NVIDIA_SOURCES += nvidia/nvlink_caps.c
NVIDIA_SOURCES += nvidia/linux_nvswitch.c

@@ -25,6 +25,7 @@

#include "os-interface.h"
#include "nv-linux.h"
#include "nv-caps-imex.h"

#include "nv-time.h"

@@ -59,6 +60,8 @@ NvBool os_dma_buf_enabled = NV_TRUE;
NvBool os_dma_buf_enabled = NV_FALSE;
#endif // CONFIG_DMA_SHARED_BUFFER

NvBool os_imex_channel_is_supported = NV_TRUE;

void NV_API_CALL os_disable_console_access(void)
{
    console_lock();
@@ -1231,90 +1234,6 @@ NvBool NV_API_CALL os_is_efi_enabled(void)
    return efi_enabled(EFI_BOOT);
}

void NV_API_CALL os_get_screen_info(
    NvU64 *pPhysicalAddress,
    NvU32 *pFbWidth,
    NvU32 *pFbHeight,
    NvU32 *pFbDepth,
    NvU32 *pFbPitch,
    NvU64 consoleBar1Address,
    NvU64 consoleBar2Address
)
{
    *pPhysicalAddress = 0;
    *pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0;

#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
    if (num_registered_fb > 0)
    {
        int i;

        for (i = 0; i < num_registered_fb; i++)
        {
            if (!registered_fb[i])
                continue;

            /* Make sure base address is mapped to GPU BAR */
            if ((registered_fb[i]->fix.smem_start == consoleBar1Address) ||
                (registered_fb[i]->fix.smem_start == consoleBar2Address))
            {
                *pPhysicalAddress = registered_fb[i]->fix.smem_start;
                *pFbWidth = registered_fb[i]->var.xres;
                *pFbHeight = registered_fb[i]->var.yres;
                *pFbDepth = registered_fb[i]->var.bits_per_pixel;
                *pFbPitch = registered_fb[i]->fix.line_length;
                return;
            }
        }
    }
#endif

    /*
     * If the screen info is not found in the registered FBs then fallback
     * to the screen_info structure.
     *
     * The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
     * generic framebuffers so the new generic system-framebuffer drivers can
     * be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
     * device created by SYSFB_SIMPLEFB.
     *
     * SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
     * information required by os_get_screen_info(), therefore you need to
     * fall back onto the screen_info structure.
     *
     * After commit b8466fe82b79 ("efi: move screen_info into efi init code")
     * in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
     */

#if NV_CHECK_EXPORT_SYMBOL(screen_info)
    /*
     * If there is not a framebuffer console, return 0 size.
     *
     * orig_video_isVGA is set to 1 during early Linux kernel
     * initialization, and then will be set to a value, such as
     * VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
     */
    if (screen_info.orig_video_isVGA > 1)
    {
        NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
        physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif

        /* Make sure base address is mapped to GPU BAR */
        if ((physAddr == consoleBar1Address) ||
            (physAddr == consoleBar2Address))
        {
            *pPhysicalAddress = physAddr;
            *pFbWidth = screen_info.lfb_width;
            *pFbHeight = screen_info.lfb_height;
            *pFbDepth = screen_info.lfb_depth;
            *pFbPitch = screen_info.lfb_linelength;
        }
    }
#endif
}

void NV_API_CALL os_dump_stack(void)
{
    dump_stack();
@@ -2182,6 +2101,22 @@ void NV_API_CALL os_nv_cap_close_fd
    nv_cap_close_fd(fd);
}

NvS32 NV_API_CALL os_imex_channel_count
(
    void
)
{
    return nv_caps_imex_channel_count();
}

NvS32 NV_API_CALL os_imex_channel_get
(
    NvU64 descriptor
)
{
    return nv_caps_imex_channel_get((int)descriptor);
}

/*
 * Reads the total memory and free memory of a NUMA node from the kernel.
 */

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -87,59 +87,10 @@ static NV_STATUS get_io_ptes(struct vm_area_struct *vma,
    return NV_OK;
}

/*!
 * @brief Pins user IO pages that have been mapped to the user processes virtual
 *        address space with remap_pfn_range.
 *
 * @param[in]     vma         VMA that contains the virtual address range given by the
 *                            start and the page count.
 * @param[in]     start       Beginning of the virtual address range of the IO pages.
 * @param[in]     page_count  Number of pages to pin from start.
 * @param[in,out] page_array  Storage array for pointers to the pinned pages.
 *                            Must be large enough to contain at least page_count
 *                            pointers.
 *
 * @return NV_OK if the pages were pinned successfully, error otherwise.
 */
static NV_STATUS get_io_pages(struct vm_area_struct *vma,
                              NvUPtr start,
                              NvU64 page_count,
                              struct page **page_array)
{
    NV_STATUS rmStatus = NV_OK;
    NvU64 i, pinned = 0;
    unsigned long pfn;

    for (i = 0; i < page_count; i++)
    {
        if ((nv_follow_pfn(vma, (start + (i * PAGE_SIZE)), &pfn) < 0) ||
            (!pfn_valid(pfn)))
        {
            rmStatus = NV_ERR_INVALID_ADDRESS;
            break;
        }

        // Page-backed memory mapped to userspace with remap_pfn_range
        page_array[i] = pfn_to_page(pfn);
        get_page(page_array[i]);
        pinned++;
    }

    if (pinned < page_count)
    {
        for (i = 0; i < pinned; i++)
            put_page(page_array[i]);
        rmStatus = NV_ERR_INVALID_ADDRESS;
    }

    return rmStatus;
}

NV_STATUS NV_API_CALL os_lookup_user_io_memory(
    void *address,
    NvU64 page_count,
    NvU64 **pte_array,
    void **page_array
    NvU64 **pte_array
)
{
    NV_STATUS rmStatus;
@@ -187,18 +138,9 @@ NV_STATUS NV_API_CALL os_lookup_user_io_memory(
        goto done;
    }

    if (pfn_valid(pfn))
    {
        rmStatus = get_io_pages(vma, start, page_count, (struct page **)result_array);
        if (rmStatus == NV_OK)
            *page_array = (void *)result_array;
    }
    else
    {
        rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
        if (rmStatus == NV_OK)
            *pte_array = (NvU64 *)result_array;
    }
    rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
    if (rmStatus == NV_OK)
        *pte_array = (NvU64 *)result_array;

done:
    nv_mmap_read_unlock(mm);

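The get_io_pages() helper removed above documents a pin-then-unwind idiom: look up and take a reference on each page in order, and if any lookup fails, drop the references already taken before reporting the error. The standalone sketch below is an editor's analogue of that cleanup logic only; the pin()/unpin() stubs and the simulated failure index are made up so the partial-failure path can be compiled and run in userspace.

/* pin_unwind_demo.c : build with: cc pin_unwind_demo.c */
#include <stdio.h>

#define PAGE_COUNT 8
#define FAIL_AT    5   /* pretend the 6th lookup fails */

static int refcount[PAGE_COUNT];

static int pin(unsigned i)            /* stand-in for nv_follow_pfn() + get_page() */
{
    if (i == FAIL_AT)
        return -1;                    /* simulated lookup failure */
    refcount[i]++;
    return 0;
}

static void unpin(unsigned i)         /* stand-in for put_page() */
{
    refcount[i]--;
}

int main(void)
{
    unsigned i, pinned = 0;
    int status = 0;

    for (i = 0; i < PAGE_COUNT; i++)
    {
        if (pin(i) < 0)
        {
            status = -1;
            break;
        }
        pinned++;
    }

    /* On partial failure, release everything pinned so far, as get_io_pages did. */
    if (pinned < PAGE_COUNT)
        for (i = 0; i < pinned; i++)
            unpin(i);

    for (i = 0; i < PAGE_COUNT; i++)
        printf("page %u refcount %d\n", i, refcount[i]);
    printf("status %d\n", status);
    return 0;
}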