mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-27 19:49:47 +00:00

Compare commits: 560.31.02 ... 570.133.07 (8 commits)

| SHA1 |
|---|
| c5e439fea4 |
| 25bef4626e |
| 129479b1b7 |
| 81fe4fb417 |
| 54d69484da |
| 9d0b0414a5 |
| d5a0858f90 |
| ed4be64962 |
@@ -57,6 +57,20 @@ ifeq ($(NV_UNDEF_BEHAVIOR_SANITIZER),1)
UBSAN_SANITIZE := y
endif

#
# Command to create a symbolic link, explicitly resolving the symlink target
# to an absolute path to abstract away the difference between Linux < 6.13,
# where the CWD is the Linux kernel source tree for Kbuild extmod builds, and
# Linux >= 6.13, where the CWD is the external module source tree.
#
# This is used to create the nv*-kernel.o -> nv*-kernel.o_binary symlinks for
# kernel modules which use precompiled binary object files.
#

quiet_cmd_symlink = SYMLINK $@
cmd_symlink = ln -sf $(abspath $<) $@

$(foreach _module, $(NV_KERNEL_MODULES), \
    $(eval include $(src)/$(_module)/$(_module).Kbuild))

@@ -72,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"560.31.02\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.133.07\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -52,6 +52,22 @@ else
endif
endif

# If CC hasn't been set explicitly, check the value of CONFIG_CC_VERSION_TEXT.
# Look for the compiler specified there, and use it by default, if found.
ifeq ($(origin CC),default)
  cc_version_text=$(firstword $(shell . $(KERNEL_OUTPUT)/.config; \
    echo "$$CONFIG_CC_VERSION_TEXT"))

  ifneq ($(cc_version_text),)
    ifeq ($(shell command -v $(cc_version_text)),)
      $(warning WARNING: Unable to locate the compiler $(cc_version_text) \
        from CONFIG_CC_VERSION_TEXT in the kernel configuration.)
    else
      CC=$(cc_version_text)
    endif
  endif
endif

CC ?= cc
LD ?= ld
OBJDUMP ?= objdump

@@ -65,6 +81,16 @@ else
  )
endif

KERNEL_ARCH = $(ARCH)

ifneq ($(filter $(ARCH),i386 x86_64),)
  KERNEL_ARCH = x86
else
  ifeq ($(filter $(ARCH),arm64 powerpc),)
    $(error Unsupported architecture $(ARCH))
  endif
endif

NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem)
NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \
                       $(NV_KERNEL_MODULES))

@@ -106,8 +132,9 @@ else
# module symbols on which the Linux kernel's module resolution is dependent
# and hence must be used whenever present.

LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
             $(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \
LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
             $(KERNEL_SOURCES)/arch/$(KERNEL_ARCH)/kernel/module.lds \
             $(KERNEL_OUTPUT)/arch/$(KERNEL_ARCH)/module.lds \
             $(KERNEL_OUTPUT)/scripts/module.lds
NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s))
@@ -32,7 +32,10 @@
typedef enum
{
    NV_FIRMWARE_TYPE_GSP,
    NV_FIRMWARE_TYPE_GSP_LOG
    NV_FIRMWARE_TYPE_GSP_LOG,
#if defined(NV_VMWARE)
    NV_FIRMWARE_TYPE_BINDATA
#endif
} nv_firmware_type_t;

typedef enum

@@ -45,6 +48,7 @@ typedef enum
    NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
    NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
    NV_FIRMWARE_CHIP_FAMILY_GB10X = 8,
    NV_FIRMWARE_CHIP_FAMILY_GB20X = 9,
    NV_FIRMWARE_CHIP_FAMILY_END,
} nv_firmware_chip_family_t;

@@ -54,6 +58,7 @@ static inline const char *nv_firmware_chip_family_to_string(
{
    switch (fw_chip_family) {
        case NV_FIRMWARE_CHIP_FAMILY_GB10X: return "gb10x";
        case NV_FIRMWARE_CHIP_FAMILY_GB20X: return "gb20x";
        case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
        case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
        case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";

@@ -84,6 +89,7 @@ static inline const char *nv_firmware_for_chip_family(
    switch (fw_chip_family)
    {
        case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GB20X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GA10X:

@@ -104,6 +110,7 @@ static inline const char *nv_firmware_for_chip_family(
    switch (fw_chip_family)
    {
        case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GB20X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GA10X:

@@ -119,7 +126,12 @@ static inline const char *nv_firmware_for_chip_family(
            return "";
        }
    }

#if defined(NV_VMWARE)
    else if (fw_type == NV_FIRMWARE_TYPE_BINDATA)
    {
        return NV_FIRMWARE_FOR_NAME("bindata_image");
    }
#endif
    return "";
}
#endif // defined(NV_FIRMWARE_FOR_NAME)
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a

@@ -128,6 +128,9 @@ typedef struct nv_ioctl_register_fd

#define NV_DMABUF_EXPORT_MAX_HANDLES 128

#define NV_DMABUF_EXPORT_MAPPING_TYPE_DEFAULT 0
#define NV_DMABUF_EXPORT_MAPPING_TYPE_FORCE_PCIE 1

typedef struct nv_ioctl_export_to_dma_buf_fd
{
    int fd;

@@ -136,6 +139,7 @@ typedef struct nv_ioctl_export_to_dma_buf_fd
    NvU32 numObjects;
    NvU32 index;
    NvU64 totalSize NV_ALIGN_BYTES(8);
    NvU8 mappingType;
    NvHandle handles[NV_DMABUF_EXPORT_MAX_HANDLES];
    NvU64 offsets[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);
    NvU64 sizes[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);
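/*
 * Illustrative sketch only, not part of this change: how a user-space caller
 * might fill the export request shown above (truncated by the hunk),
 * including the new mappingType field. The ioctl request code
 * NV_ESC_EXPORT_TO_DMA_BUF_FD is a placeholder name, and fields not visible
 * in this hunk (such as the client and device handles) are omitted; both are
 * assumptions made for the example.
 */
#include <sys/ioctl.h>

static int example_export_one_handle(int nvctl_fd, NvHandle hMemory, NvU64 size)
{
    struct nv_ioctl_export_to_dma_buf_fd req = { 0 };

    req.fd          = -1;            /* filled in by the driver on success */
    req.totalSize   = size;
    req.numObjects  = 1;
    req.index       = 0;
    req.mappingType = NV_DMABUF_EXPORT_MAPPING_TYPE_DEFAULT;  /* or ..._FORCE_PCIE */
    req.handles[0]  = hMemory;
    req.offsets[0]  = 0;
    req.sizes[0]    = size;

    if (ioctl(nvctl_fd, NV_ESC_EXPORT_TO_DMA_BUF_FD, &req) != 0)
        return -1;

    return req.fd;   /* dma-buf file descriptor */
}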
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a

@@ -43,6 +43,8 @@ struct nv_kthread_q
    atomic_t main_loop_should_exit;

    struct task_struct *q_kthread;

    bool is_unload_flush_ongoing;
};

struct nv_kthread_q_item
@@ -231,12 +231,6 @@ NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
|
||||
const char *, va_list);
|
||||
#endif
|
||||
|
||||
#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
|
||||
#include <asm/eeh.h>
|
||||
#define NV_PCI_ERROR_RECOVERY_ENABLED() eeh_enabled()
|
||||
#define NV_PCI_ERROR_RECOVERY
|
||||
#endif
|
||||
|
||||
#if defined(NV_ASM_SET_MEMORY_H_PRESENT)
|
||||
#include <asm/set_memory.h>
|
||||
#endif
|
||||
@@ -609,7 +603,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
|
||||
#define NV_ALLOC_PAGES_NODE(ptr, nid, order, gfp_mask) \
|
||||
{ \
|
||||
(ptr) = (unsigned long)page_address(alloc_pages_node(nid, gfp_mask, order)); \
|
||||
(ptr) = (unsigned long) alloc_pages_node(nid, gfp_mask, order); \
|
||||
}
|
||||
|
||||
#define NV_GET_FREE_PAGES(ptr, order, gfp_mask) \
|
||||
@@ -724,6 +718,7 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_GET_OFFSET_IN_PAGE(phys_page) offset_in_page(phys_page)
|
||||
#define NV_GET_PAGE_STRUCT(phys_page) virt_to_page(__va(phys_page))
|
||||
#define NV_VMA_PGOFF(vma) ((vma)->vm_pgoff)
|
||||
#define NV_VMA_SIZE(vma) ((vma)->vm_end - (vma)->vm_start)
|
||||
@@ -880,16 +875,6 @@ typedef void irqreturn_t;
|
||||
#define PCI_CAP_ID_EXP 0x10
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On Linux on PPC64LE enable basic support for Linux PCI error recovery (see
|
||||
* Documentation/PCI/pci-error-recovery.txt). Currently RM only supports error
|
||||
* notification and data collection, not actual recovery of the device.
|
||||
*/
|
||||
#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
|
||||
#include <asm/eeh.h>
|
||||
#define NV_PCI_ERROR_RECOVERY
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If the host OS has page sizes larger than 4KB, we may have a security
|
||||
* problem. Registers are typically grouped in 4KB pages, but if there are
|
||||
@@ -951,14 +936,14 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
|
||||
}
|
||||
|
||||
static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
|
||||
NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
|
||||
NvU64 phys_addr, NvU64 size, NvU32 extra_prot, NvU64 start)
|
||||
{
|
||||
int ret = -1;
|
||||
#if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
|
||||
ret = nv_remap_page_range(vma, vma->vm_start, phys_addr, size,
|
||||
ret = nv_remap_page_range(vma, start, phys_addr, size,
|
||||
nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
|
||||
#else
|
||||
ret = io_remap_pfn_range(vma, vma->vm_start, (phys_addr >> PAGE_SHIFT),
|
||||
ret = io_remap_pfn_range(vma, start, (phys_addr >> PAGE_SHIFT),
|
||||
size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
|
||||
#endif
|
||||
return ret;
|
||||
@@ -1207,6 +1192,7 @@ typedef struct nv_alloc_s {
|
||||
NvBool physical : 1;
|
||||
NvBool unencrypted : 1;
|
||||
NvBool coherent : 1;
|
||||
NvBool carveout : 1;
|
||||
} flags;
|
||||
unsigned int cache_type;
|
||||
unsigned int num_pages;
|
||||
@@ -1417,8 +1403,6 @@ typedef struct nv_dma_map_s {
|
||||
0 ? NV_OK : NV_ERR_OPERATING_SYSTEM)
|
||||
#endif
|
||||
|
||||
typedef struct nv_ibmnpu_info nv_ibmnpu_info_t;
|
||||
|
||||
typedef struct nv_work_s {
|
||||
struct work_struct task;
|
||||
void *data;
|
||||
@@ -1466,7 +1450,6 @@ struct nv_dma_device {
|
||||
} addressable_range;
|
||||
|
||||
struct device *dev;
|
||||
NvBool nvlink;
|
||||
};
|
||||
|
||||
/* Properties of the coherent link */
|
||||
@@ -1515,9 +1498,6 @@ typedef struct nv_linux_state_s {
|
||||
struct device *dev;
|
||||
struct pci_dev *pci_dev;
|
||||
|
||||
/* IBM-NPU info associated with this GPU */
|
||||
nv_ibmnpu_info_t *npu;
|
||||
|
||||
/* coherent link information */
|
||||
coherent_link_info_t coherent_link_info;
|
||||
|
||||
@@ -1833,27 +1813,13 @@ static inline int nv_is_control_device(struct inode *inode)
|
||||
return (minor((inode)->i_rdev) == NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE);
|
||||
}
|
||||
|
||||
#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD)
|
||||
#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD) || defined(NV_DEVICE_VM_BUILD)
|
||||
#define NV_VGX_HYPER
|
||||
#if defined(NV_XEN_IOEMU_INJECT_MSI)
|
||||
#include <xen/ioemu.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline NvU64 nv_pci_bus_address(struct pci_dev *dev, NvU8 bar_index)
|
||||
{
|
||||
NvU64 bus_addr = 0;
|
||||
#if defined(NV_PCI_BUS_ADDRESS_PRESENT)
|
||||
bus_addr = pci_bus_address(dev, bar_index);
|
||||
#elif defined(CONFIG_PCI)
|
||||
struct pci_bus_region region;
|
||||
|
||||
pcibios_resource_to_bus(dev, ®ion, &dev->resource[bar_index]);
|
||||
bus_addr = region.start;
|
||||
#endif
|
||||
return bus_addr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decrements the usage count of the allocation, and moves the allocation to
|
||||
* the given nvlfp's free list if the usage count drops to zero.
|
||||
@@ -1884,59 +1850,6 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
|
||||
#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Starting on Power9 systems, DMA addresses for NVLink are no longer
|
||||
* the same as used over PCIe.
|
||||
*
|
||||
* Power9 supports a 56-bit Real Address. This address range is compressed
|
||||
* when accessed over NVLink to allow the GPU to access all of memory using
|
||||
* its 47-bit Physical address.
|
||||
*
|
||||
* If there is an NPU device present on the system, it implies that NVLink
|
||||
* sysmem links are present and we need to apply the required address
|
||||
* conversion for NVLink within the driver.
|
||||
*
|
||||
* See Bug 1920398 for further background and details.
|
||||
*
|
||||
* Note, a deviation from the documented compression scheme is that the
|
||||
* upper address bits (i.e. bit 56-63) instead of being set to zero are
|
||||
* preserved during NVLink address compression so the orignal PCIe DMA
|
||||
* address can be reconstructed on expansion. These bits can be safely
|
||||
* ignored on NVLink since they are truncated by the GPU.
|
||||
*
|
||||
* Bug 1968345: As a performance enhancement it is the responsibility of
|
||||
* the caller on PowerPC platforms to check for presence of an NPU device
|
||||
* before the address transformation is applied.
|
||||
*/
|
||||
static inline NvU64 nv_compress_nvlink_addr(NvU64 addr)
|
||||
{
|
||||
NvU64 addr47 = addr;
|
||||
|
||||
#if defined(NVCPU_PPC64LE)
|
||||
addr47 = addr & ((1ULL << 43) - 1);
|
||||
addr47 |= (addr & (0x3ULL << 45)) >> 2;
|
||||
WARN_ON(addr47 & (1ULL << 44));
|
||||
addr47 |= (addr & (0x3ULL << 49)) >> 4;
|
||||
addr47 |= addr & ~((1ULL << 56) - 1);
|
||||
#endif
|
||||
|
||||
return addr47;
|
||||
}
|
||||
|
||||
static inline NvU64 nv_expand_nvlink_addr(NvU64 addr47)
|
||||
{
|
||||
NvU64 addr = addr47;
|
||||
|
||||
#if defined(NVCPU_PPC64LE)
|
||||
addr = addr47 & ((1ULL << 43) - 1);
|
||||
addr |= (addr47 & (3ULL << 43)) << 2;
|
||||
addr |= (addr47 & (3ULL << 45)) << 4;
|
||||
addr |= addr47 & ~((1ULL << 56) - 1);
|
||||
#endif
|
||||
|
||||
return addr;
|
||||
}
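/*
 * Illustrative sketch, not part of this change: the compress/expand helpers
 * above are inverses for addresses that only use the bit positions the Power9
 * compression scheme carries (bits 0-42, 45-46, 49-50 and the preserved upper
 * bits 56-63). A minimal self-check expressing that property:
 */
static inline NvBool nv_nvlink_addr_roundtrip_ok(NvU64 pcie_addr)
{
    NvU64 addr47 = nv_compress_nvlink_addr(pcie_addr);

    /* On non-PPC64LE builds both helpers are identity functions. */
    return nv_expand_nvlink_addr(addr47) == pcie_addr;
}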
|
||||
|
||||
// Default flags for ISRs
|
||||
static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a

@@ -36,5 +36,6 @@ int nv_pci_count_devices(void);
NvU8 nv_find_pci_capability(struct pci_dev *, NvU8);
int nvidia_dev_get_pci_info(const NvU8 *, struct pci_dev **, NvU64 *, NvU64 *);
nv_linux_state_t * find_pci(NvU32, NvU8, NvU8, NvU8);
NvBool nv_pci_is_valid_topology_for_direct_pci(nv_state_t *, struct device *);

#endif
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -59,6 +59,8 @@ NV_STATUS nv_uvm_resume (void);
|
||||
void nv_uvm_notify_start_device (const NvU8 *uuid);
|
||||
void nv_uvm_notify_stop_device (const NvU8 *uuid);
|
||||
NV_STATUS nv_uvm_event_interrupt (const NvU8 *uuid);
|
||||
NV_STATUS nv_uvm_drain_P2P (const NvU8 *uuid);
|
||||
NV_STATUS nv_uvm_resume_P2P (const NvU8 *uuid);
|
||||
|
||||
/* Move these to nv.h once implemented by other UNIX platforms */
|
||||
NvBool nvidia_get_gpuid_list (NvU32 *gpu_ids, NvU32 *gpu_count);
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#include <nv-ioctl.h>
|
||||
#include <nv-ioctl-numa.h>
|
||||
#include <nvmisc.h>
|
||||
#include <os/nv_memory_area.h>
|
||||
|
||||
extern nv_cap_t *nvidia_caps_root;
|
||||
|
||||
@@ -279,8 +280,7 @@ typedef struct nv_usermap_access_params_s
|
||||
NvU64 offset;
|
||||
NvU64 *page_array;
|
||||
NvU64 num_pages;
|
||||
NvU64 mmap_start;
|
||||
NvU64 mmap_size;
|
||||
MemoryArea memArea;
|
||||
NvU64 access_start;
|
||||
NvU64 access_size;
|
||||
NvU64 remap_prot_extra;
|
||||
@@ -296,8 +296,7 @@ typedef struct nv_alloc_mapping_context_s {
|
||||
NvU64 page_index;
|
||||
NvU64 *page_array;
|
||||
NvU64 num_pages;
|
||||
NvU64 mmap_start;
|
||||
NvU64 mmap_size;
|
||||
MemoryArea memArea;
|
||||
NvU64 access_start;
|
||||
NvU64 access_size;
|
||||
NvU64 remap_prot_extra;
|
||||
@@ -330,7 +329,7 @@ typedef struct nv_soc_irq_info_s {
|
||||
NvS32 ref_count;
|
||||
} nv_soc_irq_info_t;
|
||||
|
||||
#define NV_MAX_SOC_IRQS 6
|
||||
#define NV_MAX_SOC_IRQS 10
|
||||
#define NV_MAX_DPAUX_NUM_DEVICES 4
|
||||
|
||||
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2
|
||||
@@ -369,6 +368,8 @@ typedef struct nv_state_t
|
||||
{
|
||||
NvBool valid;
|
||||
NvU8 uuid[GPU_UUID_LEN];
|
||||
NvBool pci_uuid_read_attempted;
|
||||
NV_STATUS pci_uuid_status;
|
||||
} nv_uuid_cache;
|
||||
void *handle;
|
||||
|
||||
@@ -480,6 +481,8 @@ typedef struct nv_state_t
|
||||
/* Bool to check if the GPU has a coherent sysmem link */
|
||||
NvBool coherent;
|
||||
|
||||
/* OS detected GPU has ATS capability */
|
||||
NvBool ats_support;
|
||||
/*
|
||||
* NUMA node ID of the CPU to which the GPU is attached.
|
||||
* Holds NUMA_NO_NODE on platforms that don't support NUMA configuration.
|
||||
@@ -495,6 +498,9 @@ typedef struct nv_state_t
|
||||
NvU32 dispIsoStreamId;
|
||||
NvU32 dispNisoStreamId;
|
||||
} iommus;
|
||||
|
||||
/* Console is managed by drm drivers or NVKMS */
|
||||
NvBool client_managed_console;
|
||||
} nv_state_t;
|
||||
|
||||
#define NVFP_TYPE_NONE 0x0
|
||||
@@ -535,16 +541,18 @@ typedef struct UvmGpuAddressSpaceInfo_tag *nvgpuAddressSpaceInfo_t;
|
||||
typedef struct UvmGpuAllocInfo_tag *nvgpuAllocInfo_t;
|
||||
typedef struct UvmGpuP2PCapsParams_tag *nvgpuP2PCapsParams_t;
|
||||
typedef struct UvmGpuFbInfo_tag *nvgpuFbInfo_t;
|
||||
typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
|
||||
typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
|
||||
typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
|
||||
typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
|
||||
typedef struct UvmGpuAccessCntrConfig_tag *nvgpuAccessCntrConfig_t;
|
||||
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
|
||||
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
|
||||
typedef struct UvmGpuAccessCntrConfig_tag nvgpuAccessCntrConfig_t;
|
||||
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
|
||||
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
|
||||
typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t;
|
||||
typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
|
||||
typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;
|
||||
typedef struct UvmGpuExternalMappingInfo_tag *nvgpuExternalMappingInfo_t;
|
||||
typedef struct UvmGpuExternalPhysAddrInfo_tag *nvgpuExternalPhysAddrInfo_t;
|
||||
typedef struct UvmGpuChannelResourceInfo_tag *nvgpuChannelResourceInfo_t;
|
||||
typedef struct UvmGpuChannelInstanceInfo_tag *nvgpuChannelInstanceInfo_t;
|
||||
typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindParams_t;
|
||||
@@ -569,7 +577,8 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemor
|
||||
#define NV_FLAG_PASSTHRU 0x0080
|
||||
#define NV_FLAG_SUSPENDED 0x0100
|
||||
#define NV_FLAG_SOC_IGPU 0x0200
|
||||
// Unused 0x0400
|
||||
/* To be set when an FLR needs to be triggered after device shut down. */
|
||||
#define NV_FLAG_TRIGGER_FLR 0x0400
|
||||
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
|
||||
#define NV_FLAG_IN_RECOVERY 0x1000
|
||||
// Unused 0x2000
|
||||
@@ -612,6 +621,7 @@ typedef struct
|
||||
const char *gc6_support;
|
||||
const char *gcoff_support;
|
||||
const char *s0ix_status;
|
||||
const char *db_support;
|
||||
} nv_power_info_t;
|
||||
|
||||
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
|
||||
@@ -757,6 +767,7 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
#define NV_ALIGN_DOWN(v,g) ((v) & ~((g) - 1))
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* driver internal interfaces
|
||||
*/
|
||||
@@ -783,7 +794,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
|
||||
|
||||
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
|
||||
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, NvBool, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
|
||||
|
||||
@@ -812,7 +823,6 @@ NV_STATUS NV_API_CALL nv_dma_map_mmio (nv_dma_device_t *, NvU64, NvU6
|
||||
void NV_API_CALL nv_dma_unmap_mmio (nv_dma_device_t *, NvU64, NvU64);
|
||||
|
||||
void NV_API_CALL nv_dma_cache_invalidate (nv_dma_device_t *, void *);
|
||||
void NV_API_CALL nv_dma_enable_nvlink (nv_dma_device_t *);
|
||||
|
||||
NvS32 NV_API_CALL nv_start_rc_timer (nv_state_t *);
|
||||
NvS32 NV_API_CALL nv_stop_rc_timer (nv_state_t *);
|
||||
@@ -839,9 +849,7 @@ NV_STATUS NV_API_CALL nv_acpi_mux_method (nv_state_t *, NvU32 *, NvU32,
|
||||
|
||||
NV_STATUS NV_API_CALL nv_log_error (nv_state_t *, NvU32, const char *, va_list);
|
||||
|
||||
NvU64 NV_API_CALL nv_get_dma_start_address (nv_state_t *);
|
||||
NV_STATUS NV_API_CALL nv_set_primary_vga_status(nv_state_t *);
|
||||
NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
|
||||
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);
|
||||
|
||||
NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
|
||||
@@ -854,19 +862,8 @@ void NV_API_CALL nv_put_file_private(void *);
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
|
||||
|
||||
void NV_API_CALL nv_wait_for_ibmnpu_rsync(nv_state_t *nv);
|
||||
|
||||
void NV_API_CALL nv_ibmnpu_cache_flush_range(nv_state_t *nv, NvU64, NvU64);
|
||||
|
||||
void NV_API_CALL nv_p2p_free_platform_data(void *data);
|
||||
|
||||
#if defined(NVCPU_PPC64LE)
|
||||
NV_STATUS NV_API_CALL nv_get_nvlink_line_rate (nv_state_t *, NvU32 *);
|
||||
#endif
|
||||
|
||||
NV_STATUS NV_API_CALL nv_revoke_gpu_mappings (nv_state_t *);
|
||||
void NV_API_CALL nv_acquire_mmap_lock (nv_state_t *);
|
||||
void NV_API_CALL nv_release_mmap_lock (nv_state_t *);
|
||||
@@ -904,6 +901,9 @@ void NV_API_CALL nv_dma_release_dma_buf (nv_dma_buf_t *);
|
||||
|
||||
void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_schedule_uvm_drain_p2p (NvU8 *);
|
||||
void NV_API_CALL nv_schedule_uvm_resume_p2p (NvU8 *);
|
||||
|
||||
NvBool NV_API_CALL nv_platform_supports_s0ix (void);
|
||||
NvBool NV_API_CALL nv_s2idle_pm_configured (void);
|
||||
|
||||
@@ -994,18 +994,24 @@ NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU6
|
||||
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_gpu_info (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, NvBool, void *, void *, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
|
||||
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, void *, nv_phys_addr_range_t **, NvU32 *);
|
||||
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, nv_phys_addr_range_t **, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *,
|
||||
NvHandle, NvHandle, MemoryRange,
|
||||
NvU8, void *, NvBool, MemoryArea *);
|
||||
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *,
|
||||
NvHandle, NvHandle, NvU8, void *,
|
||||
NvBool, MemoryArea);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *,
|
||||
nv_state_t *, NvHandle, NvHandle,
|
||||
NvU8, NvHandle *, NvHandle *,
|
||||
NvHandle *, void **, NvBool *);
|
||||
void NV_API_CALL rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
|
||||
NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
|
||||
|
||||
void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
|
||||
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
|
||||
@@ -1022,7 +1028,6 @@ NvBool NV_API_CALL rm_is_device_sequestered(nvidia_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_check_for_gpu_surprise_removal(nvidia_stack_t *, nv_state_t *);
|
||||
NV_STATUS NV_API_CALL rm_set_external_kernel_client_count(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
NV_STATUS NV_API_CALL rm_schedule_gpu_wakeup(nvidia_stack_t *, nv_state_t *);
|
||||
NvBool NV_API_CALL rm_is_iommu_needed_for_sriov(nvidia_stack_t *, nv_state_t *);
|
||||
NvBool NV_API_CALL rm_disable_iomap_wc(void);
|
||||
|
||||
void NV_API_CALL rm_init_dynamic_power_management(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
@@ -1039,12 +1044,14 @@ void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
|
||||
NvBool NV_API_CALL rm_is_altstack_in_use(void);
|
||||
|
||||
/* vGPU VFIO specific functions */
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *,
|
||||
NvU32 *, NvU32 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
|
||||
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_update_sysfs_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
|
||||
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
|
||||
|
||||
@@ -592,6 +592,14 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
|
||||
Error codes:
|
||||
NV_ERR_GENERIC
|
||||
NV_ERR_NO_MEMORY
|
||||
NV_ERR_INVALID_STATE
|
||||
NV_ERR_NOT_SUPPORTED
|
||||
NV_ERR_NOT_READY
|
||||
NV_ERR_INVALID_LOCK_STATE
|
||||
NV_ERR_INVALID_STATE
|
||||
NV_ERR_NVLINK_FABRIC_NOT_READY
|
||||
NV_ERR_NVLINK_FABRIC_FAILURE
|
||||
NV_ERR_GPU_MEMORY_ONLINING_FAILURE
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
|
||||
UvmGpuCaps *caps);
|
||||
@@ -1048,7 +1056,7 @@ NV_STATUS nvUvmInterfaceDestroyAccessCntrInfo(uvmGpuDeviceHandle device,
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceEnableAccessCntr(uvmGpuDeviceHandle device,
|
||||
UvmGpuAccessCntrInfo *pAccessCntrInfo,
|
||||
UvmGpuAccessCntrConfig *pAccessCntrConfig);
|
||||
const UvmGpuAccessCntrConfig *pAccessCntrConfig);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceDisableAccessCntr
|
||||
@@ -1085,6 +1093,22 @@ NV_STATUS nvUvmInterfaceRegisterUvmCallbacks(struct UvmOpsUvmEvents *importedUvm
|
||||
//
|
||||
void nvUvmInterfaceDeRegisterUvmOps(void);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceGetNvlinkInfo
|
||||
|
||||
Gets NVLINK information from RM.
|
||||
|
||||
Arguments:
|
||||
device[IN] - GPU device handle
|
||||
nvlinkInfo [OUT] - Pointer to NvlinkInfo structure
|
||||
|
||||
Error codes:
|
||||
NV_ERROR
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceGetNvlinkInfo(uvmGpuDeviceHandle device,
|
||||
UvmGpuNvlinkInfo *nvlinkInfo);
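/*
 * Illustrative usage sketch, not part of this change; assumes the caller
 * already holds a valid uvmGpuDeviceHandle. Field names come from the
 * UvmGpuNvlinkInfo definition in nv_uvm_types.h.
 */
static NV_STATUS exampleCheckNvlinkRecovery(uvmGpuDeviceHandle device)
{
    UvmGpuNvlinkInfo info;
    NV_STATUS status = nvUvmInterfaceGetNvlinkInfo(device, &info);

    if (status != NV_OK)
        return status;   /* NV_ERROR or NV_ERR_INVALID_ARGUMENT per the codes above */

    return info.bNvlinkRecoveryEnabled ? NV_OK : NV_ERR_NOT_SUPPORTED;
}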
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceP2pObjectCreate
|
||||
|
||||
@@ -1161,6 +1185,48 @@ NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace,
|
||||
NvU64 size,
|
||||
UvmGpuExternalMappingInfo *gpuExternalMappingInfo);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceGetExternalAllocPhysAddrs
|
||||
|
||||
The interface builds the RM physical addrs using the provided input parameters.
|
||||
|
||||
Arguments:
|
||||
vaSpace[IN] - vaSpace handle.
|
||||
hMemory[IN] - Memory handle.
|
||||
offset [IN] - Offset from the beginning of the allocation
|
||||
where PTE mappings should begin.
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size [IN] - Length of the allocation for which PhysAddrs
|
||||
should be built.
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size = 0 will be interpreted as the total size
|
||||
of the allocation.
|
||||
gpuExternalMappingInfo[IN/OUT] - See nv_uvm_types.h for more information.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed.
|
||||
NV_ERR_INVALID_OBJECT_HANDLE - Invalid memory handle is passed.
|
||||
NV_ERR_NOT_SUPPORTED - Functionality is not supported (see comments in nv_gpu_ops.c)
|
||||
NV_ERR_INVALID_BASE - offset is beyond the allocation size
|
||||
NV_ERR_INVALID_LIMIT - (offset + size) is beyond the allocation size.
|
||||
NV_ERR_BUFFER_TOO_SMALL - gpuExternalMappingInfo.physAddrBufferSize is insufficient to
|
||||
store single physAddr.
|
||||
NV_ERR_NOT_READY - Returned when querying the physAddrs requires a deferred setup
|
||||
which has not yet completed. It is expected that the caller
|
||||
will reattempt the call until a different code is returned.
|
||||
As an example, multi-node systems which require querying
|
||||
physAddrs from the Fabric Manager may return this code.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceGetExternalAllocPhysAddrs(uvmGpuAddressSpaceHandle vaSpace,
|
||||
NvHandle hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 size,
|
||||
UvmGpuExternalPhysAddrInfo *gpuExternalPhysAddrsInfo);
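/*
 * Illustrative sketch, not part of this change: calling the interface above
 * with a caller-provided buffer and honoring the documented NV_ERR_NOT_READY
 * retry behavior. The busy-wait retry loop and buffer size are example
 * choices only.
 */
static NV_STATUS exampleGetExternalPhysAddrs(uvmGpuAddressSpaceHandle vaSpace,
                                             NvHandle hMemory,
                                             NvU64 offset,
                                             NvU64 size,
                                             UvmRmGpuMappingType mappingType)
{
    NvU64 physAddrs[64];
    UvmGpuExternalPhysAddrInfo info = { 0 };
    NV_STATUS status;

    info.mappingType        = mappingType;
    info.physAddrBufferSize = sizeof(physAddrs);
    info.mappingPageSize    = 0;    /* 0: use the allocation's own page size */
    info.physAddrBuffer     = physAddrs;

    do {
        status = nvUvmInterfaceGetExternalAllocPhysAddrs(vaSpace, hMemory,
                                                         offset, size, &info);
    } while (status == NV_ERR_NOT_READY);   /* e.g. Fabric Manager query pending */

    /*
     * On NV_OK, info.numWrittenPhysAddrs entries of physAddrs are valid; a
     * non-zero info.numRemainingPhysAddrs means the buffer was too small
     * (see NV_ERR_BUFFER_TOO_SMALL above).
     */
    return status;
}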
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceRetainChannel
|
||||
|
||||
@@ -1462,6 +1528,16 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
char *methodStream,
|
||||
NvU32 methodStreamSize);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceReportFatalError
|
||||
|
||||
Reports a global fatal error so RM can inform the clients that a node reboot
|
||||
is necessary to recover from this error. This function can be called from
|
||||
any lock environment, bottom half or non-interrupt context.
|
||||
|
||||
*/
|
||||
void nvUvmInterfaceReportFatalError(NV_STATUS error);
|
||||
|
||||
/*******************************************************************************
|
||||
Cryptography Services Library (CSL) Interface
|
||||
*/
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -543,6 +543,36 @@ typedef struct UvmGpuExternalMappingInfo_tag
|
||||
NvU32 pteSize;
|
||||
} UvmGpuExternalMappingInfo;
|
||||
|
||||
typedef struct UvmGpuExternalPhysAddrInfo_tag
|
||||
{
|
||||
// In: Virtual permissions. Returns
|
||||
// NV_ERR_INVALID_ACCESS_TYPE if input is
|
||||
// inaccurate
|
||||
UvmRmGpuMappingType mappingType;
|
||||
|
||||
// In: Size of the buffer to store PhysAddrs (in bytes).
|
||||
NvU64 physAddrBufferSize;
|
||||
|
||||
// In: Page size for mapping
|
||||
// If this field is passed as 0, the page size
|
||||
// of the allocation is used for mapping.
|
||||
// nvUvmInterfaceGetExternalAllocPtes must pass
|
||||
// this field as zero.
|
||||
NvU64 mappingPageSize;
|
||||
|
||||
// In: Pointer to a buffer to store PhysAddrs.
|
||||
// Out: The interface will fill the buffer with PhysAddrs
|
||||
NvU64 *physAddrBuffer;
|
||||
|
||||
// Out: Number of PhysAddrs filled in to the buffer.
|
||||
NvU64 numWrittenPhysAddrs;
|
||||
|
||||
// Out: Number of PhysAddrs remaining to be filled
|
||||
// if the buffer is not sufficient to accommodate
|
||||
// requested PhysAddrs.
|
||||
NvU64 numRemainingPhysAddrs;
|
||||
} UvmGpuExternalPhysAddrInfo;
|
||||
|
||||
typedef struct UvmGpuP2PCapsParams_tag
|
||||
{
|
||||
// Out: peerId[i] contains gpu[i]'s peer id of gpu[1 - i]. Only defined if
|
||||
@@ -590,19 +620,12 @@ typedef struct UvmGpuClientInfo_tag
|
||||
NvHandle hSmcPartRef;
|
||||
} UvmGpuClientInfo;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_GPU_CONF_COMPUTE_MODE_NONE,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_APM,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_HCC,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_COUNT
|
||||
} UvmGpuConfComputeMode;
|
||||
|
||||
typedef struct UvmGpuConfComputeCaps_tag
|
||||
{
|
||||
// Out: GPU's confidential compute mode
|
||||
UvmGpuConfComputeMode mode;
|
||||
// Is key rotation enabled for UVM keys
|
||||
// Out: true if Confidential Computing is enabled on the GPU
|
||||
NvBool bConfComputingEnabled;
|
||||
|
||||
// Out: true if key rotation is enabled (for UVM keys) on the GPU
|
||||
NvBool bKeyRotationEnabled;
|
||||
} UvmGpuConfComputeCaps;
|
||||
|
||||
@@ -660,6 +683,9 @@ typedef struct UvmGpuInfo_tag
|
||||
// Maximum number of TPCs per GPC
|
||||
NvU32 maxTpcPerGpcCount;
|
||||
|
||||
// Number of access counter buffers.
|
||||
NvU32 accessCntrBufferCount;
|
||||
|
||||
// NV_TRUE if SMC is enabled on this GPU.
|
||||
NvBool smcEnabled;
|
||||
|
||||
@@ -713,6 +739,8 @@ typedef struct UvmGpuInfo_tag
|
||||
// to NVSwitch peers.
|
||||
NvU64 nvswitchEgmMemoryWindowStart;
|
||||
|
||||
// GPU supports ATS capability
|
||||
NvBool atsSupport;
|
||||
} UvmGpuInfo;
|
||||
|
||||
typedef struct UvmGpuFbInfo_tag
|
||||
@@ -721,10 +749,15 @@ typedef struct UvmGpuFbInfo_tag
|
||||
// RM regions that are not registered with PMA either.
|
||||
NvU64 maxAllocatableAddress;
|
||||
|
||||
NvU32 heapSize; // RAM in KB available for user allocations
|
||||
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
|
||||
NvBool bZeroFb; // Zero FB mode enabled.
|
||||
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
|
||||
NvU32 heapSize; // RAM in KB available for user allocations
|
||||
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
|
||||
NvBool bZeroFb; // Zero FB mode enabled.
|
||||
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
|
||||
NvBool bStaticBar1Enabled; // Static BAR1 mode is enabled
|
||||
NvU64 staticBar1StartOffset; // The start offset of the the static mapping
|
||||
NvU64 staticBar1Size; // The size of the static mapping
|
||||
NvU32 heapStart; // The start offset of heap in KB, helpful for MIG
|
||||
// systems
|
||||
} UvmGpuFbInfo;
|
||||
|
||||
typedef struct UvmGpuEccInfo_tag
|
||||
@@ -736,6 +769,15 @@ typedef struct UvmGpuEccInfo_tag
|
||||
NvBool bEccEnabled;
|
||||
} UvmGpuEccInfo;
|
||||
|
||||
typedef struct UvmGpuNvlinkInfo_tag
|
||||
{
|
||||
unsigned nvlinkMask;
|
||||
unsigned nvlinkOffset;
|
||||
void *nvlinkReadLocation;
|
||||
NvBool *nvlinkErrorNotifier;
|
||||
NvBool bNvlinkRecoveryEnabled;
|
||||
} UvmGpuNvlinkInfo;
|
||||
|
||||
typedef struct UvmPmaAllocationOptions_tag
|
||||
{
|
||||
NvU32 flags;
|
||||
@@ -852,6 +894,41 @@ typedef NV_STATUS (*uvmEventIsrTopHalf_t) (const NvProcessorUuid *pGpuUuidStruct
|
||||
typedef void (*uvmEventIsrTopHalf_t) (void);
|
||||
#endif
|
||||
|
||||
/*******************************************************************************
|
||||
uvmEventDrainP2P
|
||||
This function will be called by the GPU driver to signal to UVM that the
|
||||
GPU has encountered an uncontained error, and all peer work must be drained
|
||||
to recover. When it is called, the following assumptions/guarantees are
|
||||
valid/made:
|
||||
|
||||
* Impacted user channels have been preempted and disabled
|
||||
* UVM channels are still running normally and will continue to do
|
||||
so unless an unrecoverable error is hit on said channels
|
||||
* UVM must not return from this function until all enqueued work on
|
||||
* peer channels has drained
|
||||
* In the context of this function call, RM will still service faults
|
||||
* UVM must prevent new peer work from being enqueued until the
|
||||
uvmEventResumeP2P callback is issued
|
||||
|
||||
Returns:
|
||||
NV_OK if UVM has idled peer work and will prevent new peer workloads.
|
||||
NV_ERR_TIMEOUT if peer work was unable to be drained within a timeout
|
||||
XXX NV_ERR_* for any other failure (TBD)
|
||||
|
||||
*/
|
||||
typedef NV_STATUS (*uvmEventDrainP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
|
||||
|
||||
/*******************************************************************************
|
||||
uvmEventResumeP2P
|
||||
This function will be called by the GPU driver to signal to UVM that the
|
||||
GPU has recovered from the previously reported uncontained NVLINK error.
|
||||
When it is called, the following assumptions/guarantees are valid/made:
|
||||
|
||||
* UVM is again allowed to enqueue peer work
|
||||
* UVM channels are still running normally
|
||||
*/
|
||||
typedef NV_STATUS (*uvmEventResumeP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
|
||||
|
||||
struct UvmOpsUvmEvents
|
||||
{
|
||||
uvmEventSuspend_t suspend;
|
||||
@@ -864,6 +941,8 @@ struct UvmOpsUvmEvents
|
||||
uvmEventWddmRestartAfterTimeout_t wddmRestartAfterTimeout;
|
||||
uvmEventServiceInterrupt_t serviceInterrupt;
|
||||
#endif
|
||||
uvmEventDrainP2P_t drainP2P;
|
||||
uvmEventResumeP2P_t resumeP2P;
|
||||
};
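/*
 * Illustrative sketch, not part of this change: wiring the new drainP2P and
 * resumeP2P callbacks into the ops table above. The callback bodies and the
 * example* names are hypothetical; only the struct fields, the callback
 * typedefs and nvUvmInterfaceRegisterUvmCallbacks() come from these headers.
 */
static NV_STATUS exampleDrainP2P(const NvProcessorUuid *pGpuUuidStruct)
{
    /*
     * Per the uvmEventDrainP2P contract above: stop enqueuing new peer work
     * and do not return until already-enqueued peer work has drained.
     */
    return NV_OK;
}

static NV_STATUS exampleResumeP2P(const NvProcessorUuid *pGpuUuidStruct)
{
    /* Peer work may be enqueued again after the uncontained error recovery. */
    return NV_OK;
}

static struct UvmOpsUvmEvents exampleUvmOps = {
    /* .suspend, .resume and the other existing callbacks elided here */
    .drainP2P  = exampleDrainP2P,
    .resumeP2P = exampleResumeP2P,
};

/* Registered with: nvUvmInterfaceRegisterUvmCallbacks(&exampleUvmOps); */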
|
||||
|
||||
#define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
|
||||
@@ -1024,24 +1103,9 @@ typedef enum
|
||||
UVM_ACCESS_COUNTER_GRANULARITY_16G = 4,
|
||||
} UVM_ACCESS_COUNTER_GRANULARITY;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_ACCESS_COUNTER_USE_LIMIT_NONE = 1,
|
||||
UVM_ACCESS_COUNTER_USE_LIMIT_QTR = 2,
|
||||
UVM_ACCESS_COUNTER_USE_LIMIT_HALF = 3,
|
||||
UVM_ACCESS_COUNTER_USE_LIMIT_FULL = 4,
|
||||
} UVM_ACCESS_COUNTER_USE_LIMIT;
|
||||
|
||||
typedef struct UvmGpuAccessCntrConfig_tag
|
||||
{
|
||||
NvU32 mimcGranularity;
|
||||
|
||||
NvU32 momcGranularity;
|
||||
|
||||
NvU32 mimcUseLimit;
|
||||
|
||||
NvU32 momcUseLimit;
|
||||
|
||||
NvU32 granularity;
|
||||
NvU32 threshold;
|
||||
} UvmGpuAccessCntrConfig;
|
||||
|
||||
@@ -1071,11 +1135,13 @@ typedef UvmGpuAccessCntrConfig gpuAccessCntrConfig;
|
||||
typedef UvmGpuFaultInfo gpuFaultInfo;
|
||||
typedef UvmGpuMemoryInfo gpuMemoryInfo;
|
||||
typedef UvmGpuExternalMappingInfo gpuExternalMappingInfo;
|
||||
typedef UvmGpuExternalPhysAddrInfo gpuExternalPhysAddrInfo;
|
||||
typedef UvmGpuChannelResourceInfo gpuChannelResourceInfo;
|
||||
typedef UvmGpuChannelInstanceInfo gpuChannelInstanceInfo;
|
||||
typedef UvmGpuChannelResourceBindParams gpuChannelResourceBindParams;
|
||||
typedef UvmGpuFbInfo gpuFbInfo;
|
||||
typedef UvmGpuEccInfo gpuEccInfo;
|
||||
typedef UvmGpuNvlinkInfo gpuNvlinkInfo;
|
||||
typedef UvmGpuPagingChannel *gpuPagingChannelHandle;
|
||||
typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
|
||||
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
|
||||
|
||||
@@ -50,6 +50,8 @@
|
||||
#define NVKMS_LOG2_LUT_ARRAY_SIZE 10
|
||||
#define NVKMS_LUT_ARRAY_SIZE (1 << NVKMS_LOG2_LUT_ARRAY_SIZE)
|
||||
|
||||
#define NVKMS_OLUT_FP_NORM_SCALE_DEFAULT 0xffffffff
|
||||
|
||||
typedef NvU32 NvKmsDeviceHandle;
|
||||
typedef NvU32 NvKmsDispHandle;
|
||||
typedef NvU32 NvKmsConnectorHandle;
|
||||
@@ -245,6 +247,80 @@ struct NvKmsLutRamps {
|
||||
NvU16 blue[NVKMS_LUT_ARRAY_SIZE]; /*! in */
|
||||
};
|
||||
|
||||
/* Datatypes for LUT capabilities */
|
||||
enum NvKmsLUTFormat {
|
||||
/*
|
||||
* Normalized fixed-point format mapping [0, 1] to [0x0, 0xFFFF].
|
||||
*/
|
||||
NVKMS_LUT_FORMAT_UNORM16,
|
||||
|
||||
/*
|
||||
* Half-precision floating point.
|
||||
*/
|
||||
NVKMS_LUT_FORMAT_FP16,
|
||||
|
||||
/*
|
||||
* 14-bit fixed-point format required to work around hardware bug 813188.
|
||||
*
|
||||
* To convert from UNORM16 to UNORM14_WAR_813188:
|
||||
* unorm14_war_813188 = ((unorm16 >> 2) & ~7) + 0x6000
|
||||
*/
|
||||
NVKMS_LUT_FORMAT_UNORM14_WAR_813188
|
||||
};
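/*
 * Illustrative helper, not part of this change: the UNORM16 to
 * UNORM14_WAR_813188 conversion described in the comment above, written out
 * as code.
 */
static inline NvU16 exampleUnorm16ToUnorm14War813188(NvU16 unorm16)
{
    /* unorm14_war_813188 = ((unorm16 >> 2) & ~7) + 0x6000 */
    return (NvU16)(((unorm16 >> 2) & ~7) + 0x6000);
}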
|
||||
|
||||
enum NvKmsLUTVssSupport {
|
||||
NVKMS_LUT_VSS_NOT_SUPPORTED,
|
||||
NVKMS_LUT_VSS_SUPPORTED,
|
||||
NVKMS_LUT_VSS_REQUIRED,
|
||||
};
|
||||
|
||||
enum NvKmsLUTVssType {
|
||||
NVKMS_LUT_VSS_TYPE_NONE,
|
||||
NVKMS_LUT_VSS_TYPE_LINEAR,
|
||||
NVKMS_LUT_VSS_TYPE_LOGARITHMIC,
|
||||
};
|
||||
|
||||
struct NvKmsLUTCaps {
|
||||
/*! Whether this layer or head on this device supports this LUT stage. */
|
||||
NvBool supported;
|
||||
|
||||
/*! Whether this LUT supports VSS. */
|
||||
enum NvKmsLUTVssSupport vssSupport;
|
||||
|
||||
/*!
|
||||
* The type of VSS segmenting this LUT uses.
|
||||
*/
|
||||
enum NvKmsLUTVssType vssType;
|
||||
|
||||
/*!
|
||||
* Expected number of VSS segments.
|
||||
*/
|
||||
NvU32 vssSegments;
|
||||
|
||||
/*!
|
||||
* Expected number of LUT entries.
|
||||
*/
|
||||
NvU32 lutEntries;
|
||||
|
||||
/*!
|
||||
* Format for each of the LUT entries.
|
||||
*/
|
||||
enum NvKmsLUTFormat entryFormat;
|
||||
};
|
||||
|
||||
/* each LUT entry uses this many bytes */
|
||||
#define NVKMS_LUT_CAPS_LUT_ENTRY_SIZE (4 * sizeof(NvU16))
|
||||
|
||||
/* if the LUT surface uses VSS, size of the VSS header */
|
||||
#define NVKMS_LUT_VSS_HEADER_SIZE (4 * NVKMS_LUT_CAPS_LUT_ENTRY_SIZE)
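/*
 * Illustrative sketch, not part of this change: one plausible way to size a
 * LUT surface from the caps above, using the entry-size and VSS-header
 * defines. Whether the VSS header is simply prepended to the entries is an
 * assumption made for this example.
 */
static inline NvU64 exampleLutSurfaceMinSize(const struct NvKmsLUTCaps *pCaps)
{
    NvU64 size = 0;

    if (!pCaps->supported) {
        return 0;
    }

    if (pCaps->vssSupport == NVKMS_LUT_VSS_REQUIRED) {
        size += NVKMS_LUT_VSS_HEADER_SIZE;   /* assumed to precede the entries */
    }

    return size + ((NvU64)pCaps->lutEntries * NVKMS_LUT_CAPS_LUT_ENTRY_SIZE);
}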
|
||||
|
||||
struct NvKmsLUTSurfaceParams {
|
||||
NvKmsSurfaceHandle surfaceHandle;
|
||||
NvU64 offset NV_ALIGN_BYTES(8);
|
||||
NvU32 vssSegments;
|
||||
NvU32 lutEntries;
|
||||
};
|
||||
|
||||
/*
|
||||
* A 3x4 row-major colorspace conversion matrix.
|
||||
*
|
||||
@@ -463,6 +539,10 @@ struct NvKmsLayerCapabilities {
|
||||
* still expected to honor the NvKmsUsageBounds for each head.
|
||||
*/
|
||||
NvU64 supportedSurfaceMemoryFormats NV_ALIGN_BYTES(8);
|
||||
|
||||
/* Capabilities for each LUT stage in the EVO3 precomp pipeline. */
|
||||
struct NvKmsLUTCaps ilut;
|
||||
struct NvKmsLUTCaps tmo;
|
||||
};
|
||||
|
||||
/*!
|
||||
@@ -683,4 +763,20 @@ struct NvKmsSuperframeInfo {
|
||||
} view[NVKMS_MAX_SUPERFRAME_VIEWS];
|
||||
};
|
||||
|
||||
/* Fields within NvKmsVblankSemControlDataOneHead::flags */
|
||||
#define NVKMS_VBLANK_SEM_CONTROL_SWAP_INTERVAL 15:0
|
||||
|
||||
struct NvKmsVblankSemControlDataOneHead {
|
||||
NvU32 requestCounterAccel;
|
||||
NvU32 requestCounter;
|
||||
NvU32 flags;
|
||||
|
||||
NvU32 semaphore;
|
||||
NvU64 vblankCount NV_ALIGN_BYTES(8);
|
||||
};
|
||||
|
||||
struct NvKmsVblankSemControlData {
|
||||
struct NvKmsVblankSemControlDataOneHead head[NV_MAX_HEADS];
|
||||
};
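/*
 * Illustrative sketch, not part of this change: the SWAP_INTERVAL field above
 * uses the driver's high:low bit-range notation, i.e. bits 15..0 of ::flags.
 * Extracted with a plain mask rather than assuming any particular DRF-style
 * helper macro:
 */
static inline NvU32 exampleVblankSemSwapInterval(
    const struct NvKmsVblankSemControlDataOneHead *pHead)
{
    return pHead->flags & 0xFFFF;   /* bits 15..0: swap interval */
}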
|
||||
|
||||
#endif /* NVKMS_API_TYPES_H */
|
||||
|
||||
@@ -124,6 +124,14 @@ struct NvKmsKapiDisplayMode {
|
||||
#define NVKMS_KAPI_LAYER_INVALID_IDX 0xff
|
||||
#define NVKMS_KAPI_LAYER_PRIMARY_IDX 0
|
||||
|
||||
struct NvKmsKapiLutCaps {
|
||||
struct {
|
||||
struct NvKmsLUTCaps ilut;
|
||||
struct NvKmsLUTCaps tmo;
|
||||
} layer[NVKMS_KAPI_LAYER_MAX];
|
||||
struct NvKmsLUTCaps olut;
|
||||
};
|
||||
|
||||
struct NvKmsKapiDeviceResourcesInfo {
|
||||
|
||||
NvU32 numHeads;
|
||||
@@ -169,6 +177,8 @@ struct NvKmsKapiDeviceResourcesInfo {
|
||||
|
||||
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
|
||||
NvBool supportsICtCp[NVKMS_KAPI_LAYER_MAX];
|
||||
|
||||
struct NvKmsKapiLutCaps lutCaps;
|
||||
};
|
||||
|
||||
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
|
||||
@@ -262,21 +272,54 @@ struct NvKmsKapiLayerConfig {
|
||||
NvU16 dstWidth, dstHeight;
|
||||
|
||||
enum NvKmsInputColorSpace inputColorSpace;
|
||||
|
||||
struct {
|
||||
NvBool enabled;
|
||||
struct NvKmsKapiSurface *lutSurface;
|
||||
NvU64 offset;
|
||||
NvU32 vssSegments;
|
||||
NvU32 lutEntries;
|
||||
} ilut;
|
||||
|
||||
struct {
|
||||
NvBool enabled;
|
||||
struct NvKmsKapiSurface *lutSurface;
|
||||
NvU64 offset;
|
||||
NvU32 vssSegments;
|
||||
NvU32 lutEntries;
|
||||
} tmo;
|
||||
|
||||
struct NvKmsCscMatrix csc;
|
||||
NvBool cscUseMain;
|
||||
|
||||
struct {
|
||||
struct NvKmsCscMatrix lmsCtm;
|
||||
struct NvKmsCscMatrix lmsToItpCtm;
|
||||
struct NvKmsCscMatrix itpToLmsCtm;
|
||||
struct NvKmsCscMatrix blendCtm;
|
||||
struct {
|
||||
NvBool lmsCtm : 1;
|
||||
NvBool lmsToItpCtm : 1;
|
||||
NvBool itpToLmsCtm : 1;
|
||||
NvBool blendCtm : 1;
|
||||
} enabled;
|
||||
} matrixOverrides;
|
||||
};
|
||||
|
||||
struct NvKmsKapiLayerRequestedConfig {
|
||||
struct NvKmsKapiLayerConfig config;
|
||||
struct {
|
||||
NvBool surfaceChanged : 1;
|
||||
NvBool srcXYChanged : 1;
|
||||
NvBool srcWHChanged : 1;
|
||||
NvBool dstXYChanged : 1;
|
||||
NvBool dstWHChanged : 1;
|
||||
NvBool cscChanged : 1;
|
||||
NvBool tfChanged : 1;
|
||||
NvBool hdrMetadataChanged : 1;
|
||||
NvBool surfaceChanged : 1;
|
||||
NvBool srcXYChanged : 1;
|
||||
NvBool srcWHChanged : 1;
|
||||
NvBool dstXYChanged : 1;
|
||||
NvBool dstWHChanged : 1;
|
||||
NvBool cscChanged : 1;
|
||||
NvBool tfChanged : 1;
|
||||
NvBool hdrMetadataChanged : 1;
|
||||
NvBool matrixOverridesChanged : 1;
|
||||
NvBool ilutChanged : 1;
|
||||
NvBool tmoChanged : 1;
|
||||
} flags;
|
||||
};
|
||||
|
||||
@@ -342,18 +385,30 @@ struct NvKmsKapiHeadModeSetConfig {
|
||||
struct NvKmsLutRamps *pRamps;
|
||||
} output;
|
||||
} lut;
|
||||
|
||||
struct {
|
||||
NvBool enabled;
|
||||
struct NvKmsKapiSurface *lutSurface;
|
||||
NvU64 offset;
|
||||
NvU32 vssSegments;
|
||||
NvU32 lutEntries;
|
||||
} olut;
|
||||
|
||||
NvU32 olutFpNormScale;
|
||||
};
|
||||
|
||||
struct NvKmsKapiHeadRequestedConfig {
|
||||
struct NvKmsKapiHeadModeSetConfig modeSetConfig;
|
||||
struct {
|
||||
NvBool activeChanged : 1;
|
||||
NvBool displaysChanged : 1;
|
||||
NvBool modeChanged : 1;
|
||||
NvBool hdrInfoFrameChanged : 1;
|
||||
NvBool colorimetryChanged : 1;
|
||||
NvBool ilutChanged : 1;
|
||||
NvBool olutChanged : 1;
|
||||
NvBool activeChanged : 1;
|
||||
NvBool displaysChanged : 1;
|
||||
NvBool modeChanged : 1;
|
||||
NvBool hdrInfoFrameChanged : 1;
|
||||
NvBool colorimetryChanged : 1;
|
||||
NvBool legacyIlutChanged : 1;
|
||||
NvBool legacyOlutChanged : 1;
|
||||
NvBool olutChanged : 1;
|
||||
NvBool olutFpNormScaleChanged : 1;
|
||||
} flags;
|
||||
|
||||
struct NvKmsKapiCursorRequestedConfig cursorRequestedConfig;
|
||||
@@ -489,6 +544,9 @@ struct NvKmsKapiCreateSurfaceParams {
|
||||
* explicit_layout is NV_TRUE and layout is
|
||||
* NvKmsSurfaceMemoryLayoutBlockLinear */
|
||||
NvU8 log2GobsPerBlockY;
|
||||
|
||||
/* [IN] Whether a surface can be updated directly on the screen */
|
||||
NvBool noDisplayCaching;
|
||||
};
|
||||
|
||||
enum NvKmsKapiAllocationType {
|
||||
@@ -956,6 +1014,17 @@ struct NvKmsKapiFunctionsTable {
|
||||
const void *pLinearAddress
|
||||
);
|
||||
|
||||
/*!
|
||||
* Check if memory object allocated is video memory.
|
||||
*
|
||||
* \param [in] memory Memory allocated using allocateMemory()
|
||||
*
|
||||
* \return NV_TRUE if memory is vidmem, NV_FALSE otherwise.
|
||||
*/
|
||||
NvBool (*isVidmem)(
|
||||
const struct NvKmsKapiMemory *memory
|
||||
);
|
||||
|
||||
/*!
|
||||
* Create a formatted surface from an NvKmsKapiMemory object.
|
||||
*
|
||||
@@ -1172,21 +1241,6 @@ struct NvKmsKapiFunctionsTable {
|
||||
NvU64 *pPages
|
||||
);
|
||||
|
||||
/*!
|
||||
* Check if this memory object can be scanned out for display.
|
||||
*
|
||||
* \param [in] device A device allocated using allocateDevice().
|
||||
*
|
||||
* \param [in] memory The memory object to check for display support.
|
||||
*
|
||||
* \return NV_TRUE if this memory can be displayed, NV_FALSE if not.
|
||||
*/
|
||||
NvBool (*isMemoryValidForDisplay)
|
||||
(
|
||||
const struct NvKmsKapiDevice *device,
|
||||
const struct NvKmsKapiMemory *memory
|
||||
);
|
||||
|
||||
/*
|
||||
* Import SGT as a memory handle.
|
||||
*
|
||||
@@ -1425,7 +1479,7 @@ struct NvKmsKapiFunctionsTable {
|
||||
);
|
||||
|
||||
/*!
|
||||
* Immediately reset the specified display semaphore to the pending state.
|
||||
* Immediately initialize the specified display semaphore to the pending state.
|
||||
*
|
||||
* Must be called prior to applying a mode set that utilizes the specified
|
||||
* display semaphore for synchronization.
|
||||
@@ -1438,7 +1492,7 @@ struct NvKmsKapiFunctionsTable {
|
||||
* for the specified device.
|
||||
*/
|
||||
NvBool
|
||||
(*resetDisplaySemaphore)
|
||||
(*tryInitDisplaySemaphore)
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 semaphoreIndex
|
||||
@@ -1447,7 +1501,7 @@ struct NvKmsKapiFunctionsTable {
|
||||
/*!
|
||||
* Immediately set the specified display semaphore to the displayable state.
|
||||
*
|
||||
* Must be called after \ref resetDisplaySemaphore to indicate a mode
|
||||
* Must be called after \ref tryInitDisplaySemaphore to indicate a mode
|
||||
* configuration change that utilizes the specified display semaphore for
|
||||
* synchronization may proceed.
|
||||
*
|
||||
@@ -1471,7 +1525,7 @@ struct NvKmsKapiFunctionsTable {
|
||||
*
|
||||
* This can be used by clients to restore a semaphore to a consistent state
|
||||
* when they have prepared it for use by previously calling
|
||||
* \ref resetDisplaySemaphore() on it, but are then prevented from
|
||||
* \ref tryInitDisplaySemaphore() on it, but are then prevented from
|
||||
* submitting the associated hardware operations to consume it due to the
|
||||
* subsequent failure of some software or hardware operation.
|
||||
*
|
||||
@@ -1504,6 +1558,16 @@ struct NvKmsKapiFunctionsTable {
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvS32 index
|
||||
);
|
||||
|
||||
/*
|
||||
* Notify NVKMS that the system's framebuffer console has been disabled and
|
||||
* the reserved allocation for the old framebuffer console can be unmapped.
|
||||
*/
|
||||
void
|
||||
(*framebufferConsoleDisabled)
|
||||
(
|
||||
struct NvKmsKapiDevice *device
|
||||
);
|
||||
};
|
||||
|
||||
/** @} */
|
||||
@@ -1518,6 +1582,20 @@ NvBool nvKmsKapiGetFunctionsTable
|
||||
struct NvKmsKapiFunctionsTable *funcsTable
|
||||
);
|
||||
|
||||
NvU32 nvKmsKapiF16ToF32(NvU16 a);
|
||||
|
||||
NvU16 nvKmsKapiF32ToF16(NvU32 a);
|
||||
|
||||
NvU32 nvKmsKapiF32Mul(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32Div(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32Add(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32ToUI32RMinMag(NvU32 a, NvBool exact);
|
||||
|
||||
NvU32 nvKmsKapiUI32ToF32(NvU32 a);
|
||||
|
||||
/** @} */
|
||||
|
||||
#endif /* defined(__NVKMS_KAPI_H__) */
|
||||
|
||||
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a

@@ -34,19 +34,25 @@
/*
 * This is the maximum number of GPUs supported in a single system.
 */
#define NV_MAX_DEVICES 32

/*
 * This is the maximum number of subdevices within a single device.
 */
#define NV_MAX_SUBDEVICES 8

/*
 * This is the maximum length of the process name string.
 */
#define NV_PROC_NAME_MAX_LENGTH 100U

/*
 * This is the maximum number of heads per GPU.
 */
#define NV_MAX_HEADS 4

/*
 * Maximum length of a MIG device UUID. It is a 36-byte UUID string plus a
 * 4-byte prefix and NUL terminator: 'M' 'I' 'G' '-' UUID '\0x0'
 */
#define NV_MIG_DEVICE_UUID_STR_LENGTH 41U
@@ -33,38 +33,18 @@ extern "C" {

#include "nvtypes.h"

#if !defined(NVIDIA_UNDEF_LEGACY_BIT_MACROS)
//
// Miscellaneous macros useful for bit field manipulations
//
// STUPID HACK FOR CL 19434692. Will revert when fix CL is delivered bfm -> chips_a.
#ifndef BIT
#define BIT(b) (1U<<(b))
#endif
#ifndef BIT32
#define BIT32(b) ((NvU32)1U<<(b))
#endif
#ifndef BIT64
#define BIT64(b) ((NvU64)1U<<(b))
#endif

#endif

//
// It is recommended to use the following bit macros to avoid macro name
// collisions with other src code bases.
//
// Miscellaneous macros useful for bit field manipulations.
#ifndef NVBIT
#define NVBIT(b) (1U<<(b))
#endif
#ifndef NVBIT_TYPE
#define NVBIT_TYPE(b, t) (((t)1U)<<(b))
#endif
#ifndef NVBIT32
#define NVBIT32(b) NVBIT_TYPE(b, NvU32)
#endif
#ifndef NVBIT64
#define NVBIT64(b) NVBIT_TYPE(b, NvU64)
#endif
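//
// Illustrative usage, not part of this change: the recommended NVBIT* macros
// in place of the legacy BIT* names, e.g. when building a 64-bit mask where a
// plain "1 << b" would overflow an int.
//
static NV_FORCEINLINE NvU64 exampleBuildMask(void)
{
    NvU64 mask = 0;

    mask |= NVBIT64(0);    // bit 0
    mask |= NVBIT64(33);   // safe above bit 31: the shifted 1 is an NvU64
    mask |= NVBIT32(5);    // 32-bit variant, widened when OR'd into the NvU64

    return mask;
}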

//Concatenate 2 32bit values to a 64bit value

@@ -72,7 +52,7 @@ extern "C" {

// Helper macro's for 32 bit bitmasks
#define NV_BITMASK32_ELEMENT_SIZE (sizeof(NvU32) << 3)
#define NV_BITMASK32_IDX(chId) (((chId) & ~(0x1F)) >> 5)
#define NV_BITMASK32_OFFSET(chId) ((chId) & (0x1F))
#define NV_BITMASK32_SET(pChannelMask, chId) \
    (pChannelMask)[NV_BITMASK32_IDX(chId)] |= NVBIT(NV_BITMASK32_OFFSET(chId))
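//
// Illustrative usage, not part of this change: marking a channel ID in an
// NvU32 bitmask array with the helpers above. The two-element array (channel
// IDs 0..63) is an example choice only.
//
static NV_FORCEINLINE void exampleMarkChannel(NvU32 chId)
{
    NvU32 channelMask[2] = { 0, 0 };   // NV_BITMASK32_IDX(chId) selects element 0 or 1

    NV_BITMASK32_SET(channelMask, chId);

    (void)channelMask;   // example only; real code would keep this state around
}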
|
||||
@@ -721,6 +701,42 @@ nvPrevPow2_U64(const NvU64 x )
    } \
}

//
// Bug 4851259: Newly added functions must be hidden from certain HS-signed
// ucode compilers to avoid signature mismatch.
//
#ifndef NVDEC_1_0
/*!
 * Returns the position of nth set bit in the given mask.
 *
 * Returns -1 if mask has fewer than n bits set.
 *
 * n is 0 indexed and has valid values 0..31 inclusive, so "zeroth" set bit is
 * the first set LSB.
 *
 * Example, if mask = 0x000000F0u and n = 1, the return value will be 5.
 * Example, if mask = 0x000000F0u and n = 4, the return value will be -1.
 */
static NV_FORCEINLINE NvS32
nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
{
    NvU32 seenSetBitsCount = 0;
    NvS32 index;
    FOR_EACH_INDEX_IN_MASK(32, index, mask)
    {
        if (seenSetBitsCount == n)
        {
            return index;
        }
        ++seenSetBitsCount;
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return -1;
}

#endif // NVDEC_1_0
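For quick reference, a hedged usage sketch of nvGetNthSetBitIndex32() matching the examples in its comment (illustrative only, not part of the diff):

    /* Mask 0x000000F0u has set bits at positions 4..7. */
    NvS32 second = nvGetNthSetBitIndex32(0x000000F0u, 1);  /* == 5  */
    NvS32 none   = nvGetNthSetBitIndex32(0x000000F0u, 4);  /* == -1 */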

//
// Size to use when declaring variable-sized arrays
//
@@ -954,6 +970,22 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
// Get the number of elements the specified fixed-size array
#define NV_ARRAY_ELEMENTS(x)   ((sizeof(x)/sizeof((x)[0])))

#if !defined(NVIDIA_UNDEF_LEGACY_BIT_MACROS)
//
// Deprecated macros whose definition can be removed once the code base no longer references them.
// Use the NVBIT* macros instead of these macros.
//
#ifndef BIT
#define BIT(b)                  (1U<<(b))
#endif
#ifndef BIT32
#define BIT32(b)                ((NvU32)1U<<(b))
#endif
#ifndef BIT64
#define BIT64(b)                ((NvU64)1U<<(b))
#endif
#endif

#ifdef __cplusplus
}
#endif //__cplusplus

@@ -155,6 +155,11 @@ NV_STATUS_CODE(NV_ERR_KEY_ROTATION_IN_PROGRESS, 0x0000007D, "Operation no
NV_STATUS_CODE(NV_ERR_TEST_ONLY_CODE_NOT_ENABLED,      0x0000007E, "Test-only code path not enabled")
NV_STATUS_CODE(NV_ERR_SECURE_BOOT_FAILED,              0x0000007F, "GFW secure boot failed")
NV_STATUS_CODE(NV_ERR_INSUFFICIENT_ZBC_ENTRY,          0x00000080, "No more ZBC entry for the client")
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY,         0x00000081, "Nvlink Fabric Status or Fabric Probe is not yet complete, caller needs to retry")
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE,           0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE,     0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR,       0x00000086, "An error occurred while trying to retire a resource")

// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")

@@ -40,8 +40,11 @@
#include "nv_stdarg.h"
#include <nv-kernel-interface-api.h>
#include <os/nv_memory_type.h>
#include <os/nv_memory_area.h>
#include <nv-caps.h>

#include "rs_access.h"



typedef struct
@@ -102,8 +105,10 @@ NvBool NV_API_CALL os_pci_remove_supported (void);
void       NV_API_CALL os_pci_remove                (void *);
void*      NV_API_CALL os_map_kernel_space          (NvU64, NvU64, NvU32);
void       NV_API_CALL os_unmap_kernel_space        (void *, NvU64);
void*      NV_API_CALL os_map_user_space            (NvU64, NvU64, NvU32, NvU32, void **);
#if defined(NV_VMWARE)
void*      NV_API_CALL os_map_user_space            (MemoryArea *, NvU32, NvU32, void **);
void       NV_API_CALL os_unmap_user_space          (void *, NvU64, void *);
#endif
NV_STATUS  NV_API_CALL os_flush_cpu_cache_all       (void);
NV_STATUS  NV_API_CALL os_flush_user_cache          (void);
void       NV_API_CALL os_flush_cpu_write_combine_buffer(void);
@@ -114,7 +119,7 @@ void       NV_API_CALL os_io_write_byte             (NvU32, NvU8);
void       NV_API_CALL os_io_write_word             (NvU32, NvU16);
void       NV_API_CALL os_io_write_dword            (NvU32, NvU32);
NvBool     NV_API_CALL os_is_administrator          (void);
NvBool     NV_API_CALL os_allow_priority_override   (void);
NvBool     NV_API_CALL os_check_access              (RsAccessRight accessRight);
void       NV_API_CALL os_dbg_init                  (void);
void       NV_API_CALL os_dbg_breakpoint            (void);
void       NV_API_CALL os_dbg_set_level             (NvU32);
@@ -130,7 +135,8 @@ void       NV_API_CALL os_free_spinlock             (void *);
NvU64      NV_API_CALL os_acquire_spinlock          (void *);
void       NV_API_CALL os_release_spinlock          (void *, NvU64);
NV_STATUS  NV_API_CALL os_queue_work_item           (struct os_work_queue *, void *);
NV_STATUS  NV_API_CALL os_flush_work_queue          (struct os_work_queue *);
NV_STATUS  NV_API_CALL os_flush_work_queue          (struct os_work_queue *, NvBool);
NvBool     NV_API_CALL os_is_queue_flush_ongoing    (struct os_work_queue *);
NV_STATUS  NV_API_CALL os_alloc_mutex               (void **);
void       NV_API_CALL os_free_mutex                (void *);
NV_STATUS  NV_API_CALL os_acquire_mutex             (void *);
@@ -164,7 +170,7 @@ NvU32      NV_API_CALL os_get_grid_csp_support      (void);
void       NV_API_CALL os_bug_check                 (NvU32, const char *);
NV_STATUS  NV_API_CALL os_lock_user_pages           (void *, NvU64, void **, NvU32);
NV_STATUS  NV_API_CALL os_lookup_user_io_memory     (void *, NvU64, NvU64 **);
NV_STATUS  NV_API_CALL os_unlock_user_pages         (NvU64, void *);
NV_STATUS  NV_API_CALL os_unlock_user_pages         (NvU64, void *, NvU32);
NV_STATUS  NV_API_CALL os_match_mmap_offset         (void *, NvU64, NvU64 *);
NV_STATUS  NV_API_CALL os_get_euid                  (NvU32 *);
NV_STATUS  NV_API_CALL os_get_smbios_header         (NvU64 *pSmbsAddr);
@@ -172,6 +178,7 @@ NV_STATUS  NV_API_CALL os_get_acpi_rsdp_from_uefi   (NvU32 *);
void       NV_API_CALL os_add_record_for_crashLog   (void *, NvU32);
void       NV_API_CALL os_delete_record_for_crashLog (void *);
NV_STATUS  NV_API_CALL os_call_vgpu_vfio            (void *, NvU32);
NV_STATUS  NV_API_CALL os_device_vm_present         (void);
NV_STATUS  NV_API_CALL os_numa_memblock_size        (NvU64 *);
NV_STATUS  NV_API_CALL os_alloc_pages_node          (NvS32, NvU32, NvU32, NvU64 *);
NV_STATUS  NV_API_CALL os_get_page                  (NvU64 address);
@@ -207,6 +214,7 @@ enum os_pci_req_atomics_type {
    OS_INTF_PCIE_REQ_ATOMICS_128BIT
};
NV_STATUS  NV_API_CALL os_enable_pci_req_atomics    (void *, enum os_pci_req_atomics_type);
void       NV_API_CALL os_pci_trigger_flr(void *handle);
NV_STATUS  NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
NV_STATUS  NV_API_CALL os_numa_add_gpu_memory       (void *, NvU64, NvU64, NvU32 *);
NV_STATUS  NV_API_CALL os_numa_remove_gpu_memory    (void *, NvU64, NvU64, NvU32);
@@ -214,11 +222,14 @@ NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
void*      NV_API_CALL os_get_pid_info(void);
void       NV_API_CALL os_put_pid_info(void *pid_info);
NV_STATUS  NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
NvBool     NV_API_CALL os_is_init_ns(void);

extern NvU32 os_page_size;
extern NvU64 os_page_mask;
extern NvU8  os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_sev_snp_enabled;
extern NvBool os_cc_snp_vtom_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
extern NvBool os_imex_channel_is_supported;

@@ -36,4 +36,69 @@ typedef struct MemoryArea
    NvU64 numRanges;
} MemoryArea;

static inline NvU64 memareaSize(MemoryArea memArea)
{
    NvU64 size = 0;
    NvU64 idx = 0;
    for (idx = 0; idx < memArea.numRanges; idx++)
    {
        size += memArea.pRanges[idx].size;
    }
    return size;
}

static inline MemoryRange
mrangeMake
(
    NvU64 start,
    NvU64 size
)
{
    MemoryRange range;
    range.start = start;
    range.size = size;
    return range;
}

static inline NvU64
mrangeLimit
(
    MemoryRange a
)
{
    return a.start + a.size;
}

static inline NvBool
mrangeIntersects
(
    MemoryRange a,
    MemoryRange b
)
{
    return ((a.start >= b.start) && (a.start < mrangeLimit(b))) ||
        ((b.start >= a.start) && (b.start < mrangeLimit(a)));
}

static inline NvBool
mrangeContains
(
    MemoryRange outer,
    MemoryRange inner
)
{
    return (inner.start >= outer.start) && (mrangeLimit(inner) <= mrangeLimit(outer));
}

static inline MemoryRange
mrangeOffset
(
    MemoryRange range,
    NvU64 amt
)
{
    range.start += amt;
    return range;
}

#endif /* NV_MEMORY_AREA_H */

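A minimal usage sketch of the MemoryRange/MemoryArea helpers above (all values and variable names are illustrative assumptions; it assumes pRanges points at a caller-owned array, as memareaSize() implies):

    /* A two-range MemoryArea covering [0x1000,0x2000) and [0x4000,0x4800). */
    MemoryRange ranges[2];
    MemoryArea  area;

    ranges[0] = mrangeMake(0x1000, 0x1000);
    ranges[1] = mrangeMake(0x4000, 0x0800);
    area.pRanges   = ranges;
    area.numRanges = 2;

    /* memareaSize(area) == 0x1800                                          */
    /* mrangeIntersects(ranges[0], mrangeMake(0x1800, 0x1000)) == NV_TRUE   */
    /* mrangeContains(ranges[0], mrangeOffset(mrangeMake(0, 0x100), 0x1000))*/
    /*   == NV_TRUE, since the offset range is [0x1000,0x1100)              */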
@@ -81,13 +81,15 @@ NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_p2p_object_create(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuDeviceHandle_t, NvHandle *);
void      NV_API_CALL rm_gpu_ops_p2p_object_destroy(nvidia_stack_t *, nvgpuSessionHandle_t, NvHandle);
NV_STATUS NV_API_CALL rm_gpu_ops_get_external_alloc_ptes(nvidia_stack_t*, nvgpuAddressSpaceHandle_t, NvHandle, NvU64, NvU64, nvgpuExternalMappingInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_external_alloc_phys_addrs(nvidia_stack_t*, nvgpuAddressSpaceHandle_t, NvHandle, NvU64, NvU64, nvgpuExternalPhysAddrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_retain_channel(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvHandle, NvHandle, void **, nvgpuChannelInstanceInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_bind_channel_resources(nvidia_stack_t *, void *, nvgpuChannelResourceBindParams_t);
void      NV_API_CALL rm_gpu_ops_release_channel(nvidia_stack_t *, void *);
@@ -100,6 +102,7 @@ void      NV_API_CALL rm_gpu_ops_paging_channel_destroy(nvidia_stack_t *, nvgpu
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t, NvU64 *);
void      NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
void      NV_API_CALL rm_gpu_ops_report_fatal_error(nvidia_stack_t *, NV_STATUS error);

NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);

kernel-open/common/inc/rs_access.h (new file, 276 lines)
@@ -0,0 +1,276 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <nvtypes.h>
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable:4324)
|
||||
#endif
|
||||
|
||||
//
|
||||
// This file was generated with FINN, an NVIDIA coding tool.
|
||||
// Source file: rs_access.finn
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvmisc.h"
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Access right definitions */
|
||||
/****************************************************************************/
|
||||
|
||||
//
|
||||
// The meaning of each access right is documented in
|
||||
// resman/docs/rmapi/resource_server/rm_capabilities.adoc
|
||||
//
|
||||
// RS_ACCESS_COUNT is the number of access rights that have been defined
|
||||
// and are in use. All integers in the range [0, RS_ACCESS_COUNT) should
|
||||
// represent valid access rights.
|
||||
//
|
||||
// When adding a new access right, don't forget to update
|
||||
// 1) The descriptions in the resman/docs/rmapi/resource_server/rm_capabilities.adoc
|
||||
// 2) RS_ACCESS_COUNT, defined below
|
||||
// 3) The declaration of g_rsAccessMetadata in rs_access_rights.c
|
||||
// 4) The list of access rights in drivers/common/chip-config/Chipcontrols.pm
|
||||
// 5) Any relevant access right callbacks
|
||||
//
|
||||
|
||||
#define RS_ACCESS_DUP_OBJECT 0U
|
||||
#define RS_ACCESS_NICE 1U
|
||||
#define RS_ACCESS_DEBUG 2U
|
||||
#define RS_ACCESS_PERFMON 3U
|
||||
#define RS_ACCESS_COUNT 4U
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Access right data structures */
|
||||
/****************************************************************************/
|
||||
|
||||
/*!
|
||||
* @brief A type that can be used to represent any access right.
|
||||
*/
|
||||
typedef NvU16 RsAccessRight;
|
||||
|
||||
/*!
|
||||
* @brief An internal type used to represent one limb in an access right mask.
|
||||
*/
|
||||
typedef NvU32 RsAccessLimb;
|
||||
#define SDK_RS_ACCESS_LIMB_BITS 32
|
||||
|
||||
/*!
|
||||
* @brief The number of limbs in the RS_ACCESS_MASK struct.
|
||||
*/
|
||||
#define SDK_RS_ACCESS_MAX_LIMBS 1
|
||||
|
||||
/*!
|
||||
* @brief The maximum number of possible access rights supported by the
|
||||
* current data structure definition.
|
||||
*
|
||||
* You probably want RS_ACCESS_COUNT instead, which is the number of actual
|
||||
* access rights defined.
|
||||
*/
|
||||
#define SDK_RS_ACCESS_MAX_COUNT (0x20) /* finn: Evaluated from "(SDK_RS_ACCESS_LIMB_BITS * SDK_RS_ACCESS_MAX_LIMBS)" */
|
||||
|
||||
/**
|
||||
* @brief A struct representing a set of access rights.
|
||||
*
|
||||
* Note that the values of bit positions larger than RS_ACCESS_COUNT is
|
||||
* undefined, and should not be assumed to be 0 (see RS_ACCESS_MASK_FILL).
|
||||
*/
|
||||
typedef struct RS_ACCESS_MASK {
|
||||
RsAccessLimb limbs[SDK_RS_ACCESS_MAX_LIMBS];
|
||||
} RS_ACCESS_MASK;
|
||||
|
||||
/**
|
||||
* @brief A struct representing auxiliary information about each access right.
|
||||
*/
|
||||
typedef struct RS_ACCESS_INFO {
|
||||
NvU32 flags;
|
||||
} RS_ACCESS_INFO;
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Access right macros */
|
||||
/****************************************************************************/
|
||||
|
||||
#define SDK_RS_ACCESS_LIMB_INDEX(index) ((index) / SDK_RS_ACCESS_LIMB_BITS)
|
||||
#define SDK_RS_ACCESS_LIMB_POS(index) ((index) % SDK_RS_ACCESS_LIMB_BITS)
|
||||
|
||||
#define SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) \
|
||||
((pAccessMask)->limbs[SDK_RS_ACCESS_LIMB_INDEX(index)])
|
||||
#define SDK_RS_ACCESS_OFFSET_MASK(index) \
|
||||
NVBIT_TYPE(SDK_RS_ACCESS_LIMB_POS(index), RsAccessLimb)
|
||||
|
||||
/*!
|
||||
* @brief Checks that accessRight represents a valid access right.
|
||||
*
|
||||
* The valid range of access rights is [0, RS_ACCESS_COUNT).
|
||||
*
|
||||
* @param[in] accessRight The access right value to check
|
||||
*
|
||||
* @return true if accessRight is valid
|
||||
* @return false otherwise
|
||||
*/
|
||||
#define RS_ACCESS_BOUNDS_CHECK(accessRight) \
|
||||
(accessRight < RS_ACCESS_COUNT)
|
||||
|
||||
/*!
|
||||
* @brief Test whether an access right is present in a set
|
||||
*
|
||||
* @param[in] pAccessMask The set of access rights to read
|
||||
* @param[in] index The access right to examine
|
||||
*
|
||||
* @return NV_TRUE if the access right specified by index was present in the set,
|
||||
* and NV_FALSE otherwise
|
||||
*/
|
||||
#define RS_ACCESS_MASK_TEST(pAccessMask, index) \
|
||||
(RS_ACCESS_BOUNDS_CHECK(index) && \
|
||||
(SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) & SDK_RS_ACCESS_OFFSET_MASK(index)) != 0)
|
||||
|
||||
/*!
|
||||
* @brief Add an access right to a mask
|
||||
*
|
||||
* @param[in] pAccessMask The set of access rights to modify
|
||||
* @param[in] index The access right to set
|
||||
*/
|
||||
#define RS_ACCESS_MASK_ADD(pAccessMask, index) \
|
||||
do \
|
||||
{ \
|
||||
if (RS_ACCESS_BOUNDS_CHECK(index)) { \
|
||||
SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) |= SDK_RS_ACCESS_OFFSET_MASK(index); \
|
||||
} \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Remove an access right from a mask
|
||||
*
|
||||
* @param[in] pAccessMask The set of access rights to modify
|
||||
* @param[in] index The access right to unset
|
||||
*/
|
||||
#define RS_ACCESS_MASK_REMOVE(pAccessMask, index) \
|
||||
do \
|
||||
{ \
|
||||
if (RS_ACCESS_BOUNDS_CHECK(index)) { \
|
||||
SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) &= ~SDK_RS_ACCESS_OFFSET_MASK(index); \
|
||||
} \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Performs an in-place union between two access right masks
|
||||
*
|
||||
* @param[in,out] pMaskOut The access rights mask to be updated
|
||||
* @param[in] pMaskIn The set of access rights to be added to pMaskOut
|
||||
*/
|
||||
#define RS_ACCESS_MASK_UNION(pMaskOut, pMaskIn) \
|
||||
do \
|
||||
{ \
|
||||
NvLength limb; \
|
||||
for (limb = 0; limb < SDK_RS_ACCESS_MAX_LIMBS; limb++) \
|
||||
{ \
|
||||
SDK_RS_ACCESS_LIMB_ELT(pMaskOut, limb) |= SDK_RS_ACCESS_LIMB_ELT(pMaskIn, limb); \
|
||||
} \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Performs an in-place subtract of one mask's rights from another
|
||||
*
|
||||
* @param[in,out] pMaskOut The access rights mask to be updated
|
||||
* @param[in] pMaskIn The set of access rights to be removed from pMaskOut
|
||||
*/
|
||||
#define RS_ACCESS_MASK_SUBTRACT(pMaskOut, pMaskIn) \
|
||||
do \
|
||||
{ \
|
||||
NvLength limb; \
|
||||
for (limb = 0; limb < SDK_RS_ACCESS_MAX_LIMBS; limb++) \
|
||||
{ \
|
||||
SDK_RS_ACCESS_LIMB_ELT(pMaskOut, limb) &= ~SDK_RS_ACCESS_LIMB_ELT(pMaskIn, limb); \
|
||||
} \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Removes all rights from an access rights mask
|
||||
*
|
||||
* @param[in,out] pAccessMask The access rights mask to be updated
|
||||
*/
|
||||
#define RS_ACCESS_MASK_CLEAR(pAccessMask) \
|
||||
do \
|
||||
{ \
|
||||
portMemSet(pAccessMask, 0, sizeof(*pAccessMask)); \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Adds all rights to an access rights mask
|
||||
*
|
||||
* @param[in,out] pAccessMask The access rights mask to be updated
|
||||
*/
|
||||
#define RS_ACCESS_MASK_FILL(pAccessMask) \
|
||||
do \
|
||||
{ \
|
||||
portMemSet(pAccessMask, 0xff, sizeof(*pAccessMask)); \
|
||||
} while (NV_FALSE)
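A hedged usage sketch of the access-mask macros above; it assumes portMemSet() is available to the including code, as RS_ACCESS_MASK_CLEAR/FILL require (illustrative only, not part of the diff):

    /* Grant DUP_OBJECT and DEBUG, then test membership. */
    RS_ACCESS_MASK rights;

    RS_ACCESS_MASK_CLEAR(&rights);
    RS_ACCESS_MASK_ADD(&rights, RS_ACCESS_DUP_OBJECT);
    RS_ACCESS_MASK_ADD(&rights, RS_ACCESS_DEBUG);

    /* RS_ACCESS_MASK_TEST(&rights, RS_ACCESS_DEBUG)   is non-zero */
    /* RS_ACCESS_MASK_TEST(&rights, RS_ACCESS_PERFMON) is zero     */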
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Share definitions */
|
||||
/****************************************************************************/
|
||||
|
||||
//
|
||||
// The usage of Share Policy and the meaning of each share type is documented in
|
||||
// resman/docs/rmapi/resource_server/rm_capabilities.adoc
|
||||
//
|
||||
#define RS_SHARE_TYPE_NONE (0U)
|
||||
#define RS_SHARE_TYPE_ALL (1U)
|
||||
#define RS_SHARE_TYPE_OS_SECURITY_TOKEN (2U)
|
||||
#define RS_SHARE_TYPE_CLIENT (3U)
|
||||
#define RS_SHARE_TYPE_PID (4U)
|
||||
#define RS_SHARE_TYPE_SMC_PARTITION (5U)
|
||||
#define RS_SHARE_TYPE_GPU (6U)
|
||||
#define RS_SHARE_TYPE_FM_CLIENT (7U)
|
||||
// Must be last. Update when a new SHARE_TYPE is added
|
||||
#define RS_SHARE_TYPE_MAX (8U)
|
||||
|
||||
|
||||
//
|
||||
// Use Revoke to remove an existing policy from the list.
|
||||
// Allow is based on OR logic, Require is based on AND logic.
|
||||
// To share a right, at least one Allow (non-Require) must match, and all Require must pass.
|
||||
// If Compose is specified, policies will be added to the list. Otherwise, they will replace the list.
|
||||
//
|
||||
#define RS_SHARE_ACTION_FLAG_REVOKE NVBIT(0)
|
||||
#define RS_SHARE_ACTION_FLAG_REQUIRE NVBIT(1)
|
||||
#define RS_SHARE_ACTION_FLAG_COMPOSE NVBIT(2)
|
||||
|
||||
/****************************************************************************/
|
||||
/* Share flag data structures */
|
||||
/****************************************************************************/
|
||||
|
||||
typedef struct RS_SHARE_POLICY {
|
||||
NvU32 target;
|
||||
RS_ACCESS_MASK accessMask;
|
||||
NvU16 type; ///< RS_SHARE_TYPE_
|
||||
NvU8 action; ///< RS_SHARE_ACTION_
|
||||
} RS_SHARE_POLICY;
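A hedged sketch of populating an RS_SHARE_POLICY under the Allow/Require/Compose semantics described above (field values are illustrative assumptions, not taken from the driver):

    /* Require that a client sharing DUP_OBJECT belongs to the same SMC
     * partition, composing with (rather than replacing) existing policies. */
    RS_SHARE_POLICY policy;

    RS_ACCESS_MASK_CLEAR(&policy.accessMask);
    RS_ACCESS_MASK_ADD(&policy.accessMask, RS_ACCESS_DUP_OBJECT);
    policy.target = 0;
    policy.type   = RS_SHARE_TYPE_SMC_PARTITION;
    policy.action = RS_SHARE_ACTION_FLAG_REQUIRE | RS_SHARE_ACTION_FLAG_COMPOSE;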
|
||||
@@ -25,6 +25,7 @@ fi
|
||||
# VGX_KVM_BUILD parameter defined only vGPU builds on KVM hypervisor
|
||||
# GRID_BUILD parameter defined only for GRID builds (GRID Guest driver)
|
||||
# GRID_BUILD_CSP parameter defined only for GRID CSP builds (GRID Guest driver for CSPs)
|
||||
# VGX_DEVICE_VM_BUILD parameter defined only for Device VM VGX build (vGPU Host driver)
|
||||
|
||||
test_xen() {
|
||||
#
|
||||
@@ -661,23 +662,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pci_bus_address)
|
||||
#
|
||||
# Determine if the pci_bus_address() function is
|
||||
# present.
|
||||
#
|
||||
# Added by commit 06cf56e497c8 ("PCI: Add pci_bus_address() to
|
||||
# get bus address of a BAR") in v3.14
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
void conftest_pci_bus_address(void) {
|
||||
pci_bus_address();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PCI_BUS_ADDRESS_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
hash__remap_4k_pfn)
|
||||
#
|
||||
# Determine if the hash__remap_4k_pfn() function is
|
||||
@@ -823,6 +807,16 @@ compile_test() {
|
||||
return
|
||||
;;
|
||||
|
||||
device_vm_build)
|
||||
# Add config parameter if running on Device VM.
|
||||
if [ -n "$VGX_DEVICE_VM_BUILD" ]; then
|
||||
echo "#define NV_DEVICE_VM_BUILD" | append_conftest "generic"
|
||||
else
|
||||
echo "#undef NV_DEVICE_VM_BUILD" | append_conftest "generic"
|
||||
fi
|
||||
return
|
||||
;;
|
||||
|
||||
vfio_register_notifier)
|
||||
#
|
||||
# Check number of arguments required.
|
||||
@@ -1290,6 +1284,77 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PFN_ADDRESS_SPACE_STRUCT_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
egm_module_helper_api_present)
|
||||
#
|
||||
# Determine if egm management api are present or not.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
#include <linux/nvgrace-egm.h>
|
||||
void conftest_egm_module_helper_api_present() {
|
||||
struct pci_dev *pdev;
|
||||
register_egm_node(pdev);
|
||||
unregister_egm_node(0);
|
||||
}
|
||||
"
|
||||
compile_check_conftest "$CODE" "NV_EGM_MODULE_HELPER_API_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
egm_bad_pages_handling_support)
|
||||
#
|
||||
# Determine if egm_bad_pages_list is present or not.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/types.h>
|
||||
#include <linux/egm.h>
|
||||
void conftest_egm_bad_pages_handle() {
|
||||
int ioctl = EGM_BAD_PAGES_LIST;
|
||||
struct egm_bad_pages_list list;
|
||||
}
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_EGM_BAD_PAGES_HANDLING_SUPPORT" "" "types"
|
||||
;;
|
||||
|
||||
class_create_has_no_owner_arg)
|
||||
#
|
||||
# Determine if the class_create API with the new signature
|
||||
# is present or not.
|
||||
#
|
||||
# Added by commit 1aaba11da9aa ("driver core: class: remove
|
||||
# module * from class_create()") in v6.4 (2023-03-13)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/device/class.h>
|
||||
void conftest_class_create() {
|
||||
struct class *class;
|
||||
class = class_create(\"test\");
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CLASS_CREATE_HAS_NO_OWNER_ARG" "" "types"
|
||||
;;
|
||||
|
||||
class_devnode_has_const_arg)
|
||||
#
|
||||
# Determine if the class.devnode is present with the new signature.
|
||||
#
|
||||
# Added by commit ff62b8e6588f ("driver core: make struct
|
||||
# class.devnode() take a const *") in v6.2 (2022-11-23)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/device.h>
|
||||
static char *conftest_devnode(const struct device *device, umode_t *mode) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void conftest_class_devnode() {
|
||||
struct class class;
|
||||
class.devnode = conftest_devnode;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CLASS_DEVNODE_HAS_CONST_ARG" "" "types"
|
||||
;;
|
||||
|
||||
pci_irq_vector_helpers)
|
||||
#
|
||||
# Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors()
|
||||
@@ -1538,23 +1603,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_GET_NUM_PHYSPAGES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
backing_dev_info)
|
||||
#
|
||||
# Determine if the 'address_space' structure has
|
||||
# a 'backing_dev_info' field.
|
||||
#
|
||||
# Removed by commit b83ae6d42143 ("fs: remove
|
||||
# mapping->backing_dev_info") in v4.0
|
||||
#
|
||||
CODE="
|
||||
#include <linux/fs.h>
|
||||
int conftest_backing_dev_info(void) {
|
||||
return offsetof(struct address_space, backing_dev_info);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO" "" "types"
|
||||
;;
|
||||
|
||||
xen_ioemu_inject_msi)
|
||||
# Determine if the xen_ioemu_inject_msi() function is present.
|
||||
CODE="
|
||||
@@ -1804,22 +1852,6 @@ compile_test() {
|
||||
fi
|
||||
;;
|
||||
|
||||
pnv_pci_get_npu_dev)
|
||||
#
|
||||
# Determine if the pnv_pci_get_npu_dev function is present.
|
||||
#
|
||||
# Added by commit 5d2aa710e697 ("powerpc/powernv: Add support
|
||||
# for Nvlink NPUs") in v4.5
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
void conftest_pnv_pci_get_npu_dev() {
|
||||
pnv_pci_get_npu_dev();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PNV_PCI_GET_NPU_DEV_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
kernel_write_has_pointer_pos_arg)
|
||||
#
|
||||
# Determine the pos argument type, which was changed by commit
|
||||
@@ -2409,45 +2441,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_ATOMIC_HELPER_LEGACY_GAMMA_SET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
wait_on_bit_lock_argument_count)
|
||||
#
|
||||
# Determine how many arguments wait_on_bit_lock takes.
|
||||
#
|
||||
# Changed by commit 743162013d40 ("sched: Remove proliferation
|
||||
# of wait_on_bit() action functions") in v3.17 (2014-07-07)
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/wait.h>
|
||||
void conftest_wait_on_bit_lock(void) {
|
||||
wait_on_bit_lock(NULL, 0, 0);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT 3" | append_conftest "functions"
|
||||
return
|
||||
fi
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/wait.h>
|
||||
void conftest_wait_on_bit_lock(void) {
|
||||
wait_on_bit_lock(NULL, 0, NULL, 0);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT 4" | append_conftest "functions"
|
||||
return
|
||||
fi
|
||||
echo "#error wait_on_bit_lock() conftest failed!" | append_conftest "functions"
|
||||
;;
|
||||
|
||||
pci_stop_and_remove_bus_device)
|
||||
#
|
||||
# Determine if the pci_stop_and_remove_bus_device() function is present.
|
||||
@@ -2523,29 +2516,20 @@ compile_test() {
|
||||
fi
|
||||
;;
|
||||
|
||||
mm_context_t)
|
||||
file_operations_fop_unsigned_offset_present)
|
||||
#
|
||||
# Determine if the 'mm_context_t' data type is present
|
||||
# and if it has an 'id' member.
|
||||
# It does not exist on all architectures.
|
||||
# Determine if the FOP_UNSIGNED_OFFSET define is present.
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/mm.h>
|
||||
int conftest_mm_context_t(void) {
|
||||
return offsetof(mm_context_t, id);
|
||||
}" > conftest$$.c
|
||||
# Added by commit 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to
|
||||
# fop_flags") in v6.12.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/fs.h>
|
||||
int conftest_file_operations_fop_unsigned_offset_present(void) {
|
||||
return FOP_UNSIGNED_OFFSET;
|
||||
}"
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
echo "#define NV_MM_CONTEXT_T_HAS_ID" | append_conftest "types"
|
||||
rm -f conftest$$.o
|
||||
return
|
||||
else
|
||||
echo "#undef NV_MM_CONTEXT_T_HAS_ID" | append_conftest "types"
|
||||
return
|
||||
fi
|
||||
compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
pci_dev_has_ats_enabled)
|
||||
@@ -5102,6 +5086,42 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_CC_PLATFORM_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
cc_attr_guest_sev_snp)
|
||||
#
|
||||
# Determine if 'CC_ATTR_GUEST_SEV_SNP' is present.
|
||||
#
|
||||
# Added by commit aa5a461171f9 ("x86/mm: Extend cc_attr to
|
||||
# include AMD SEV-SNP") in v5.19.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
|
||||
#include <linux/cc_platform.h>
|
||||
#endif
|
||||
|
||||
enum cc_attr cc_attributes = CC_ATTR_GUEST_SEV_SNP;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CC_ATTR_SEV_SNP" "" "types"
|
||||
;;
|
||||
|
||||
hv_get_isolation_type)
|
||||
#
|
||||
# Determine if 'hv_get_isolation_type()' is present.
|
||||
# Added by commit faff44069ff5 ("x86/hyperv: Add Write/Read MSR
|
||||
# registers via ghcb page") in v5.16.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_ASM_MSHYPERV_H_PRESENT)
|
||||
#include <asm/mshyperv.h>
|
||||
#endif
|
||||
void conftest_hv_get_isolation_type(void) {
|
||||
int i;
|
||||
hv_get_isolation_type(i);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_HV_GET_ISOLATION_TYPE" "" "functions"
|
||||
;;
|
||||
|
||||
drm_prime_pages_to_sg_has_drm_device_arg)
|
||||
#
|
||||
# Determine if drm_prime_pages_to_sg() has 'dev' argument.
|
||||
@@ -5269,6 +5289,45 @@ compile_test() {
|
||||
|
||||
compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
follow_pte_arg_vma)
|
||||
#
|
||||
# Determine if the first argument of follow_pte is
|
||||
# mm_struct or vm_area_struct.
|
||||
#
|
||||
# The first argument was changed from mm_struct to vm_area_struct by
|
||||
# commit 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()")
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mm.h>
|
||||
|
||||
typeof(follow_pte) conftest_follow_pte_has_vma_arg;
|
||||
int conftest_follow_pte_has_vma_arg(struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
pte_t **ptep,
|
||||
spinlock_t **ptl) {
|
||||
return 0;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_FOLLOW_PTE_ARG1_VMA" "" "types"
|
||||
;;
|
||||
|
||||
ptep_get)
|
||||
#
|
||||
# Determine if ptep_get() is present.
|
||||
#
|
||||
# ptep_get() was added by commit 481e980a7c19
|
||||
# ("mm: Allow arches to provide ptep_get()")
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mm.h>
|
||||
void conftest_ptep_get(void) {
|
||||
ptep_get();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PTEP_GET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_plane_atomic_check_has_atomic_state_arg)
|
||||
#
|
||||
# Determine if drm_plane_helper_funcs::atomic_check takes 'state'
|
||||
@@ -5650,6 +5709,26 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_ICC_GET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
devm_of_icc_get)
|
||||
#
|
||||
# Determine if devm_of_icc_get() function is present
|
||||
#
|
||||
# Added by commit e145d9a ("interconnect: Add devm_of_icc_get() as
|
||||
# exported API for user interconnect API")
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_LINUX_INTERCONNECT_H_PRESENT)
|
||||
#include <linux/interconnect.h>
|
||||
#endif
|
||||
void conftest_devm_of_icc_get(void)
|
||||
{
|
||||
devm_of_icc_get();
|
||||
}
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DEVM_ICC_GET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
icc_set_bw)
|
||||
#
|
||||
# Determine if icc_set_bw() function is present
|
||||
@@ -6096,6 +6175,20 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PLATFORM_IRQ_COUNT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pcie_reset_flr)
|
||||
#
|
||||
# Determine if the pcie_reset_flr() function is present
|
||||
#
|
||||
# Added by commit 56f107d ("PCI: Add pcie_reset_flr() with
|
||||
# 'probe' argument") in v5.15.
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
int conftest_pcie_reset_flr(void) {
|
||||
return pcie_reset_flr();
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_PCIE_RESET_FLR_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
devm_clk_bulk_get_all)
|
||||
#
|
||||
# Determine if devm_clk_bulk_get_all() function is present
|
||||
@@ -6253,6 +6346,32 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
acpi_video_register_backlight)
|
||||
#
|
||||
# Determine if acpi_video_register_backlight() function is present
|
||||
#
|
||||
# acpi_video_register_backlight was added by commit 3dbc80a3e4c55c
|
||||
# (ACPI: video: Make backlight class device registration a separate
|
||||
# step (v2)) for v6.0 (2022-09-02).
|
||||
# Note: the include directive for <linux/types> in this conftest is
|
||||
# necessary in order to support kernels between commit 0b9f7d93ca61
|
||||
# ("ACPI / i915: ignore firmware requests backlight change") for
|
||||
# v3.16 (2014-07-07) and commit 3bd6bce369f5 ("ACPI / video: Port
|
||||
# to new backlight interface selection API") for v4.2 (2015-07-16).
|
||||
# Kernels within this range use the 'bool' type and the related
|
||||
# 'false' value in <acpi/video.h> without first including the
|
||||
# definitions of that type and value.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/types.h>
|
||||
#include <acpi/video.h>
|
||||
void conftest_acpi_video_register_backlight(void) {
|
||||
acpi_video_register_backlight(0);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ACPI_VIDEO_REGISTER_BACKLIGHT" "" "functions"
|
||||
;;
|
||||
|
||||
acpi_video_backlight_use_native)
|
||||
#
|
||||
# Determine if acpi_video_backlight_use_native() function is present
|
||||
@@ -6596,7 +6715,9 @@ compile_test() {
|
||||
# Determine whether drm_fbdev_generic_setup is present.
|
||||
#
|
||||
# Added by commit 9060d7f49376 ("drm/fb-helper: Finish the
|
||||
# generic fbdev emulation") in v4.19.
|
||||
# generic fbdev emulation") in v4.19. Removed by commit
|
||||
# aae4682e5d66 ("drm/fbdev-generic: Convert to fbdev-ttm")
|
||||
# in v6.11.
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_fb_helper.h>
|
||||
@@ -6608,6 +6729,105 @@ compile_test() {
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_FBDEV_GENERIC_SETUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_fbdev_ttm_setup)
|
||||
#
|
||||
# Determine whether drm_fbdev_ttm_setup is present.
|
||||
#
|
||||
# Added by commit aae4682e5d66 ("drm/fbdev-generic:
|
||||
# Convert to fbdev-ttm") in v6.11. Removed by commit
|
||||
# 1000634477d8 ("drm/fbdev-ttm:Convert to client-setup") in v6.13.
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
|
||||
#include <drm/drm_fbdev_ttm.h>
|
||||
#endif
|
||||
void conftest_drm_fbdev_ttm_setup(void) {
|
||||
drm_fbdev_ttm_setup();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_FBDEV_TTM_SETUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_client_setup)
|
||||
#
|
||||
# Determine whether drm_client_setup is present.
|
||||
#
|
||||
# Added by commit d07fdf922592 ("drm/fbdev-ttm: Convert to
|
||||
# client-setup") in v6.13 in drm/drm_client_setup.h, but then moved
|
||||
# to drm/clients/drm_client_setup.h by commit b86711c6d6e2
|
||||
# ("drm/client: Move public client header to clients/ subdirectory")
|
||||
# in linux-next b86711c6d6e2.
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
|
||||
#include <drm/drm_client_setup.h>
|
||||
#elif defined(NV_DRM_CLIENTS_DRM_CLIENT_SETUP_H_PRESENT)
|
||||
#include <drm/clients/drm_client_setup.h>
|
||||
#endif
|
||||
void conftest_drm_client_setup(void) {
|
||||
drm_client_setup();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_CLIENT_SETUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_output_poll_changed)
|
||||
#
|
||||
# Determine whether drm_mode_config_funcs.output_poll_changed
|
||||
# callback is present
|
||||
#
|
||||
# Removed by commit 446d0f4849b1 ("drm: Remove struct
|
||||
# drm_mode_config_funcs.output_poll_changed") in v6.12. Hotplug
|
||||
# event support is handled through the fbdev emulation interface
|
||||
# going forward.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_MODE_CONFIG_H_PRESENT)
|
||||
#include <drm/drm_mode_config.h>
|
||||
#else
|
||||
#include <drm/drm_crtc.h>
|
||||
#endif
|
||||
int conftest_drm_output_poll_changed_available(void) {
|
||||
return offsetof(struct drm_mode_config_funcs, output_poll_changed);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_OUTPUT_POLL_CHANGED_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
aperture_remove_conflicting_devices)
|
||||
#
|
||||
# Determine whether aperture_remove_conflicting_devices is present.
|
||||
#
|
||||
# Added by commit 7283f862bd991 ("drm: Implement DRM aperture
|
||||
# helpers under video/") in v6.0
|
||||
CODE="
|
||||
#if defined(NV_LINUX_APERTURE_H_PRESENT)
|
||||
#include <linux/aperture.h>
|
||||
#endif
|
||||
void conftest_aperture_remove_conflicting_devices(void) {
|
||||
aperture_remove_conflicting_devices();
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
aperture_remove_conflicting_pci_devices)
|
||||
#
|
||||
# Determine whether aperture_remove_conflicting_pci_devices is present.
|
||||
#
|
||||
# Added by commit 7283f862bd991 ("drm: Implement DRM aperture
|
||||
# helpers under video/") in v6.0
|
||||
CODE="
|
||||
#if defined(NV_LINUX_APERTURE_H_PRESENT)
|
||||
#include <linux/aperture.h>
|
||||
#endif
|
||||
void conftest_aperture_remove_conflicting_pci_devices(void) {
|
||||
aperture_remove_conflicting_pci_devices();
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_aperture_remove_conflicting_pci_framebuffers)
|
||||
@@ -6703,17 +6923,17 @@ compile_test() {
|
||||
# This test is not complete and may return false positive.
|
||||
#
|
||||
CODE="
|
||||
#include <crypto/akcipher.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/ecc_curve.h>
|
||||
#include <crypto/ecdh.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/internal/ecc.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include <crypto/public_key.h>
|
||||
#include <crypto/sm3.h>
|
||||
#include <keys/asymmetric-type.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/akcipher.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/ecc_curve.h>
|
||||
#include <crypto/ecdh.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/internal/ecc.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include <crypto/public_key.h>
|
||||
#include <crypto/sm3.h>
|
||||
#include <keys/asymmetric-type.h>
|
||||
#include <linux/crypto.h>
|
||||
void conftest_crypto(void) {
|
||||
struct shash_desc sd;
|
||||
struct crypto_shash cs;
|
||||
@@ -6723,6 +6943,47 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_CRYPTO_PRESENT" "" "symbols"
|
||||
;;
|
||||
|
||||
crypto_akcipher_verify)
|
||||
#
|
||||
# Determine whether the crypto_akcipher_verify API is still present.
|
||||
# It was removed by commit 6b34562 ('crypto: akcipher - Drop sign/verify operations')
|
||||
# in v6.13-rc1 (2024-10-04).
|
||||
#
|
||||
# This test is dependent on the crypto conftest to determine whether crypto should be
# enabled at all. That means that if the kernel is old enough that crypto_akcipher_verify
# has never existed, the crypto conftest above fails and this check is moot.
#
|
||||
# The test merely checks for the presence of the API, as it assumes that if the API
|
||||
# is no longer present, the new API to replace it (crypto_sig_verify) must be present.
|
||||
# If the kernel version is too old to have crypto_akcipher_verify, it will fail the crypto
|
||||
# conftest above and all crypto code will be compiled out.
|
||||
#
|
||||
CODE="
|
||||
#include <crypto/akcipher.h>
|
||||
#include <linux/crypto.h>
|
||||
void conftest_crypto_akcipher_verify(void) {
|
||||
(void)crypto_akcipher_verify;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CRYPTO_AKCIPHER_VERIFY_PRESENT" "" "symbols"
|
||||
;;
|
||||
|
||||
ecc_digits_from_bytes)
|
||||
#
|
||||
# Determine whether ecc_digits_from_bytes is present.
|
||||
# It was added in commit c6ab5c915da4 ('crypto: ecc - Prevent ecc_digits_from_bytes from
|
||||
# reading too many bytes') in v6.10.
|
||||
#
|
||||
# This functionality is needed when crypto_akcipher_verify is not present.
|
||||
#
|
||||
CODE="
|
||||
#include <crypto/internal/ecc.h>
|
||||
void conftest_ecc_digits_from_bytes(void) {
|
||||
(void)ecc_digits_from_bytes;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ECC_DIGITS_FROM_BYTES_PRESENT" "" "symbols"
|
||||
;;
|
||||
|
||||
mempolicy_has_unified_nodes)
|
||||
#
|
||||
# Determine if the 'mempolicy' structure has
|
||||
@@ -6990,6 +7251,359 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_PROPERTY_BLOB_PUT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_driver_has_gem_prime_mmap)
|
||||
#
|
||||
# Determine if the 'drm_driver' structure has a 'gem_prime_mmap'
|
||||
# function pointer.
|
||||
#
|
||||
# Removed by commit 0adec22702d4 ("drm: Remove struct
|
||||
# drm_driver.gem_prime_mmap") in v6.6.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
|
||||
int conftest_drm_driver_has_gem_prime_mmap(void) {
|
||||
return offsetof(struct drm_driver, gem_prime_mmap);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_GEM_PRIME_MMAP" "" "types"
|
||||
;;
|
||||
|
||||
drm_gem_prime_mmap)
|
||||
#
|
||||
# Determine if the function drm_gem_prime_mmap() is present.
|
||||
#
|
||||
# Added by commit 7698799f95 ("drm/prime: Add drm_gem_prime_mmap()
|
||||
# in v5.0
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
|
||||
#include <drm/drm_prime.h>
|
||||
#endif
|
||||
void conftest_drm_gem_prime_mmap(void) {
|
||||
drm_gem_prime_mmap();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_PRIME_MMAP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
vmf_insert_mixed)
|
||||
#
|
||||
# Determine if the function vmf_insert_mixed() is present.
|
||||
#
|
||||
# Added by commit 1c8f422059ae ("mm: change return type to
|
||||
# vm_fault_t") in v4.17.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mm.h>
|
||||
void conftest_vmf_insert_mixed() {
|
||||
vmf_insert_mixed();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VMF_INSERT_MIXED_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pfn_to_pfn_t)
|
||||
#
|
||||
# Determine if the function pfn_to_pfn_t() is present.
|
||||
#
|
||||
# Added by commit 34c0fd540e79 ("mm, dax, pmem: introduce pfn_t") in
|
||||
# v4.5.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_LINUX_PFN_T_H_PRESENT)
|
||||
#include <linux/pfn_t.h>
|
||||
#endif
|
||||
void conftest_pfn_to_pfn_t() {
|
||||
pfn_to_pfn_t();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PFN_TO_PFN_T_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_gem_dmabuf_mmap)
|
||||
#
|
||||
# Determine if the drm_gem_dmabuf_mmap() function is present.
|
||||
#
|
||||
# drm_gem_dmabuf_mmap() was exported by commit c308279f8798 ("drm:
|
||||
# export gem dmabuf_ops for drivers to reuse") in v4.17.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
|
||||
#include <drm/drm_prime.h>
|
||||
#endif
|
||||
void conftest_drm_gem_dmabuf_mmap(void) {
|
||||
drm_gem_dmabuf_mmap();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_DMABUF_MMAP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_gem_prime_export_has_dev_arg)
|
||||
#
|
||||
# Determine if drm_gem_prime_export() function has a 'dev' argument.
|
||||
#
|
||||
# This argument was removed by commit e4fa8457b219 ("drm/prime:
|
||||
# Align gem_prime_export with obj_funcs.export") in v5.4.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
|
||||
#include <drm/drm_prime.h>
|
||||
#endif
|
||||
|
||||
void conftest_drm_gem_prime_export_has_dev_arg(
|
||||
struct drm_device *dev,
|
||||
struct drm_gem_object *obj) {
|
||||
(void) drm_gem_prime_export(dev, obj, 0);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_PRIME_EXPORT_HAS_DEV_ARG" "" "types"
|
||||
;;
|
||||
|
||||
dma_buf_ops_has_cache_sgt_mapping)
|
||||
#
|
||||
# Determine if dma_buf_ops structure has a 'cache_sgt_mapping'
|
||||
# member.
|
||||
#
|
||||
# dma_buf_ops::cache_sgt_mapping was added by commit f13e143e7444
|
||||
# ("dma-buf: start caching of sg_table objects v2") in v5.3.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/dma-buf.h>
|
||||
int conftest_dma_ops_has_cache_sgt_mapping(void) {
|
||||
return offsetof(struct dma_buf_ops, cache_sgt_mapping);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DMA_BUF_OPS_HAS_CACHE_SGT_MAPPING" "" "types"
|
||||
;;
|
||||
|
||||
drm_gem_object_funcs)
|
||||
#
|
||||
# Determine if the 'struct drm_gem_object_funcs' type is present.
|
||||
#
|
||||
# Added by commit b39b5394fabc ("drm/gem: Add drm_gem_object_funcs")
|
||||
# in v5.0.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_GEM_H_PRESENT)
|
||||
#include <drm/drm_gem.h>
|
||||
#endif
|
||||
struct drm_gem_object_funcs funcs;"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_FUNCS_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
sg_dma_page_iter)
|
||||
#
|
||||
# Determine if the struct sg_dma_page_iter is present.
|
||||
# This also serves to know if the argument type of the macro
|
||||
# sg_page_iter_dma_address() changed:
|
||||
# - before: struct sg_page_iter *piter
|
||||
# - after: struct sg_dma_page_iter *dma_iter
|
||||
#
|
||||
# Added by commit d901b2760dc6c ("lib/scatterlist: Provide a DMA
|
||||
# page iterator") v5.0.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/scatterlist.h>
|
||||
struct sg_dma_page_iter conftest_dma_page_iter;"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_SG_DMA_PAGE_ITER_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
# FIXME: See if we can remove this test
|
||||
for_each_sgtable_dma_page)
|
||||
#
|
||||
# Determine if macro for_each_sgtable_dma_page is present.
|
||||
#
|
||||
# Added by commit 709d6d73c756 ("scatterlist: add generic wrappers
|
||||
# for iterating over sgtable objects") v5.7.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/scatterlist.h>
|
||||
void conftest_for_each_sgtable_dma_page(void) {
|
||||
for_each_sgtable_dma_page();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_aperture_remove_conflicting_framebuffers)
|
||||
#
|
||||
# Determine whether drm_aperture_remove_conflicting_framebuffers is present.
|
||||
#
|
||||
# drm_aperture_remove_conflicting_framebuffers was added in commit 2916059147ea
|
||||
# ("drm/aperture: Add infrastructure for aperture ownership) in
|
||||
# v5.14-rc1 (2021-04-12)
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
|
||||
#include <drm/drm_aperture.h>
|
||||
#endif
|
||||
void conftest_drm_aperture_remove_conflicting_framebuffers(void) {
|
||||
drm_aperture_remove_conflicting_framebuffers();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_aperture_remove_conflicting_framebuffers_has_driver_arg)
|
||||
#
|
||||
# Determine whether drm_aperture_remove_conflicting_framebuffers
|
||||
# takes a struct drm_driver * as its fourth argument.
|
||||
#
|
||||
# Prior to commit 97c9bfe3f6605d41eb8f1206e6e0f62b31ba15d6, the
|
||||
# second argument was a char * pointer to the driver's name.
|
||||
#
|
||||
# To test if drm_aperture_remove_conflicting_framebuffers() has
|
||||
# a req_driver argument, define a function with the expected
|
||||
# signature and then define the corresponding function
|
||||
# implementation with the expected signature. Successful compilation
|
||||
# indicates that this function has the expected signature.
|
||||
#
|
||||
# This change occurred in commit 97c9bfe3f660 ("drm/aperture: Pass
|
||||
# DRM driver structure instead of driver name") in v5.15
|
||||
# (2021-06-29).
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
|
||||
#include <drm/drm_aperture.h>
|
||||
#endif
|
||||
typeof(drm_aperture_remove_conflicting_framebuffers) conftest_drm_aperture_remove_conflicting_framebuffers;
|
||||
int conftest_drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size,
|
||||
bool primary, const struct drm_driver *req_driver)
|
||||
{
|
||||
return 0;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_DRIVER_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_aperture_remove_conflicting_framebuffers_has_no_primary_arg)
|
||||
#
|
||||
# Determine whether drm_aperture_remove_conflicting_framebuffers
|
||||
# has its third argument as a bool.
|
||||
#
|
||||
# Prior to commit 62aeaeaa1b267c5149abee6b45967a5df3feed58, the
|
||||
# third argument was a bool for figuring out whether the legacy vga
|
||||
# stuff should be nuked, but it's only for pci devices and not
|
||||
# really needed in this function.
|
||||
#
|
||||
# To test if drm_aperture_remove_conflicting_framebuffers() has
|
||||
# a bool primary argument, define a function with the expected
|
||||
# signature and then define the corresponding function
|
||||
# implementation with the expected signature. Successful compilation
|
||||
# indicates that this function has the expected signature.
|
||||
#
|
||||
# This change occurred in commit 62aeaeaa1b26 ("drm/aperture: Remove
|
||||
# primary argument") in v6.5 (2023-04-16).
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
|
||||
#include <drm/drm_aperture.h>
|
||||
#endif
|
||||
typeof(drm_aperture_remove_conflicting_framebuffers) conftest_drm_aperture_remove_conflicting_framebuffers;
|
||||
int conftest_drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size,
|
||||
const struct drm_driver *req_driver)
|
||||
{
|
||||
return 0;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_NO_PRIMARY_ARG" "" "types"
|
||||
;;
|
||||
|
||||
struct_page_has_zone_device_data)
|
||||
#
|
||||
# Determine if struct page has a 'zone_device_data' field.
|
||||
#
|
||||
# Added by commit 8a164fef9c4c ("mm: simplify ZONE_DEVICE page
|
||||
# private data") in v5.3.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mm_types.h>
|
||||
int conftest_struct_page_has_zone_device_data(void) {
|
||||
return offsetof(struct page, zone_device_data);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA" "" "types"
|
||||
;;
|
||||
|
||||
folio_test_swapcache)
|
||||
#
|
||||
# Determine if the folio_test_swapcache() function is present.
|
||||
#
|
||||
# folio_test_swapcache() was exported by commit d389a4a811551 ("mm:
|
||||
# Add folio flag manipulation functions") in v5.16.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/page-flags.h>
|
||||
void conftest_folio_test_swapcache(void) {
|
||||
folio_test_swapcache();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
module_import_ns_takes_constant)
|
||||
#
|
||||
# Determine if the MODULE_IMPORT_NS macro takes a string literal
|
||||
# or constant.
|
||||
#
|
||||
# Commit cdd30ebb1b9f ("module: Convert symbol namespace to
|
||||
# string literal") changed MODULE_IMPORT_NS to take a string
|
||||
# literal in Linux kernel v6.13.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/module.h>
|
||||
|
||||
MODULE_IMPORT_NS(DMA_BUF);"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
|
||||
;;
|
||||
|
||||
|
||||
drm_driver_has_date)
|
||||
#
|
||||
# Determine if the 'drm_driver' structure has a 'date' field.
|
||||
#
|
||||
# Removed by commit cb2e1c2136f7 ("drm: remove driver date from
|
||||
# struct drm_driver and all drivers") in linux-next, expected in
|
||||
# v6.14.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
|
||||
int conftest_drm_driver_has_date(void) {
|
||||
return offsetof(struct drm_driver, date);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types"
|
||||
;;
|
||||
|
||||
# When adding a new conftest entry, please use the correct format for
|
||||
# specifying the relevant upstream Linux kernel commit. Please
|
||||
# avoid specifying -rc kernels, and only use SHAs that actually exist
|
||||
|
||||
@@ -15,6 +15,8 @@ NV_HEADER_PRESENCE_TESTS = \
    drm/drm_atomic_uapi.h \
    drm/drm_drv.h \
    drm/drm_fbdev_generic.h \
    drm/drm_fbdev_ttm.h \
    drm/drm_client_setup.h \
    drm/drm_framebuffer.h \
    drm/drm_connector.h \
    drm/drm_probe_helper.h \
@@ -29,10 +31,13 @@ NV_HEADER_PRESENCE_TESTS = \
    drm/drm_mode_config.h \
    drm/drm_modeset_lock.h \
    drm/drm_property.h \
    drm/clients/drm_client_setup.h \
    dt-bindings/interconnect/tegra_icc_id.h \
    generated/autoconf.h \
    generated/compile.h \
    generated/utsrelease.h \
    linux/aperture.h \
    linux/dma-direct.h \
    linux/efi.h \
    linux/kconfig.h \
    linux/platform/tegra/mc_utils.h \
@@ -99,5 +104,8 @@ NV_HEADER_PRESENCE_TESTS = \
    linux/sync_file.h \
    linux/cc_platform.h \
    asm/cpufeature.h \
    linux/mpi.h
    linux/mpi.h \
    asm/mshyperv.h \
    crypto/sig.h \
    linux/pfn_t.h

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;

// Get the NUMA node where the first page of the stack is resident. If

kernel-open/nvidia-drm/nv_common_utils.h (new file, 120 lines)
@@ -0,0 +1,120 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef __NV_COMMON_UTILS_H__
#define __NV_COMMON_UTILS_H__

#include "nvtypes.h"
#include "nvmisc.h"

#if !defined(TRUE)
#define TRUE NV_TRUE
#endif

#if !defined(FALSE)
#define FALSE NV_FALSE
#endif

#define NV_IS_UNSIGNED(x) ((__typeof__(x))-1 > 0)

/* Get the length of a statically-sized array. */
#define ARRAY_LEN(_arr) (sizeof(_arr) / sizeof(_arr[0]))

#define NV_INVALID_HEAD 0xFFFFFFFF

#define NV_INVALID_CONNECTOR_PHYSICAL_INFORMATION (~0)

#if !defined(NV_MIN)
# define NV_MIN(a,b) (((a)<(b))?(a):(b))
#endif

#define NV_MIN3(a,b,c) NV_MIN(NV_MIN(a, b), c)
#define NV_MIN4(a,b,c,d) NV_MIN3(NV_MIN(a,b),c,d)

#if !defined(NV_MAX)
# define NV_MAX(a,b) (((a)>(b))?(a):(b))
#endif

#define NV_MAX3(a,b,c) NV_MAX(NV_MAX(a, b), c)
#define NV_MAX4(a,b,c,d) NV_MAX3(NV_MAX(a,b),c,d)

static inline int NV_LIMIT_VAL_TO_MIN_MAX(int val, int min, int max)
{
if (val < min) {
return min;
}
if (val > max) {
return max;
}
return val;
}

#define NV_ROUNDUP_DIV(x,y) ((x) / (y) + (((x) % (y)) ? 1 : 0))

/*
* Macros used for computing palette entries:
*
* NV_UNDER_REPLICATE(val, source_size, result_size) expands a value
* of source_size bits into a value of target_size bits by shifting
* the source value into the high bits and replicating the high bits
* of the value into the low bits of the result.
*
* PALETTE_DEPTH_SHIFT(val, w) maps a colormap entry for a component
* that has w bits to an appropriate entry in a LUT of 256 entries.
*/
static inline unsigned int NV_UNDER_REPLICATE(unsigned short val,
int source_size,
int result_size)
{
return (val << (result_size - source_size)) |
(val >> ((source_size << 1) - result_size));
}


static inline unsigned short PALETTE_DEPTH_SHIFT(unsigned short val, int depth)
{
return NV_UNDER_REPLICATE(val, depth, 8);
}

/*
* Use __builtin_ffs where it is supported, or provide an equivalent
* implementation for platforms like riscv where it is not.
*/
#if defined(__GNUC__) && !NVCPU_IS_RISCV64
static inline int nv_ffs(int x)
{
return __builtin_ffs(x);
}
#else
static inline int nv_ffs(int x)
{
if (x == 0)
return 0;

LOWESTBITIDX_32(x);

return 1 + x;
}
#endif

#endif /* __NV_COMMON_UTILS_H__ */
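To make the palette helpers above concrete, here is a small worked example. It is a standalone sketch that mirrors the arithmetic of NV_UNDER_REPLICATE() rather than including the header itself, and the input values are chosen purely for illustration:

#include <stdio.h>

/* Same arithmetic as NV_UNDER_REPLICATE() in nv_common_utils.h. */
static unsigned int under_replicate(unsigned short val, int source_size, int result_size)
{
    return (val << (result_size - source_size)) |
           (val >> ((source_size << 1) - result_size));
}

int main(void)
{
    /* A 5-bit component 0b10110 (22) expands to 0b10110101 (181). */
    printf("%u\n", under_replicate(22, 5, 8));
    /* The maximum 5-bit value 31 maps to the full-scale 8-bit value 255. */
    printf("%u\n", under_replicate(31, 5, 8));
    return 0;
}

Replicating the high source bits into the low result bits is what keeps the mapping full range: the largest source value always maps to the largest result value, which is exactly what PALETTE_DEPTH_SHIFT() relies on when filling a 256-entry LUT.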
@@ -62,6 +62,17 @@
#undef NV_DRM_FENCE_AVAILABLE
#endif

#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
(defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))
// XXX remove dependency on DRM_TTM_HELPER by implementing nvidia-drm's own
// .fbdev_probe callback that uses NVKMS kapi
#if IS_ENABLED(CONFIG_DRM_TTM_HELPER)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_CLIENT_AVAILABLE
#endif
#endif

/*
* We can support color management if either drm_helper_crtc_enable_color_mgmt()
* or drm_crtc_enable_color_mgmt() exist.
@@ -85,7 +96,11 @@

/* For nv_drm_gem_prime_force_fence_signal */
#ifndef spin_is_locked
#if ((__FreeBSD_version >= 1500000) && (__FreeBSD_version < 1500018)) || (__FreeBSD_version < 1401501)
#define spin_is_locked(lock) mtx_owned(lock.m)
#else
#define spin_is_locked(lock) mtx_owned(lock)
#endif
#endif

#ifndef rwsem_is_locked

File diff suppressed because it is too large
@@ -38,6 +38,13 @@
|
||||
#include "nvtypes.h"
|
||||
#include "nvkms-kapi.h"
|
||||
|
||||
enum nv_drm_transfer_function {
|
||||
NV_DRM_TRANSFER_FUNCTION_DEFAULT,
|
||||
NV_DRM_TRANSFER_FUNCTION_LINEAR,
|
||||
NV_DRM_TRANSFER_FUNCTION_PQ,
|
||||
NV_DRM_TRANSFER_FUNCTION_MAX,
|
||||
};
|
||||
|
||||
struct nv_drm_crtc {
|
||||
NvU32 head;
|
||||
|
||||
@@ -63,6 +70,8 @@ struct nv_drm_crtc {
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
struct NvKmsLUTCaps olut_caps;
|
||||
|
||||
struct drm_crtc base;
|
||||
};
|
||||
|
||||
@@ -142,6 +151,12 @@ struct nv_drm_crtc_state {
|
||||
* nv_drm_atomic_crtc_destroy_state().
|
||||
*/
|
||||
struct nv_drm_flip *nv_flip;
|
||||
|
||||
enum nv_drm_transfer_function regamma_tf;
|
||||
struct drm_property_blob *regamma_lut;
|
||||
uint64_t regamma_divisor;
|
||||
struct nv_drm_lut_surface *regamma_drm_lut_surface;
|
||||
NvBool regamma_changed;
|
||||
};
|
||||
|
||||
static inline struct nv_drm_crtc_state *to_nv_crtc_state(struct drm_crtc_state *state)
|
||||
@@ -149,6 +164,11 @@ static inline struct nv_drm_crtc_state *to_nv_crtc_state(struct drm_crtc_state *
|
||||
return container_of(state, struct nv_drm_crtc_state, base);
|
||||
}
|
||||
|
||||
static inline const struct nv_drm_crtc_state *to_nv_crtc_state_const(const struct drm_crtc_state *state)
|
||||
{
|
||||
return container_of(state, struct nv_drm_crtc_state, base);
|
||||
}
|
||||
|
||||
struct nv_drm_plane {
|
||||
/**
|
||||
* @base:
|
||||
@@ -170,6 +190,9 @@ struct nv_drm_plane {
|
||||
* Index of this plane in the per head array of layers.
|
||||
*/
|
||||
uint32_t layer_idx;
|
||||
|
||||
struct NvKmsLUTCaps ilut_caps;
|
||||
struct NvKmsLUTCaps tmo_caps;
|
||||
};
|
||||
|
||||
static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
|
||||
@@ -180,6 +203,22 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
|
||||
return container_of(plane, struct nv_drm_plane, base);
|
||||
}
|
||||
|
||||
struct nv_drm_lut_surface {
|
||||
struct NvKmsKapiDevice *pDevice;
|
||||
struct NvKmsKapiMemory *nvkms_memory;
|
||||
struct NvKmsKapiSurface *nvkms_surface;
|
||||
struct {
|
||||
NvU32 vssSegments;
|
||||
enum NvKmsLUTVssType vssType;
|
||||
|
||||
NvU32 lutEntries;
|
||||
enum NvKmsLUTFormat entryFormat;
|
||||
|
||||
} properties;
|
||||
void *buffer;
|
||||
struct kref refcount;
|
||||
};
|
||||
|
||||
struct nv_drm_plane_state {
|
||||
struct drm_plane_state base;
|
||||
s32 __user *fd_user_ptr;
|
||||
@@ -187,6 +226,20 @@ struct nv_drm_plane_state {
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property_blob *hdr_output_metadata;
|
||||
#endif
|
||||
struct drm_property_blob *lms_ctm;
|
||||
struct drm_property_blob *lms_to_itp_ctm;
|
||||
struct drm_property_blob *itp_to_lms_ctm;
|
||||
struct drm_property_blob *blend_ctm;
|
||||
|
||||
enum nv_drm_transfer_function degamma_tf;
|
||||
struct drm_property_blob *degamma_lut;
|
||||
uint64_t degamma_multiplier; /* S31.32 Sign-Magnitude Format */
|
||||
struct nv_drm_lut_surface *degamma_drm_lut_surface;
|
||||
NvBool degamma_changed;
|
||||
|
||||
struct drm_property_blob *tmo_lut;
|
||||
struct nv_drm_lut_surface *tmo_drm_lut_surface;
|
||||
NvBool tmo_changed;
|
||||
};
|
||||
|
||||
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)
|
||||
|
||||
@@ -64,12 +64,27 @@
|
||||
#include <drm/drm_ioctl.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_LINUX_APERTURE_H_PRESENT)
|
||||
#include <linux/aperture.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
|
||||
#include <drm/drm_aperture.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
|
||||
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
|
||||
#include <drm/drm_client_setup.h>
|
||||
#elif defined(NV_DRM_CLIENTS_DRM_CLIENT_SETUP_H_PRESENT)
|
||||
#include <drm/clients/drm_client_setup.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
|
||||
#include <drm/drm_fbdev_ttm.h>
|
||||
#elif defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
|
||||
#include <drm/drm_fbdev_generic.h>
|
||||
#endif
|
||||
|
||||
@@ -105,16 +120,16 @@ static int nv_drm_revoke_sub_ownership(struct drm_device *dev);
|
||||
|
||||
static struct nv_drm_device *dev_list = NULL;
|
||||
|
||||
static const char* nv_get_input_colorspace_name(
|
||||
static char* nv_get_input_colorspace_name(
|
||||
enum NvKmsInputColorSpace colorSpace)
|
||||
{
|
||||
switch (colorSpace) {
|
||||
case NVKMS_INPUT_COLORSPACE_NONE:
|
||||
return "None";
|
||||
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
|
||||
return "IEC 61966-2-2 linear FP";
|
||||
return "scRGB Linear FP16";
|
||||
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
|
||||
return "ITU-R BT.2100-PQ YCbCr";
|
||||
return "BT.2100 PQ";
|
||||
default:
|
||||
/* We shouldn't hit this */
|
||||
WARN_ON("Unsupported input colorspace");
|
||||
@@ -122,8 +137,30 @@ static const char* nv_get_input_colorspace_name(
|
||||
}
|
||||
};
|
||||
|
||||
static char* nv_get_transfer_function_name(
|
||||
enum nv_drm_transfer_function tf)
|
||||
{
|
||||
switch (tf) {
|
||||
case NV_DRM_TRANSFER_FUNCTION_LINEAR:
|
||||
return "Linear";
|
||||
case NV_DRM_TRANSFER_FUNCTION_PQ:
|
||||
return "PQ (Perceptual Quantizer)";
|
||||
default:
|
||||
/* We shouldn't hit this */
|
||||
WARN_ON("Unsupported transfer function");
|
||||
#if defined(fallthrough)
|
||||
fallthrough;
|
||||
#else
|
||||
/* Fallthrough */
|
||||
#endif
|
||||
case NV_DRM_TRANSFER_FUNCTION_DEFAULT:
|
||||
return "Default";
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
|
||||
#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
|
||||
static void nv_drm_output_poll_changed(struct drm_device *dev)
|
||||
{
|
||||
struct drm_connector *connector = NULL;
|
||||
@@ -167,6 +204,7 @@ static void nv_drm_output_poll_changed(struct drm_device *dev)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
}
|
||||
#endif /* NV_DRM_OUTPUT_POLL_CHANGED_PRESENT */
|
||||
|
||||
static struct drm_framebuffer *nv_drm_framebuffer_create(
|
||||
struct drm_device *dev,
|
||||
@@ -204,7 +242,9 @@ static const struct drm_mode_config_funcs nv_mode_config_funcs = {
|
||||
.atomic_check = nv_drm_atomic_check,
|
||||
.atomic_commit = nv_drm_atomic_commit,
|
||||
|
||||
#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
|
||||
.output_poll_changed = nv_drm_output_poll_changed,
|
||||
#endif
|
||||
};
|
||||
|
||||
static void nv_drm_event_callback(const struct NvKmsKapiEvent *event)
|
||||
@@ -364,15 +404,21 @@ static void nv_drm_enumerate_encoders_and_connectors
|
||||
*/
|
||||
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
{
|
||||
struct drm_prop_enum_list enum_list[3] = { };
|
||||
struct drm_prop_enum_list colorspace_enum_list[3] = { };
|
||||
struct drm_prop_enum_list tf_enum_list[NV_DRM_TRANSFER_FUNCTION_MAX] = { };
|
||||
int i, len = 0;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
enum_list[len].type = i;
|
||||
enum_list[len].name = nv_get_input_colorspace_name(i);
|
||||
colorspace_enum_list[len].type = i;
|
||||
colorspace_enum_list[len].name = nv_get_input_colorspace_name(i);
|
||||
len++;
|
||||
}
|
||||
|
||||
for (i = 0; i < NV_DRM_TRANSFER_FUNCTION_MAX; i++) {
|
||||
tf_enum_list[i].type = i;
|
||||
tf_enum_list[i].name = nv_get_transfer_function_name(i);
|
||||
}
|
||||
|
||||
if (nv_dev->supportsSyncpts) {
|
||||
nv_dev->nv_out_fence_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
|
||||
@@ -384,7 +430,7 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
|
||||
nv_dev->nv_input_colorspace_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
|
||||
enum_list, len);
|
||||
colorspace_enum_list, len);
|
||||
if (nv_dev->nv_input_colorspace_property == NULL) {
|
||||
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
|
||||
return -ENOMEM;
|
||||
@@ -399,6 +445,109 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_dev->nv_plane_lms_ctm_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_LMS_CTM", 0);
|
||||
if (nv_dev->nv_plane_lms_ctm_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nv_dev->nv_plane_lms_to_itp_ctm_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_LMS_TO_ITP_CTM", 0);
|
||||
if (nv_dev->nv_plane_lms_to_itp_ctm_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nv_dev->nv_plane_itp_to_lms_ctm_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_ITP_TO_LMS_CTM", 0);
|
||||
if (nv_dev->nv_plane_itp_to_lms_ctm_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nv_dev->nv_plane_blend_ctm_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_BLEND_CTM", 0);
|
||||
if (nv_dev->nv_plane_blend_ctm_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
// Degamma TF + LUT + LUT Size + Multiplier
|
||||
|
||||
nv_dev->nv_plane_degamma_tf_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0,
|
||||
"NV_PLANE_DEGAMMA_TF", tf_enum_list,
|
||||
NV_DRM_TRANSFER_FUNCTION_MAX);
|
||||
if (nv_dev->nv_plane_degamma_tf_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_plane_degamma_lut_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_DEGAMMA_LUT", 0);
|
||||
if (nv_dev->nv_plane_degamma_lut_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_plane_degamma_lut_size_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_IMMUTABLE,
|
||||
"NV_PLANE_DEGAMMA_LUT_SIZE", 0, UINT_MAX);
|
||||
if (nv_dev->nv_plane_degamma_lut_size_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_plane_degamma_multiplier_property =
|
||||
drm_property_create_range(nv_dev->dev, 0,
|
||||
"NV_PLANE_DEGAMMA_MULTIPLIER", 0,
|
||||
U64_MAX & ~(((NvU64) 1) << 63)); // No negative values
|
||||
if (nv_dev->nv_plane_degamma_multiplier_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
// TMO LUT + LUT Size
|
||||
|
||||
nv_dev->nv_plane_tmo_lut_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_TMO_LUT", 0);
|
||||
if (nv_dev->nv_plane_tmo_lut_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_plane_tmo_lut_size_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_IMMUTABLE,
|
||||
"NV_PLANE_TMO_LUT_SIZE", 0, UINT_MAX);
|
||||
if (nv_dev->nv_plane_tmo_lut_size_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
// REGAMMA TF + LUT + LUT Size + Divisor
|
||||
|
||||
nv_dev->nv_crtc_regamma_tf_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0,
|
||||
"NV_CRTC_REGAMMA_TF", tf_enum_list,
|
||||
NV_DRM_TRANSFER_FUNCTION_MAX);
|
||||
if (nv_dev->nv_crtc_regamma_tf_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_crtc_regamma_lut_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_CRTC_REGAMMA_LUT", 0);
|
||||
if (nv_dev->nv_crtc_regamma_lut_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_crtc_regamma_lut_size_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_IMMUTABLE,
|
||||
"NV_CRTC_REGAMMA_LUT_SIZE", 0, UINT_MAX);
|
||||
if (nv_dev->nv_crtc_regamma_lut_size_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
// S31.32
|
||||
nv_dev->nv_crtc_regamma_divisor_property =
|
||||
drm_property_create_range(nv_dev->dev, 0,
|
||||
"NV_CRTC_REGAMMA_DIVISOR",
|
||||
(((NvU64) 1) << 32), // No values between 0 and 1
|
||||
U64_MAX & ~(((NvU64) 1) << 63)); // No negative values
|
||||
if (nv_dev->nv_crtc_regamma_divisor_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
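The NV_PLANE_DEGAMMA_MULTIPLIER and NV_CRTC_REGAMMA_DIVISOR ranges above are expressed in the S31.32 sign-magnitude fixed-point format referenced in the comments ("S31.32"), with bit 63 (the sign) masked out of both ranges. A minimal sketch of that encoding, under the assumption that the upper 31 value bits hold the integer part and the lower 32 bits the fraction, shows why the divisor's minimum of ((NvU64) 1) << 32 corresponds to the value 1.0; this is illustrative code, not the driver's conversion routine:

#include <stdint.h>
#include <stdio.h>

/* Illustrative S31.32 sign-magnitude encoder (assumed layout; not driver code). */
static uint64_t to_s31_32(double v)
{
    uint64_t sign = (v < 0.0) ? (1ULL << 63) : 0;
    double mag = (v < 0.0) ? -v : v;

    /* Integer part lands in bits 62..32, fractional part in bits 31..0. */
    return sign | ((uint64_t)(mag * 4294967296.0) & ~(1ULL << 63));
}

int main(void)
{
    printf("%#llx\n", (unsigned long long)to_s31_32(1.0)); /* 0x100000000 */
    printf("%#llx\n", (unsigned long long)to_s31_32(2.5)); /* 0x280000000 */
    return 0;
}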
@@ -476,7 +625,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
/*
|
||||
* If fbdev is enabled, take modeset ownership now before other DRM clients
|
||||
* can take master (and thus NVKMS ownership).
|
||||
@@ -548,6 +697,13 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
|
||||
ret = nv_drm_create_properties(nv_dev);
|
||||
if (ret < 0) {
|
||||
drm_mode_config_cleanup(dev);
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
if (nv_dev->hasFramebufferConsole) {
|
||||
nvKms->releaseOwnership(nv_dev->pDevice);
|
||||
}
|
||||
#endif
|
||||
nvKms->freeDevice(nv_dev->pDevice);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
@@ -610,7 +766,7 @@ static void __nv_drm_unload(struct drm_device *dev)
|
||||
|
||||
/* Release modeset ownership if fbdev is enabled */
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
if (nv_dev->hasFramebufferConsole) {
|
||||
drm_atomic_helper_shutdown(dev);
|
||||
nvKms->releaseOwnership(nv_dev->pDevice);
|
||||
@@ -710,36 +866,37 @@ void nv_drm_master_drop(struct drm_device *dev, struct drm_file *file_priv)
|
||||
#endif
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
int err;
|
||||
|
||||
nv_drm_revoke_modeset_permission(dev, file_priv, 0);
|
||||
nv_drm_revoke_sub_ownership(dev);
|
||||
|
||||
/*
|
||||
* After dropping nvkms modeset ownership, it is not guaranteed that
|
||||
* drm and nvkms modeset state will remain in sync. Therefore, disable
|
||||
* all outputs and crtcs before dropping nvkms modeset ownership.
|
||||
*
|
||||
* First disable all active outputs atomically and then disable each crtc one
|
||||
* by one, there is not helper function available to disable all crtcs
|
||||
* atomically.
|
||||
*/
|
||||
|
||||
drm_modeset_lock_all(dev);
|
||||
|
||||
if ((err = nv_drm_atomic_helper_disable_all(
|
||||
dev,
|
||||
dev->mode_config.acquire_ctx)) != 0) {
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"nv_drm_atomic_helper_disable_all failed with error code %d !",
|
||||
err);
|
||||
}
|
||||
|
||||
drm_modeset_unlock_all(dev);
|
||||
|
||||
if (!nv_dev->hasFramebufferConsole) {
|
||||
int err;
|
||||
|
||||
/*
|
||||
* After dropping nvkms modeset ownership, it is not guaranteed that drm
|
||||
* and nvkms modeset state will remain in sync. Therefore, disable all
|
||||
* outputs and crtcs before dropping nvkms modeset ownership.
|
||||
*
|
||||
* First disable all active outputs atomically and then disable each
|
||||
* crtc one by one, there is no helper function available to disable
|
||||
* all crtcs atomically.
|
||||
*/
|
||||
|
||||
drm_modeset_lock_all(dev);
|
||||
|
||||
if ((err = nv_drm_atomic_helper_disable_all(
|
||||
dev,
|
||||
dev->mode_config.acquire_ctx)) != 0) {
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"nv_drm_atomic_helper_disable_all failed with error code %d !",
|
||||
err);
|
||||
}
|
||||
|
||||
drm_modeset_unlock_all(dev);
|
||||
|
||||
nvKms->releaseOwnership(nv_dev->pDevice);
|
||||
}
|
||||
}
|
||||
@@ -1567,6 +1724,10 @@ static const struct file_operations nv_drm_fops = {
|
||||
.read = drm_read,
|
||||
|
||||
.llseek = noop_llseek,
|
||||
|
||||
#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
|
||||
.fop_flags = FOP_UNSIGNED_OFFSET,
|
||||
#endif
|
||||
};
|
||||
|
||||
static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
@@ -1684,14 +1845,19 @@ static struct drm_driver nv_drm_driver = {
|
||||
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
|
||||
|
||||
/*
|
||||
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers
|
||||
* for fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* drm_gem_prime_fd_to_handle().
|
||||
* Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
|
||||
* all drivers") made drm_gem_prime_handle_to_fd() /
|
||||
* drm_gem_prime_fd_to_handle() the default when .prime_handle_to_fd /
|
||||
* .prime_fd_to_handle are unspecified, respectively.
|
||||
*
|
||||
* Prior Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME
|
||||
* import/export for all drivers") made these helpers the default when
|
||||
* .prime_handle_to_fd / .prime_fd_to_handle are unspecified, so it's fine
|
||||
* to just skip specifying them if the helpers aren't present.
|
||||
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers for
|
||||
* fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* drm_gem_prime_fd_to_handle(). However, because of the aforementioned commit,
|
||||
* it's fine to just skip specifying them in this case.
|
||||
*
|
||||
* Linux kernel v6.7 commit 0514f63cfff3 ("Revert "drm/prime: Unexport helpers
|
||||
* for fd/handle conversion"") exported the helpers again, but left the default
|
||||
* behavior intact. Nonetheless, it does not hurt to specify them.
|
||||
*/
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
|
||||
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
|
||||
@@ -1703,6 +1869,21 @@ static struct drm_driver nv_drm_driver = {
|
||||
.gem_prime_import = nv_drm_gem_prime_import,
|
||||
.gem_prime_import_sg_table = nv_drm_gem_prime_import_sg_table,
|
||||
|
||||
/*
|
||||
* Linux kernel v5.0 commit 7698799f95 ("drm/prime: Add drm_gem_prime_mmap()")
|
||||
* added drm_gem_prime_mmap().
|
||||
*
|
||||
* Linux kernel v6.6 commit 0adec22702d4 ("drm: Remove struct
|
||||
* drm_driver.gem_prime_mmap") removed .gem_prime_mmap, but replaced it with a
|
||||
* direct call to drm_gem_prime_mmap().
|
||||
*
|
||||
* TODO: Support .gem_prime_mmap on Linux < v5.0 using internal implementation.
|
||||
*/
|
||||
#if defined(NV_DRM_GEM_PRIME_MMAP_PRESENT) && \
|
||||
defined(NV_DRM_DRIVER_HAS_GEM_PRIME_MMAP)
|
||||
.gem_prime_mmap = drm_gem_prime_mmap,
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_GEM_PRIME_CALLBACKS)
|
||||
.gem_prime_export = drm_gem_prime_export,
|
||||
.gem_prime_get_sg_table = nv_drm_gem_prime_get_sg_table,
|
||||
@@ -1736,13 +1917,20 @@ static struct drm_driver nv_drm_driver = {
|
||||
.name = "nvidia-drm",
|
||||
|
||||
.desc = "NVIDIA DRM driver",
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DATE)
|
||||
.date = "20160202",
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DEVICE_LIST)
|
||||
.device_list = LIST_HEAD_INIT(nv_drm_driver.device_list),
|
||||
#elif defined(NV_DRM_DRIVER_HAS_LEGACY_DEV_LIST)
|
||||
.legacy_dev_list = LIST_HEAD_INIT(nv_drm_driver.legacy_dev_list),
|
||||
#endif
|
||||
// XXX implement nvidia-drm's own .fbdev_probe callback that uses NVKMS kapi directly
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE) && defined(DRM_FBDEV_TTM_DRIVER_OPS)
|
||||
DRM_FBDEV_TTM_DRIVER_OPS,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@@ -1838,22 +2026,35 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
goto failed_drm_register;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
if (nv_drm_fbdev_module_param &&
|
||||
drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
|
||||
if (bus_is_pci) {
|
||||
struct pci_dev *pdev = to_pci_dev(device);
|
||||
|
||||
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
|
||||
|
||||
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_HAS_DRIVER_ARG)
|
||||
drm_aperture_remove_conflicting_pci_framebuffers(pdev, &nv_drm_driver);
|
||||
#else
|
||||
drm_aperture_remove_conflicting_pci_framebuffers(pdev, nv_drm_driver.name);
|
||||
#endif
|
||||
|
||||
#elif defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT)
|
||||
aperture_remove_conflicting_pci_devices(pdev, nv_drm_driver.name);
|
||||
#endif
|
||||
nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
|
||||
}
|
||||
#if defined(NV_DRM_CLIENT_AVAILABLE)
|
||||
drm_client_setup(dev, NULL);
|
||||
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
|
||||
drm_fbdev_ttm_setup(dev, 32);
|
||||
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
drm_fbdev_generic_setup(dev, 32);
|
||||
#endif
|
||||
}
|
||||
#endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */
|
||||
#endif /* defined(NV_DRM_FBDEV_AVAILABLE) */
|
||||
|
||||
/* Add NVIDIA-DRM device into list */
|
||||
|
||||
@@ -1995,12 +2196,12 @@ void nv_drm_suspend_resume(NvBool suspend)
|
||||
|
||||
if (suspend) {
|
||||
drm_kms_helper_poll_disable(dev);
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 1);
|
||||
#endif
|
||||
drm_mode_config_reset(dev);
|
||||
} else {
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 0);
|
||||
#endif
|
||||
drm_kms_helper_poll_enable(dev);
|
||||
|
||||
@@ -36,12 +36,15 @@
|
||||
|
||||
static void __nv_drm_framebuffer_free(struct nv_drm_framebuffer *nv_fb)
|
||||
{
|
||||
struct drm_framebuffer *fb = &nv_fb->base;
|
||||
uint32_t i;
|
||||
|
||||
/* Unreference gem object */
|
||||
for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
|
||||
if (nv_fb->nv_gem[i] != NULL) {
|
||||
nv_drm_gem_object_unreference_unlocked(nv_fb->nv_gem[i]);
|
||||
for (i = 0; i < NVKMS_MAX_PLANES_PER_SURFACE; i++) {
|
||||
struct drm_gem_object *gem = nv_fb_get_gem_obj(fb, i);
|
||||
if (gem != NULL) {
|
||||
struct nv_drm_gem_object *nv_gem = to_nv_gem_object(gem);
|
||||
nv_drm_gem_object_unreference_unlocked(nv_gem);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,10 +72,8 @@ static int
|
||||
nv_drm_framebuffer_create_handle(struct drm_framebuffer *fb,
|
||||
struct drm_file *file, unsigned int *handle)
|
||||
{
|
||||
struct nv_drm_framebuffer *nv_fb = to_nv_framebuffer(fb);
|
||||
|
||||
return nv_drm_gem_handle_create(file,
|
||||
nv_fb->nv_gem[0],
|
||||
to_nv_gem_object(nv_fb_get_gem_obj(fb, 0)),
|
||||
handle);
|
||||
}
|
||||
|
||||
@@ -88,6 +89,7 @@ static struct nv_drm_framebuffer *nv_drm_framebuffer_alloc(
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct nv_drm_framebuffer *nv_fb;
|
||||
struct nv_drm_gem_object *nv_gem;
|
||||
const int num_planes = nv_drm_format_num_planes(cmd->pixel_format);
|
||||
uint32_t i;
|
||||
|
||||
@@ -101,21 +103,22 @@ static struct nv_drm_framebuffer *nv_drm_framebuffer_alloc(
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
if (num_planes > ARRAY_SIZE(nv_fb->nv_gem)) {
|
||||
if (num_planes > NVKMS_MAX_PLANES_PER_SURFACE) {
|
||||
NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "Unsupported number of planes");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_planes; i++) {
|
||||
if ((nv_fb->nv_gem[i] = nv_drm_gem_object_lookup(
|
||||
dev,
|
||||
file,
|
||||
cmd->handles[i])) == NULL) {
|
||||
nv_gem = nv_drm_gem_object_lookup(dev, file, cmd->handles[i]);
|
||||
|
||||
if (nv_gem == NULL) {
|
||||
NV_DRM_DEV_DEBUG_DRIVER(
|
||||
nv_dev,
|
||||
"Failed to find gem object of type nvkms memory");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
nv_fb_set_gem_obj(&nv_fb->base, i, &nv_gem->base);
|
||||
}
|
||||
|
||||
return nv_fb;
|
||||
@@ -135,12 +138,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct NvKmsKapiCreateSurfaceParams params = { };
|
||||
struct nv_drm_gem_object *nv_gem;
|
||||
struct drm_framebuffer *fb = &nv_fb->base;
|
||||
uint32_t i;
|
||||
int ret;
|
||||
|
||||
/* Initialize the base framebuffer object and add it to drm subsystem */
|
||||
|
||||
ret = drm_framebuffer_init(dev, &nv_fb->base, &nv_framebuffer_funcs);
|
||||
ret = drm_framebuffer_init(dev, fb, &nv_framebuffer_funcs);
|
||||
if (ret != 0) {
|
||||
NV_DRM_DEV_DEBUG_DRIVER(
|
||||
nv_dev,
|
||||
@@ -148,23 +153,32 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
|
||||
if (nv_fb->nv_gem[i] != NULL) {
|
||||
if (!nvKms->isMemoryValidForDisplay(nv_dev->pDevice,
|
||||
nv_fb->nv_gem[i]->pMemory)) {
|
||||
NV_DRM_DEV_LOG_INFO(
|
||||
nv_dev,
|
||||
"Framebuffer memory not appropriate for scanout");
|
||||
goto fail;
|
||||
}
|
||||
for (i = 0; i < NVKMS_MAX_PLANES_PER_SURFACE; i++) {
|
||||
struct drm_gem_object *gem = nv_fb_get_gem_obj(fb, i);
|
||||
if (gem != NULL) {
|
||||
nv_gem = to_nv_gem_object(gem);
|
||||
|
||||
params.planes[i].memory = nv_fb->nv_gem[i]->pMemory;
|
||||
params.planes[i].offset = nv_fb->base.offsets[i];
|
||||
params.planes[i].pitch = nv_fb->base.pitches[i];
|
||||
params.planes[i].memory = nv_gem->pMemory;
|
||||
params.planes[i].offset = fb->offsets[i];
|
||||
params.planes[i].pitch = fb->pitches[i];
|
||||
|
||||
/*
|
||||
* XXX Use drm_framebuffer_funcs.dirty and
|
||||
* drm_fb_helper_funcs.fb_dirty instead
|
||||
*
|
||||
* Currently using noDisplayCaching when registering surfaces with
|
||||
* NVKMS that are using memory allocated through the DRM
|
||||
* Dumb-Buffers API. This prevents Display Idle Frame Rate from
|
||||
* kicking in and preventing CPU updates to the surface memory from
|
||||
* not being reflected on the display. Ideally, DIFR would be
|
||||
* dynamically disabled whenever a user of the memory blits to the
|
||||
* frontbuffer. DRM provides the needed callbacks to achieve this.
|
||||
*/
|
||||
params.noDisplayCaching |= !!nv_gem->is_drm_dumb;
|
||||
}
|
||||
}
|
||||
params.height = nv_fb->base.height;
|
||||
params.width = nv_fb->base.width;
|
||||
params.height = fb->height;
|
||||
params.width = fb->width;
|
||||
params.format = format;
|
||||
|
||||
if (have_modifier) {
|
||||
@@ -199,7 +213,7 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
drm_framebuffer_cleanup(&nv_fb->base);
|
||||
drm_framebuffer_cleanup(fb);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
@@ -41,8 +41,10 @@
|
||||
struct nv_drm_framebuffer {
|
||||
struct NvKmsKapiSurface *pSurface;
|
||||
|
||||
struct nv_drm_gem_object*
|
||||
nv_gem[NVKMS_MAX_PLANES_PER_SURFACE];
|
||||
#if !defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
|
||||
struct drm_gem_object*
|
||||
obj[NVKMS_MAX_PLANES_PER_SURFACE];
|
||||
#endif
|
||||
|
||||
struct drm_framebuffer base;
|
||||
};
|
||||
@@ -56,6 +58,29 @@ static inline struct nv_drm_framebuffer *to_nv_framebuffer(
|
||||
return container_of(fb, struct nv_drm_framebuffer, base);
|
||||
}
|
||||
|
||||
static inline struct drm_gem_object *nv_fb_get_gem_obj(
|
||||
struct drm_framebuffer *fb,
|
||||
uint32_t plane)
|
||||
{
|
||||
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
|
||||
return fb->obj[plane];
|
||||
#else
|
||||
return to_nv_framebuffer(fb)->obj[plane];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_fb_set_gem_obj(
|
||||
struct drm_framebuffer *fb,
|
||||
uint32_t plane,
|
||||
struct drm_gem_object *obj)
|
||||
{
|
||||
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
|
||||
fb->obj[plane] = obj;
|
||||
#else
|
||||
to_nv_framebuffer(fb)->obj[plane] = obj;
|
||||
#endif
|
||||
}
|
||||
|
||||
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
|
||||
struct drm_device *dev,
|
||||
struct drm_file *file,
|
||||
|
||||
@@ -71,9 +71,20 @@ static void __nv_drm_gem_nvkms_memory_free(struct nv_drm_gem_object *nv_gem)
|
||||
nv_drm_free(nv_nvkms_memory);
|
||||
}
|
||||
|
||||
static int __nv_drm_gem_nvkms_map(
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory);
|
||||
|
||||
static int __nv_drm_gem_nvkms_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
return drm_gem_mmap_obj(&nv_gem->base,
|
||||
drm_vma_node_size(&nv_gem->base.vma_node) << PAGE_SHIFT, vma);
|
||||
}
|
||||
@@ -146,11 +157,18 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
|
||||
static int __nv_drm_gem_nvkms_map(
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory)
|
||||
{
|
||||
int ret = 0;
|
||||
struct nv_drm_device *nv_dev = nv_nvkms_memory->base.nv_dev;
|
||||
struct NvKmsKapiMemory *pMemory = nv_nvkms_memory->base.pMemory;
|
||||
|
||||
if (!nv_dev->hasVideoMemory) {
|
||||
return 0;
|
||||
mutex_lock(&nv_nvkms_memory->map_lock);
|
||||
|
||||
if (nv_nvkms_memory->physically_mapped) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nvKms->isVidmem(pMemory)) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nvKms->mapMemory(nv_dev->pDevice,
|
||||
@@ -161,7 +179,8 @@ static int __nv_drm_gem_nvkms_map(
|
||||
nv_dev,
|
||||
"Failed to map NvKmsKapiMemory 0x%p",
|
||||
pMemory);
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
|
||||
nv_nvkms_memory->pWriteCombinedIORemapAddress = ioremap_wc(
|
||||
@@ -177,7 +196,9 @@ static int __nv_drm_gem_nvkms_map(
|
||||
|
||||
nv_nvkms_memory->physically_mapped = true;
|
||||
|
||||
return 0;
|
||||
done:
|
||||
mutex_unlock(&nv_nvkms_memory->map_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void *__nv_drm_gem_nvkms_prime_vmap(
|
||||
@@ -186,14 +207,40 @@ static void *__nv_drm_gem_nvkms_prime_vmap(
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped) {
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return nv_nvkms_memory->pWriteCombinedIORemapAddress;
|
||||
if (nv_nvkms_memory->physically_mapped) {
|
||||
return nv_nvkms_memory->pWriteCombinedIORemapAddress;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this buffer isn't physically mapped, it might be backed by struct
|
||||
* pages. Use vmap in that case. Do a noncached mapping for system memory
|
||||
* as the display is a non-IO-coherent device on Tegra.
|
||||
*/
|
||||
if (nv_nvkms_memory->pages_count > 0) {
|
||||
return nv_drm_vmap(nv_nvkms_memory->pages,
|
||||
nv_nvkms_memory->pages_count,
|
||||
false);
|
||||
}
|
||||
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
static void __nv_drm_gem_nvkms_prime_vunmap(
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
void *address)
|
||||
{
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped &&
|
||||
nv_nvkms_memory->pages_count > 0) {
|
||||
nv_drm_vunmap(address);
|
||||
}
|
||||
}
|
||||
|
||||
static int __nv_drm_gem_map_nvkms_memory_offset(
|
||||
@@ -201,17 +248,7 @@ static int __nv_drm_gem_map_nvkms_memory_offset(
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
uint64_t *offset)
|
||||
{
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped) {
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return nv_drm_gem_create_mmap_offset(&nv_nvkms_memory->base, offset);
|
||||
return nv_drm_gem_create_mmap_offset(nv_gem, offset);
|
||||
}
|
||||
|
||||
static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
@@ -223,7 +260,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
struct sg_table *sg_table;
|
||||
|
||||
if (nv_nvkms_memory->pages_count == 0) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
NV_DRM_DEV_DEBUG_DRIVER(
|
||||
nv_dev,
|
||||
"Cannot create sg_table for NvKmsKapiMemory 0x%p",
|
||||
nv_gem->pMemory);
|
||||
@@ -241,6 +278,7 @@ const struct nv_drm_gem_object_funcs nv_gem_nvkms_memory_ops = {
|
||||
.free = __nv_drm_gem_nvkms_memory_free,
|
||||
.prime_dup = __nv_drm_gem_nvkms_prime_dup,
|
||||
.prime_vmap = __nv_drm_gem_nvkms_prime_vmap,
|
||||
.prime_vunmap = __nv_drm_gem_nvkms_prime_vunmap,
|
||||
.mmap = __nv_drm_gem_nvkms_mmap,
|
||||
.handle_vma_fault = __nv_drm_gem_nvkms_handle_vma_fault,
|
||||
.create_mmap_offset = __nv_drm_gem_map_nvkms_memory_offset,
|
||||
@@ -265,6 +303,7 @@ static int __nv_drm_nvkms_gem_obj_init(
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_init(&nv_nvkms_memory->map_lock);
|
||||
nv_nvkms_memory->pPhysicalAddress = NULL;
|
||||
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
|
||||
nv_nvkms_memory->physically_mapped = false;
|
||||
@@ -273,7 +312,7 @@ static int __nv_drm_nvkms_gem_obj_init(
|
||||
pMemory,
|
||||
&pages,
|
||||
&numPages) &&
|
||||
!nv_dev->hasVideoMemory) {
|
||||
!nvKms->isVidmem(pMemory)) {
|
||||
/* GetMemoryPages may fail for vidmem allocations,
|
||||
* but it should not fail for sysmem allocations. */
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev,
|
||||
@@ -346,6 +385,8 @@ int nv_drm_dumb_create(
|
||||
goto nvkms_gem_obj_init_failed;
|
||||
}
|
||||
|
||||
nv_nvkms_memory->base.is_drm_dumb = true;
|
||||
|
||||
/* Always map dumb buffer memory up front. Clients are only expected
|
||||
* to use dumb buffers for software rendering, so they're not much use
|
||||
* without a CPU mapping.
|
||||
|
||||
@@ -32,8 +32,15 @@
|
||||
struct nv_drm_gem_nvkms_memory {
|
||||
struct nv_drm_gem_object base;
|
||||
|
||||
/*
|
||||
* Lock to protect concurrent writes to physically_mapped, pPhysicalAddress,
|
||||
* and pWriteCombinedIORemapAddress.
|
||||
*
|
||||
* __nv_drm_gem_nvkms_map(), the sole writer, is structured such that
|
||||
* readers are not required to hold the lock.
|
||||
*/
|
||||
struct mutex map_lock;
|
||||
bool physically_mapped;
|
||||
|
||||
void *pPhysicalAddress;
|
||||
void *pWriteCombinedIORemapAddress;
|
||||
|
||||
|
||||
@@ -36,6 +36,10 @@
|
||||
#include "linux/mm.h"
|
||||
#include "nv-mm.h"
|
||||
|
||||
#if defined(NV_LINUX_PFN_T_H_PRESENT)
|
||||
#include "linux/pfn_t.h"
|
||||
#endif
|
||||
|
||||
#if defined(NV_BSD)
|
||||
#include <vm/vm_pageout.h>
|
||||
#endif
|
||||
@@ -68,7 +72,8 @@ static void *__nv_drm_gem_user_memory_prime_vmap(
|
||||
struct nv_drm_gem_user_memory *nv_user_memory = to_nv_user_memory(nv_gem);
|
||||
|
||||
return nv_drm_vmap(nv_user_memory->pages,
|
||||
nv_user_memory->pages_count);
|
||||
nv_user_memory->pages_count,
|
||||
true);
|
||||
}
|
||||
|
||||
static void __nv_drm_gem_user_memory_prime_vunmap(
|
||||
@@ -103,6 +108,37 @@ static int __nv_drm_gem_user_memory_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX) && !defined(NV_VMF_INSERT_MIXED_PRESENT)
|
||||
static vm_fault_t __nv_vm_insert_mixed_helper(
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
unsigned long pfn)
|
||||
{
|
||||
int ret;
|
||||
|
||||
#if defined(NV_PFN_TO_PFN_T_PRESENT)
|
||||
ret = vm_insert_mixed(vma, address, pfn_to_pfn_t(pfn));
|
||||
#else
|
||||
ret = vm_insert_mixed(vma, address, pfn);
|
||||
#endif
|
||||
|
||||
switch (ret) {
|
||||
case 0:
|
||||
case -EBUSY:
|
||||
/*
|
||||
* EBUSY indicates that another thread already handled
|
||||
* the faulted range.
|
||||
*/
|
||||
return VM_FAULT_NOPAGE;
|
||||
case -ENOMEM:
|
||||
return VM_FAULT_OOM;
|
||||
default:
|
||||
WARN_ONCE(1, "Unhandled error in %s: %d\n", __FUNCTION__, ret);
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
struct vm_area_struct *vma,
|
||||
@@ -112,36 +148,19 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
unsigned long address = nv_page_fault_va(vmf);
|
||||
struct drm_gem_object *gem = vma->vm_private_data;
|
||||
unsigned long page_offset;
|
||||
vm_fault_t ret;
|
||||
unsigned long pfn;
|
||||
|
||||
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
|
||||
|
||||
BUG_ON(page_offset >= nv_user_memory->pages_count);
|
||||
pfn = page_to_pfn(nv_user_memory->pages[page_offset]);
|
||||
|
||||
#if !defined(NV_LINUX)
|
||||
ret = vmf_insert_pfn(vma, address, page_to_pfn(nv_user_memory->pages[page_offset]));
|
||||
#else /* !defined(NV_LINUX) */
|
||||
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
case -EBUSY:
|
||||
/*
|
||||
* EBUSY indicates that another thread already handled
|
||||
* the faulted range.
|
||||
*/
|
||||
ret = VM_FAULT_NOPAGE;
|
||||
break;
|
||||
case -ENOMEM:
|
||||
ret = VM_FAULT_OOM;
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "Unhandled error in %s: %d\n", __FUNCTION__, ret);
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
break;
|
||||
}
|
||||
#endif /* !defined(NV_LINUX) */
|
||||
|
||||
return ret;
|
||||
return vmf_insert_pfn(vma, address, pfn);
|
||||
#elif defined(NV_VMF_INSERT_MIXED_PRESENT)
|
||||
return vmf_insert_mixed(vma, address, pfn_to_pfn_t(pfn));
|
||||
#else
|
||||
return __nv_vm_insert_mixed_helper(vma, address, pfn);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int __nv_drm_gem_user_create_mmap_offset(
|
||||
|
||||
@@ -144,6 +144,12 @@ void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
|
||||
#endif
|
||||
|
||||
drm_gem_private_object_init(dev, &nv_gem->base, size);
|
||||
|
||||
/* Create mmap offset early for drm_gem_prime_mmap(), if possible. */
|
||||
if (nv_gem->ops->create_mmap_offset) {
|
||||
uint64_t offset;
|
||||
nv_gem->ops->create_mmap_offset(nv_dev, nv_gem, &offset);
|
||||
}
|
||||
}
|
||||
|
||||
struct drm_gem_object *nv_drm_gem_prime_import(struct drm_device *dev,
|
||||
@@ -166,8 +172,11 @@ struct drm_gem_object *nv_drm_gem_prime_import(struct drm_device *dev,
|
||||
*/
|
||||
gem_dst = nv_gem_src->ops->prime_dup(dev, nv_gem_src);
|
||||
|
||||
if (gem_dst)
|
||||
return gem_dst;
|
||||
if (gem_dst == NULL) {
|
||||
return ERR_PTR(-ENOTSUPP);
|
||||
}
|
||||
|
||||
return gem_dst;
|
||||
}
|
||||
}
|
||||
#endif /* NV_DMA_BUF_OWNER_PRESENT */
|
||||
@@ -232,6 +241,7 @@ int nv_drm_gem_map_offset_ioctl(struct drm_device *dev,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* mmap offset creation is idempotent, fetch it by creating it again. */
|
||||
if (nv_gem->ops->create_mmap_offset) {
|
||||
ret = nv_gem->ops->create_mmap_offset(nv_dev, nv_gem, ¶ms->offset);
|
||||
} else {
|
||||
|
||||
@@ -73,6 +73,8 @@ struct nv_drm_gem_object {
|
||||
|
||||
struct NvKmsKapiMemory *pMemory;
|
||||
|
||||
bool is_drm_dumb;
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
nv_dma_resv_t resv;
|
||||
#endif
|
||||
|
||||
@@ -40,8 +40,13 @@
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE) || \
|
||||
defined(NV_DRM_COLOR_CTM_3X4_PRESENT) || \
|
||||
defined(NV_DRM_COLOR_LUT_PRESENT)
|
||||
/*
|
||||
* For DRM_MODE_ROTATE_*, DRM_MODE_REFLECT_*, struct drm_color_ctm_3x4, and
|
||||
* struct drm_color_lut.
|
||||
*/
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
@@ -358,6 +363,24 @@ static inline void nv_drm_connector_put(struct drm_connector *connector)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_drm_property_blob_put(struct drm_property_blob *blob)
|
||||
{
|
||||
#if defined(NV_DRM_PROPERTY_BLOB_PUT_PRESENT)
|
||||
drm_property_blob_put(blob);
|
||||
#else
|
||||
drm_property_unreference_blob(blob);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_drm_property_blob_get(struct drm_property_blob *blob)
|
||||
{
|
||||
#if defined(NV_DRM_PROPERTY_BLOB_PUT_PRESENT)
|
||||
drm_property_blob_get(blob);
|
||||
#else
|
||||
drm_property_reference_blob(blob);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct drm_crtc *
|
||||
nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
|
||||
{
|
||||
@@ -625,6 +648,31 @@ static inline int nv_drm_format_num_planes(uint32_t format)
|
||||
#define DRM_UNLOCKED 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* struct drm_color_ctm_3x4 was added by commit 6872a189be50 ("drm/amd/display:
|
||||
* Add 3x4 CTM support for plane CTM") in v6.8. For backwards compatibility,
|
||||
* define it when not present.
|
||||
*/
|
||||
#if !defined(NV_DRM_COLOR_CTM_3X4_PRESENT)
|
||||
struct drm_color_ctm_3x4 {
|
||||
__u64 matrix[12];
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* struct drm_color_lut was added by commit 5488dc16fde7 ("drm: introduce pipe
|
||||
* color correction properties") in v4.6. For backwards compatibility, define it
|
||||
* when not present.
|
||||
*/
|
||||
#if !defined(NV_DRM_COLOR_LUT_PRESENT)
|
||||
struct drm_color_lut {
|
||||
__u16 red;
|
||||
__u16 green;
|
||||
__u16 blue;
|
||||
__u16 reserved;
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* drm_vma_offset_exact_lookup_locked() were added
|
||||
* by kernel commit 2225cfe46bcc which was Signed-off-by:
|
||||
|
||||
@@ -34,10 +34,10 @@ MODULE_PARM_DESC(
|
||||
"Enable atomic kernel modesetting (1 = enable, 0 = disable (default))");
|
||||
module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
MODULE_PARM_DESC(
|
||||
fbdev,
|
||||
"Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");
|
||||
"Create a framebuffer device (1 = enable (default), 0 = disable)");
|
||||
module_param_named(fbdev, nv_drm_fbdev_module_param, bool, 0400);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -192,6 +192,7 @@ static int __nv_drm_convert_in_fences(
|
||||
&to_nv_crtc_state(crtc_state)->req_config;
|
||||
struct nv_drm_plane_fence_cb_data *fence_data;
|
||||
uint32_t semaphore_index;
|
||||
uint32_t idx_count;
|
||||
int ret, i;
|
||||
|
||||
if (!crtc_state->active) {
|
||||
@@ -244,9 +245,14 @@ static int __nv_drm_convert_in_fences(
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
semaphore_index = nv_drm_next_display_semaphore(nv_dev);
|
||||
for (idx_count = 0; idx_count < nv_dev->display_semaphores.count; idx_count++) {
|
||||
semaphore_index = nv_drm_next_display_semaphore(nv_dev);
|
||||
if (nvKms->tryInitDisplaySemaphore(nv_dev->pDevice, semaphore_index)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nvKms->resetDisplaySemaphore(nv_dev->pDevice, semaphore_index)) {
|
||||
if (idx_count == nv_dev->display_semaphores.count) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Failed to initialize semaphore for plane fence");
|
||||
@@ -408,6 +414,31 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
|
||||
if (commit) {
|
||||
/*
|
||||
* This function does what is necessary to prepare the framebuffers
|
||||
* attached to each new plane in the state for scan out, mostly by
|
||||
* calling back into driver callbacks the NVIDIA driver does not
|
||||
* provide. The end result is that all it does on the NVIDIA driver
|
||||
* is populate the plane state's dma fence pointers with any implicit
|
||||
* sync fences attached to the GEM objects associated with those planes
|
||||
* in the new state, preferring explicit sync fences when appropriate.
|
||||
* This must be done prior to converting the per-plane fences to
|
||||
* semaphore waits below.
|
||||
*
|
||||
* Note this only works when the drm_framebuffer:obj[] field is present
|
||||
* and populated, so skip calling this function on kernels where that
|
||||
* field is not present.
|
||||
*/
|
||||
ret = drm_atomic_helper_prepare_planes(dev, state);
|
||||
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif /* defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT) */
|
||||
|
||||
memset(requested_config, 0, sizeof(*requested_config));
|
||||
|
||||
/* Loop over affected crtcs and construct NvKmsKapiRequestedModeSetConfig */
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
#endif
|
||||
|
||||
bool nv_drm_modeset_module_param = false;
|
||||
bool nv_drm_fbdev_module_param = false;
|
||||
bool nv_drm_fbdev_module_param = true;
|
||||
|
||||
void *nv_drm_calloc(size_t nmemb, size_t size)
|
||||
{
|
||||
@@ -156,9 +156,15 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
|
||||
#define VM_USERMAP 0
|
||||
#endif
|
||||
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count)
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count, bool cached)
|
||||
{
|
||||
return vmap(pages, pages_count, VM_USERMAP, PAGE_KERNEL);
|
||||
pgprot_t prot = PAGE_KERNEL;
|
||||
|
||||
if (!cached) {
|
||||
prot = pgprot_noncached(PAGE_KERNEL);
|
||||
}
|
||||
|
||||
return vmap(pages, pages_count, VM_USERMAP, prot);
|
||||
}
|
||||
|
||||
void nv_drm_vunmap(void *address)
|
||||
|
||||
@@ -59,14 +59,20 @@ typedef struct nv_timer nv_drm_timer;
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
|
||||
#define NV_DRM_FBDEV_AVAILABLE
|
||||
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
|
||||
#define NV_DRM_FBDEV_AVAILABLE
|
||||
#define NV_DRM_FBDEV_TTM_AVAILABLE
|
||||
#endif
|
||||
|
||||
struct page;
|
||||
|
||||
/* Set to true when the atomic modeset feature is enabled. */
|
||||
extern bool nv_drm_modeset_module_param;
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
/* Set to true when the nvidia-drm driver should install a framebuffer device */
|
||||
extern bool nv_drm_fbdev_module_param;
|
||||
#endif
|
||||
@@ -84,7 +90,7 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
|
||||
void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages);
|
||||
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count);
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count, bool cached);
|
||||
|
||||
void nv_drm_vunmap(void *address);
|
||||
|
||||
|
||||
@@ -163,6 +163,24 @@ struct nv_drm_device {
|
||||
struct drm_property *nv_hdr_output_metadata_property;
|
||||
#endif
|
||||
|
||||
struct drm_property *nv_plane_lms_ctm_property;
|
||||
struct drm_property *nv_plane_lms_to_itp_ctm_property;
|
||||
struct drm_property *nv_plane_itp_to_lms_ctm_property;
|
||||
struct drm_property *nv_plane_blend_ctm_property;
|
||||
|
||||
struct drm_property *nv_plane_degamma_tf_property;
|
||||
struct drm_property *nv_plane_degamma_lut_property;
|
||||
struct drm_property *nv_plane_degamma_lut_size_property;
|
||||
struct drm_property *nv_plane_degamma_multiplier_property;
|
||||
|
||||
struct drm_property *nv_plane_tmo_lut_property;
|
||||
struct drm_property *nv_plane_tmo_lut_size_property;
|
||||
|
||||
struct drm_property *nv_crtc_regamma_tf_property;
|
||||
struct drm_property *nv_crtc_regamma_lut_property;
|
||||
struct drm_property *nv_crtc_regamma_lut_size_property;
|
||||
struct drm_property *nv_crtc_regamma_divisor_property;
|
||||
|
||||
struct nv_drm_device *next;
|
||||
};
|
||||
|
||||
|
||||
@@ -66,11 +66,18 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_pci_devices
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_ttm_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pfn_to_pfn_t
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_prime_mmap
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_bus_type
|
||||
@@ -130,3 +137,11 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffe
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_framebuffer_obj_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_ctm_3x4_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
|
||||
|
||||
@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
// Ran out of attempts - return thread even if its stack may not be
|
||||
// allocated on the preferred node
|
||||
if ((i == (attempts - 1)))
|
||||
if (i == (attempts - 1))
|
||||
break;
|
||||
|
||||
// Get the NUMA node where the first page of the stack is resident. If
|
||||
|
||||
@@ -86,6 +86,12 @@ module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);
static bool opportunistic_display_sync = true;
module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool, 0400);

static enum NvKmsDebugForceColorSpace debug_force_color_space = NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE;
module_param_named(debug_force_color_space, debug_force_color_space, uint, 0400);

static bool enable_overlay_layers = true;
module_param_named(enable_overlay_layers, enable_overlay_layers, bool, 0400);

/* These parameters are used for fault injection tests. Normally the defaults
 * should be used. */
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
@@ -96,19 +102,40 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
static bool malloc_verbose = false;
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);

/* Fail allocating the RM core channel for NVKMS using the i-th method (see
 * FailAllocCoreChannelMethod). Failures not using the i-th method are ignored. */
MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core channel allocation failure");
static int fail_alloc_core_channel_method = -1;
module_param_named(fail_alloc_core_channel, fail_alloc_core_channel_method, int, 0400);

#if NVKMS_CONFIG_FILE_SUPPORTED
/* This parameter is used to find the dpy override conf file */
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)

MODULE_PARM_DESC(config_file,
    "Path to the nvidia-modeset configuration file "
    "(default: disabled)");
    "Path to the nvidia-modeset configuration file (default: disabled)");
static char *nvkms_conf = NULL;
module_param_named(config_file, nvkms_conf, charp, 0400);
#endif

static atomic_t nvkms_alloc_called_count;

NvBool nvkms_test_fail_alloc_core_channel(
    enum FailAllocCoreChannelMethod method
)
{
    if (method != fail_alloc_core_channel_method) {
        // don't fail if it's not the currently specified method
        return NV_FALSE;
    }

    printk(KERN_INFO NVKMS_LOG_PREFIX
           "Failing core channel allocation using method %d",
           fail_alloc_core_channel_method);

    return NV_TRUE;
}

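The new fail_alloc_core_channel parameter is consumed through nvkms_test_fail_alloc_core_channel(): a caller passes the FailAllocCoreChannelMethod describing the step it is about to perform and bails out if that method is the one selected for injection. A hedged call-site sketch (AllocCoreChannel() itself is hypothetical and not part of this diff):

    static NvBool AllocCoreChannel(struct NvKmsKapiDevice *device)  /* hypothetical */
    {
        /* Fault injection: pretend RM core-channel setup failed when the module
         * was loaded with fail_alloc_core_channel=0. */
        if (nvkms_test_fail_alloc_core_channel(
                FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL)) {
            return NV_FALSE;
        }

        /* ... the real core-channel allocation would happen here ... */
        return NV_TRUE;
    }
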
NvBool nvkms_output_rounding_fix(void)
{
    return output_rounding_fix;
@@ -139,6 +166,19 @@ NvBool nvkms_opportunistic_display_sync(void)
    return opportunistic_display_sync;
}

enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void)
{
    if (debug_force_color_space >= NVKMS_DEBUG_FORCE_COLOR_SPACE_MAX) {
        return NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE;
    }
    return debug_force_color_space;
}

NvBool nvkms_enable_overlay_layers(void)
{
    return enable_overlay_layers;
}

NvBool nvkms_kernel_supports_syncpts(void)
{
    /*
@@ -1010,6 +1050,11 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
|
||||
|
||||
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
|
||||
if (!acpi_video_backlight_use_native()) {
|
||||
#if defined(NV_ACPI_VIDEO_REGISTER_BACKLIGHT)
|
||||
nvkms_log(NVKMS_LOG_LEVEL_INFO, NVKMS_LOG_PREFIX,
|
||||
"ACPI reported no NVIDIA native backlight available; attempting to use ACPI backlight.");
|
||||
acpi_video_register_backlight();
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
@@ -1084,7 +1129,7 @@ static void nvkms_kapi_event_kthread_q_callback(void *arg)
|
||||
nvKmsKapiHandleEventQueueChange(device);
|
||||
}
|
||||
|
||||
struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
|
||||
static struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
|
||||
struct NvKmsKapiDevice *device,
|
||||
int *status)
|
||||
{
|
||||
@@ -1136,7 +1181,7 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void nvkms_close_pm_locked(struct nvkms_per_open *popen)
|
||||
static void nvkms_close_pm_locked(struct nvkms_per_open *popen)
|
||||
{
|
||||
/*
|
||||
* Don't use down_interruptible(): we need to free resources
|
||||
@@ -1199,7 +1244,7 @@ static void nvkms_close_popen(struct nvkms_per_open *popen)
|
||||
}
|
||||
}
|
||||
|
||||
int nvkms_ioctl_common
|
||||
static int nvkms_ioctl_common
|
||||
(
|
||||
struct nvkms_per_open *popen,
|
||||
NvU32 cmd, NvU64 address, const size_t size
|
||||
@@ -1452,6 +1497,8 @@ static size_t nvkms_config_file_open
|
||||
loff_t pos = 0;
|
||||
#endif
|
||||
|
||||
*buff = NULL;
|
||||
|
||||
if (!nvkms_fs_mounted()) {
|
||||
printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
|
||||
return 0;
|
||||
@@ -1475,6 +1522,11 @@ static size_t nvkms_config_file_open
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Do not alloc a 0 sized buffer
|
||||
if (file_size == 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
*buff = nvkms_alloc(file_size, NV_FALSE);
|
||||
if (*buff == NULL) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");
|
||||
@@ -1558,6 +1610,48 @@ NvBool nvKmsKapiGetFunctionsTable
}
EXPORT_SYMBOL(nvKmsKapiGetFunctionsTable);

NvU32 nvKmsKapiF16ToF32(NvU16 a)
{
    return nvKmsKapiF16ToF32Internal(a);
}
EXPORT_SYMBOL(nvKmsKapiF16ToF32);

NvU16 nvKmsKapiF32ToF16(NvU32 a)
{
    return nvKmsKapiF32ToF16Internal(a);
}
EXPORT_SYMBOL(nvKmsKapiF32ToF16);

NvU32 nvKmsKapiF32Mul(NvU32 a, NvU32 b)
{
    return nvKmsKapiF32MulInternal(a, b);
}
EXPORT_SYMBOL(nvKmsKapiF32Mul);

NvU32 nvKmsKapiF32Div(NvU32 a, NvU32 b)
{
    return nvKmsKapiF32DivInternal(a, b);
}
EXPORT_SYMBOL(nvKmsKapiF32Div);

NvU32 nvKmsKapiF32Add(NvU32 a, NvU32 b)
{
    return nvKmsKapiF32AddInternal(a, b);
}
EXPORT_SYMBOL(nvKmsKapiF32Add);

NvU32 nvKmsKapiF32ToUI32RMinMag(NvU32 a, NvBool exact)
{
    return nvKmsKapiF32ToUI32RMinMagInternal(a, exact);
}
EXPORT_SYMBOL(nvKmsKapiF32ToUI32RMinMag);

NvU32 nvKmsKapiUI32ToF32(NvU32 a)
{
    return nvKmsKapiUI32ToF32Internal(a);
}
EXPORT_SYMBOL(nvKmsKapiUI32ToF32);

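These exports appear to wrap softfloat-style internal helpers so other NVIDIA kernel modules can do single-precision arithmetic without touching the FPU; values are passed around as raw IEEE-754 bit patterns in NvU32/NvU16. A hedged usage sketch (scale_u32() is illustrative, not from this diff):

    /* Multiply an integer value by an f16 scale factor and round toward zero. */
    static NvU32 scale_u32(NvU32 value, NvU16 scale_f16)
    {
        NvU32 v = nvKmsKapiUI32ToF32(value);        /* integer  -> f32 bits */
        NvU32 s = nvKmsKapiF16ToF32(scale_f16);     /* f16 bits -> f32 bits */
        NvU32 r = nvKmsKapiF32Mul(v, s);            /* f32 multiply         */

        return nvKmsKapiF32ToUI32RMinMag(r, NV_FALSE /* exact not required */);
    }
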
/*************************************************************************
|
||||
* File operation callback functions.
|
||||
*************************************************************************/
|
||||
|
||||
@@ -67,6 +67,14 @@ enum NvKmsSyncPtOp {
    NVKMS_SYNCPT_OP_READ_MINVAL,
};

enum NvKmsDebugForceColorSpace {
    NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE,
    NVKMS_DEBUG_FORCE_COLOR_SPACE_RGB,
    NVKMS_DEBUG_FORCE_COLOR_SPACE_YUV444,
    NVKMS_DEBUG_FORCE_COLOR_SPACE_YUV422,
    NVKMS_DEBUG_FORCE_COLOR_SPACE_MAX,
};

typedef struct {

    struct {
@@ -96,12 +104,20 @@ typedef struct {
    } read_minval;
} NvKmsSyncPtOpParams;

enum FailAllocCoreChannelMethod {
    FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL = 0,
    FAIL_ALLOC_CORE_CHANNEL_RESTORE_CONSOLE = 1,
};

NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);
NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_hdmi_frl(void);
NvBool nvkms_disable_vrr_memclk_switch(void);
NvBool nvkms_hdmi_deepcolor(void);
NvBool nvkms_vblank_sem_control(void);
NvBool nvkms_opportunistic_display_sync(void);
enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void);
NvBool nvkms_enable_overlay_layers(void);

void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

@@ -40,9 +40,6 @@ NV_KERNEL_MODULE_TARGETS += $(NVIDIA_MODESET_KO)
|
||||
NVIDIA_MODESET_BINARY_OBJECT := $(src)/nvidia-modeset/nv-modeset-kernel.o_binary
|
||||
NVIDIA_MODESET_BINARY_OBJECT_O := nvidia-modeset/nv-modeset-kernel.o
|
||||
|
||||
quiet_cmd_symlink = SYMLINK $@
|
||||
cmd_symlink = ln -sf $< $@
|
||||
|
||||
targets += $(NVIDIA_MODESET_BINARY_OBJECT_O)
|
||||
|
||||
$(obj)/$(NVIDIA_MODESET_BINARY_OBJECT_O): $(NVIDIA_MODESET_BINARY_OBJECT) FORCE
|
||||
@@ -105,4 +102,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg
|
||||
|
||||
@@ -110,4 +110,18 @@ NvBool nvKmsSetBacklight(NvU32 display_id, void *drv_priv, NvU32 brightness);
|
||||
|
||||
NvBool nvKmsOpenDevHasSubOwnerPermissionOrBetter(const struct NvKmsPerOpenDev *pOpenDev);
|
||||
|
||||
NvU32 nvKmsKapiF16ToF32Internal(NvU16 a);
|
||||
|
||||
NvU16 nvKmsKapiF32ToF16Internal(NvU32 a);
|
||||
|
||||
NvU32 nvKmsKapiF32MulInternal(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32DivInternal(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32AddInternal(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32ToUI32RMinMagInternal(NvU32 a, NvBool exact);
|
||||
|
||||
NvU32 nvKmsKapiUI32ToF32Internal(NvU32 a);
|
||||
|
||||
#endif /* __NV_KMS_H__ */
|
||||
|
||||
@@ -189,6 +189,12 @@ int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
    struct nvidia_p2p_page_table **page_table,
    void (*free_callback)(void *data), void *data);

/*
 * Flags to be used with persistent APIs
 */
#define NVIDIA_P2P_FLAGS_DEFAULT 0
#define NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING 1

/*
 * @brief
 *   Pin and make the pages underlying a range of GPU virtual memory
@@ -212,7 +218,11 @@ int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
 * @param[out] page_table
 *   A pointer to an array of structures with P2P PTEs.
 * @param[in] flags
 *   Must be set to zero for now.
 *   NVIDIA_P2P_FLAGS_DEFAULT:
 *     Default value to be used if no specific behavior is expected.
 *   NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING:
 *     Force BAR1 mappings on certain coherent platforms,
 *     subject to capability and supported topology.
 *
 * @return
 *   0 upon successful completion.

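The new flags apply to the persistent pinning path. A hedged sketch, assuming the nvidia_p2p_get_pages_persistent() prototype declared elsewhere in nv-p2p.h, that prefers BAR1 mappings and falls back to the default policy if the platform rejects the request:

    static int pin_gpu_range_persistent(uint64_t gpu_va, uint64_t len,
                                        struct nvidia_p2p_page_table **page_table)
    {
        /* Ask for BAR1 mappings where the coherent platform supports them ... */
        int rc = nvidia_p2p_get_pages_persistent(gpu_va, len, page_table,
                                                 NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING);

        /* ... and fall back to the default mapping policy otherwise. */
        if (rc != 0) {
            rc = nvidia_p2p_get_pages_persistent(gpu_va, len, page_table,
                                                 NVIDIA_P2P_FLAGS_DEFAULT);
        }
        return rc;
    }
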
@@ -1,30 +1,25 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
|
||||
// Command: ../../../bin/manuals/refhdr2class.pl clc365.h c365 ACCESS_COUNTER_NOTIFY_BUFFER --search_str=NV_ACCESS_COUNTER --input_file=nv_ref_dev_access_counter.h
|
||||
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _clc365_h_
|
||||
#define _clc365_h_
|
||||
|
||||
@@ -1,30 +1,25 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
|
||||
// Command: ../../../bin/manuals/refhdr2class.pl clc369.h c369 MMU_FAULT_BUFFER --search_str=NV_MMU_FAULT --input_file=nv_ref_dev_mmu_fault.h
|
||||
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _clc369_h_
|
||||
#define _clc369_h_
|
||||
|
||||
@@ -1,26 +1,25 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2012-2015 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _clc36f_h_
|
||||
#define _clc36f_h_
|
||||
@@ -257,7 +256,6 @@ typedef volatile struct Nvc36fControl_struct {
|
||||
#define NVC36F_CLEAR_FAULTED_TYPE 31:31
|
||||
#define NVC36F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
|
||||
#define NVC36F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
|
||||
#define NVC36F_QUADRO_VERIFY (0x000000a0)
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
|
||||
@@ -1,26 +1,25 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2012-2015 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _clc46f_h_
|
||||
#define _clc46f_h_
|
||||
@@ -259,7 +258,6 @@ typedef volatile struct Nvc46fControl_struct {
|
||||
#define NVC46F_CLEAR_FAULTED_TYPE 31:31
|
||||
#define NVC46F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
|
||||
#define NVC46F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
|
||||
#define NVC46F_QUADRO_VERIFY (0x000000a0)
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
|
||||
@@ -1,26 +1,25 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2012-2015 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _clc56f_h_
|
||||
#define _clc56f_h_
|
||||
@@ -261,7 +260,6 @@ typedef volatile struct Nvc56fControl_struct {
|
||||
#define NVC56F_CLEAR_FAULTED_TYPE 31:31
|
||||
#define NVC56F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
|
||||
#define NVC56F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
|
||||
#define NVC56F_QUADRO_VERIFY (0x000000a0)
|
||||
|
||||
|
||||
/* GPFIFO entry format */
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 1993-2004 NVIDIA Corporation
|
||||
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -21,8 +21,6 @@
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#ifndef _clc5b5_h_
|
||||
@@ -34,64 +32,6 @@ extern "C" {
|
||||
|
||||
#define TURING_DMA_COPY_A (0x0000C5B5)
|
||||
|
||||
typedef volatile struct _clc5b5_tag0 {
|
||||
NvV32 Reserved00[0x40];
|
||||
NvV32 Nop; // 0x00000100 - 0x00000103
|
||||
NvV32 Reserved01[0xF];
|
||||
NvV32 PmTrigger; // 0x00000140 - 0x00000143
|
||||
NvV32 Reserved02[0x3F];
|
||||
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
|
||||
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
|
||||
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
|
||||
NvV32 Reserved03[0x2];
|
||||
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
|
||||
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
|
||||
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
|
||||
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
|
||||
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
|
||||
NvV32 Reserved04[0x6];
|
||||
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
|
||||
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
|
||||
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
|
||||
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
|
||||
NvV32 Reserved05[0x1C];
|
||||
NvV32 LaunchDma; // 0x00000300 - 0x00000303
|
||||
NvV32 Reserved06[0x3F];
|
||||
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
|
||||
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
|
||||
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
|
||||
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
|
||||
NvV32 PitchIn; // 0x00000410 - 0x00000413
|
||||
NvV32 PitchOut; // 0x00000414 - 0x00000417
|
||||
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
|
||||
NvV32 LineCount; // 0x0000041C - 0x0000041F
|
||||
NvV32 Reserved07[0xB8];
|
||||
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
|
||||
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
|
||||
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
|
||||
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
|
||||
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
|
||||
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
|
||||
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
|
||||
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
|
||||
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
|
||||
NvV32 Reserved08[0x1];
|
||||
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
|
||||
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
|
||||
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
|
||||
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
|
||||
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
|
||||
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
|
||||
NvV32 Reserved09[0x1];
|
||||
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
|
||||
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
|
||||
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
|
||||
NvV32 DstOriginY; // 0x00000750 - 0x00000753
|
||||
NvV32 Reserved10[0x270];
|
||||
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
|
||||
NvV32 Reserved11[0x3BA];
|
||||
} turing_dma_copy_aControlPio;
|
||||
|
||||
#define NVC5B5_NOP (0x00000100)
|
||||
#define NVC5B5_NOP_PARAMETER 31:0
|
||||
#define NVC5B5_PM_TRIGGER (0x00000140)
|
||||
@@ -125,14 +65,6 @@ typedef volatile struct _clc5b5_tag0 {
|
||||
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC5B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
|
||||
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
|
||||
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
|
||||
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
|
||||
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
|
||||
#define NVC5B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
|
||||
#define NVC5B5_SET_PAGEOUT_START_PAUPPER_V 4:0
|
||||
#define NVC5B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
|
||||
#define NVC5B5_SET_PAGEOUT_START_PALOWER_V 31:0
|
||||
#define NVC5B5_LAUNCH_DMA (0x00000300)
|
||||
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
|
||||
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
|
||||
@@ -199,8 +131,6 @@ typedef volatile struct _clc5b5_tag0 {
|
||||
#define NVC5B5_LAUNCH_DMA_VPRMODE 23:22
|
||||
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
|
||||
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
|
||||
#define NVC5B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC 26:26
|
||||
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 1993-2004 NVIDIA Corporation
|
||||
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -21,8 +21,6 @@
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#ifndef _clc6b5_h_
|
||||
@@ -34,64 +32,6 @@ extern "C" {
|
||||
|
||||
#define AMPERE_DMA_COPY_A (0x0000C6B5)
|
||||
|
||||
typedef volatile struct _clc6b5_tag0 {
|
||||
NvV32 Reserved00[0x40];
|
||||
NvV32 Nop; // 0x00000100 - 0x00000103
|
||||
NvV32 Reserved01[0xF];
|
||||
NvV32 PmTrigger; // 0x00000140 - 0x00000143
|
||||
NvV32 Reserved02[0x3F];
|
||||
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
|
||||
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
|
||||
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
|
||||
NvV32 Reserved03[0x2];
|
||||
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
|
||||
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
|
||||
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
|
||||
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
|
||||
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
|
||||
NvV32 Reserved04[0x6];
|
||||
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
|
||||
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
|
||||
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
|
||||
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
|
||||
NvV32 Reserved05[0x1C];
|
||||
NvV32 LaunchDma; // 0x00000300 - 0x00000303
|
||||
NvV32 Reserved06[0x3F];
|
||||
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
|
||||
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
|
||||
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
|
||||
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
|
||||
NvV32 PitchIn; // 0x00000410 - 0x00000413
|
||||
NvV32 PitchOut; // 0x00000414 - 0x00000417
|
||||
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
|
||||
NvV32 LineCount; // 0x0000041C - 0x0000041F
|
||||
NvV32 Reserved07[0xB8];
|
||||
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
|
||||
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
|
||||
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
|
||||
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
|
||||
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
|
||||
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
|
||||
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
|
||||
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
|
||||
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
|
||||
NvV32 Reserved08[0x1];
|
||||
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
|
||||
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
|
||||
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
|
||||
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
|
||||
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
|
||||
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
|
||||
NvV32 Reserved09[0x1];
|
||||
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
|
||||
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
|
||||
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
|
||||
NvV32 DstOriginY; // 0x00000750 - 0x00000753
|
||||
NvV32 Reserved10[0x270];
|
||||
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
|
||||
NvV32 Reserved11[0x3BA];
|
||||
} ampere_dma_copy_aControlPio;
|
||||
|
||||
#define NVC6B5_NOP (0x00000100)
|
||||
#define NVC6B5_NOP_PARAMETER 31:0
|
||||
#define NVC6B5_PM_TRIGGER (0x00000140)
|
||||
@@ -131,14 +71,6 @@ typedef volatile struct _clc6b5_tag0 {
|
||||
#define NVC6B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
|
||||
#define NVC6B5_SET_DST_PHYS_MODE_PEER_ID 8:6
|
||||
#define NVC6B5_SET_DST_PHYS_MODE_FLA 9:9
|
||||
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
|
||||
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
|
||||
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
|
||||
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
|
||||
#define NVC6B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
|
||||
#define NVC6B5_SET_PAGEOUT_START_PAUPPER_V 4:0
|
||||
#define NVC6B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
|
||||
#define NVC6B5_SET_PAGEOUT_START_PALOWER_V 31:0
|
||||
#define NVC6B5_LAUNCH_DMA (0x00000300)
|
||||
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
|
||||
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
|
||||
@@ -199,8 +131,6 @@ typedef volatile struct _clc6b5_tag0 {
|
||||
#define NVC6B5_LAUNCH_DMA_VPRMODE 23:22
|
||||
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
|
||||
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
|
||||
#define NVC6B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC 26:26
|
||||
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 1993-2004 NVIDIA Corporation
|
||||
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
@@ -21,8 +21,6 @@
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#ifndef _clc7b5_h_
|
||||
@@ -34,69 +32,6 @@ extern "C" {
|
||||
|
||||
#define AMPERE_DMA_COPY_B (0x0000C7B5)
|
||||
|
||||
typedef volatile struct _clc7b5_tag0 {
|
||||
NvV32 Reserved00[0x40];
|
||||
NvV32 Nop; // 0x00000100 - 0x00000103
|
||||
NvV32 Reserved01[0xF];
|
||||
NvV32 PmTrigger; // 0x00000140 - 0x00000143
|
||||
NvV32 Reserved02[0x36];
|
||||
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
|
||||
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
|
||||
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
|
||||
NvV32 Reserved03[0x6];
|
||||
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
|
||||
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
|
||||
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
|
||||
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
|
||||
NvV32 Reserved04[0x1];
|
||||
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
|
||||
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
|
||||
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
|
||||
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
|
||||
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
|
||||
NvV32 Reserved05[0x6];
|
||||
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
|
||||
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
|
||||
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
|
||||
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
|
||||
NvV32 Reserved06[0x1C];
|
||||
NvV32 LaunchDma; // 0x00000300 - 0x00000303
|
||||
NvV32 Reserved07[0x3F];
|
||||
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
|
||||
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
|
||||
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
|
||||
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
|
||||
NvV32 PitchIn; // 0x00000410 - 0x00000413
|
||||
NvV32 PitchOut; // 0x00000414 - 0x00000417
|
||||
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
|
||||
NvV32 LineCount; // 0x0000041C - 0x0000041F
|
||||
NvV32 Reserved08[0xB8];
|
||||
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
|
||||
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
|
||||
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
|
||||
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
|
||||
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
|
||||
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
|
||||
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
|
||||
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
|
||||
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
|
||||
NvV32 Reserved09[0x1];
|
||||
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
|
||||
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
|
||||
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
|
||||
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
|
||||
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
|
||||
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
|
||||
NvV32 Reserved10[0x1];
|
||||
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
|
||||
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
|
||||
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
|
||||
NvV32 DstOriginY; // 0x00000750 - 0x00000753
|
||||
NvV32 Reserved11[0x270];
|
||||
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
|
||||
NvV32 Reserved12[0x3BA];
|
||||
} ampere_dma_copy_bControlPio;
|
||||
|
||||
#define NVC7B5_NOP (0x00000100)
|
||||
#define NVC7B5_NOP_PARAMETER 31:0
|
||||
#define NVC7B5_PM_TRIGGER (0x00000140)
|
||||
@@ -146,14 +81,6 @@ typedef volatile struct _clc7b5_tag0 {
|
||||
#define NVC7B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
|
||||
#define NVC7B5_SET_DST_PHYS_MODE_PEER_ID 8:6
|
||||
#define NVC7B5_SET_DST_PHYS_MODE_FLA 9:9
|
||||
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
|
||||
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
|
||||
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
|
||||
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
|
||||
#define NVC7B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
|
||||
#define NVC7B5_SET_PAGEOUT_START_PAUPPER_V 4:0
|
||||
#define NVC7B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
|
||||
#define NVC7B5_SET_PAGEOUT_START_PALOWER_V 31:0
|
||||
#define NVC7B5_LAUNCH_DMA (0x00000300)
|
||||
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
|
||||
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
|
||||
@@ -223,8 +150,6 @@ typedef volatile struct _clc7b5_tag0 {
|
||||
#define NVC7B5_LAUNCH_DMA_VPRMODE 23:22
|
||||
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
|
||||
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
|
||||
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
|
||||
#define NVC7B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC 26:26
|
||||
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
|
||||
|
||||
kernel-open/nvidia-uvm/clca6f.h (new file, 74 lines)
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __gb202_clca6f_h__
|
||||
#define __gb202_clca6f_h__
|
||||
|
||||
typedef volatile struct Nvca6fControl_struct {
|
||||
NvU32 Ignored00[0x23]; /* 0000-008b*/
|
||||
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
|
||||
NvU32 Ignored01[0x5c];
|
||||
} Nvca6fControl, BlackwellBControlGPFifo;
|
||||
|
||||
#define BLACKWELL_CHANNEL_GPFIFO_B (0x0000CA6F)
|
||||
|
||||
#define NVCA6F_SET_OBJECT (0x00000000)
|
||||
#define NVCA6F_SEM_ADDR_LO (0x0000005c)
|
||||
#define NVCA6F_SEM_ADDR_LO_OFFSET 31:2
|
||||
#define NVCA6F_SEM_ADDR_HI (0x00000060)
|
||||
#define NVCA6F_SEM_ADDR_HI_OFFSET 24:0
|
||||
#define NVCA6F_SEM_PAYLOAD_LO (0x00000064)
|
||||
#define NVCA6F_SEM_PAYLOAD_HI (0x00000068)
|
||||
#define NVCA6F_SEM_EXECUTE (0x0000006c)
|
||||
#define NVCA6F_SEM_EXECUTE_OPERATION 2:0
|
||||
#define NVCA6F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
|
||||
#define NVCA6F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
|
||||
#define NVCA6F_SEM_EXECUTE_RELEASE_WFI 20:20
|
||||
#define NVCA6F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
|
||||
#define NVCA6F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
|
||||
#define NVCA6F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
|
||||
|
||||
/* GPFIFO entry format */
|
||||
#define NVCA6F_GP_ENTRY__SIZE 8
|
||||
#define NVCA6F_GP_ENTRY0_FETCH 0:0
|
||||
#define NVCA6F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
|
||||
#define NVCA6F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
|
||||
#define NVCA6F_GP_ENTRY0_GET 31:2
|
||||
#define NVCA6F_GP_ENTRY0_OPERAND 31:0
|
||||
#define NVCA6F_GP_ENTRY0_PB_EXTENDED_BASE_OPERAND 24:8
|
||||
#define NVCA6F_GP_ENTRY1_GET_HI 7:0
|
||||
#define NVCA6F_GP_ENTRY1_LEVEL 9:9
|
||||
#define NVCA6F_GP_ENTRY1_LEVEL_MAIN 0x00000000
|
||||
#define NVCA6F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
|
||||
#define NVCA6F_GP_ENTRY1_LENGTH 30:10
|
||||
#define NVCA6F_GP_ENTRY1_SYNC 31:31
|
||||
#define NVCA6F_GP_ENTRY1_SYNC_PROCEED 0x00000000
|
||||
#define NVCA6F_GP_ENTRY1_SYNC_WAIT 0x00000001
|
||||
#define NVCA6F_GP_ENTRY1_OPCODE 7:0
|
||||
#define NVCA6F_GP_ENTRY1_OPCODE_NOP 0x00000000
|
||||
#define NVCA6F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
|
||||
#define NVCA6F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
|
||||
#define NVCA6F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
|
||||
#define NVCA6F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004
|
||||
|
||||
#endif // __gb202_clca6f_h__
|
||||
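The GP entry macros above use the usual <high-bit>:<low-bit> field notation, and NVCA6F_GP_ENTRY__SIZE of 8 means one entry is two 32-bit words: word 0 carries pushbuffer address bits 31:2, word 1 carries the upper address byte plus the method count. A hedged packing sketch under that reading (not taken from the driver):

    static void nvca6f_make_gp_entry(NvU64 pb_gpu_va, NvU32 length_in_methods,
                                     NvU32 entry[2])
    {
        /* NVCA6F_GP_ENTRY0_GET 31:2 - low address bits, 4-byte aligned. */
        entry[0] = (NvU32)(pb_gpu_va & 0xfffffffcULL);

        /* NVCA6F_GP_ENTRY1_GET_HI 7:0 and NVCA6F_GP_ENTRY1_LENGTH 30:10. */
        entry[1] = (NvU32)((pb_gpu_va >> 32) & 0xff) |
                   ((length_in_methods & 0x1fffff) << 10);

        /* NVCA6F_GP_ENTRY1_SYNC_PROCEED (0) is implied; set bit 31 for SYNC_WAIT. */
    }
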
kernel-open/nvidia-uvm/clcab5.h (new file, 44 lines)
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _clcab5_h_
|
||||
#define _clcab5_h_
|
||||
|
||||
#define BLACKWELL_DMA_COPY_B (0x0000CAB5)
|
||||
|
||||
#define NVCAB5_LAUNCH_DMA (0x00000300)
|
||||
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
|
||||
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
|
||||
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
|
||||
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
|
||||
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PREFETCH (0x00000003)
|
||||
|
||||
#define NVCAB5_REQ_ATTR (0x00000754)
|
||||
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS 1:0
|
||||
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_FIRST (0x00000000)
|
||||
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_NORMAL (0x00000001)
|
||||
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_LAST (0x00000002)
|
||||
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_DEMOTE (0x00000003)
|
||||
|
||||
#endif /* _clcab5_h_ */
|
||||
|
||||
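The NVCAB5_REQ_ATTR method appears to let a prefetch-type LaunchDma select the L2 eviction class for the lines it touches. A hedged sketch of the method stream, using a hypothetical nv_push_method(subchannel, method, value) helper and a caller-defined ce_subch:

    /* Select the eviction class first, then launch the prefetch. */
    nv_push_method(ce_subch, NVCAB5_REQ_ATTR,
                   0x00000002 /* PREFETCH_L2_CLASS_EVICT_LAST, field 1:0 */);
    nv_push_method(ce_subch, NVCAB5_LAUNCH_DMA,
                   0x00000003 /* DATA_TRANSFER_TYPE_PREFETCH, field 1:0 */);
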
@@ -1,25 +1,25 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
@@ -32,6 +32,28 @@ extern "C" {
|
||||
|
||||
#define HOPPER_SEC2_WORK_LAUNCH_A (0x0000CBA2)
|
||||
|
||||
typedef volatile struct _clcba2_tag0 {
|
||||
NvV32 Reserved00[0x100];
|
||||
NvV32 DecryptCopySrcAddrHi; // 0x00000400 - 0x00000403
|
||||
NvV32 DecryptCopySrcAddrLo; // 0x00000404 - 0x00000407
|
||||
NvV32 DecryptCopyDstAddrHi; // 0x00000408 - 0x0000040B
|
||||
NvV32 DecryptCopyDstAddrLo; // 0x0000040c - 0x0000040F
|
||||
NvU32 DecryptCopySize; // 0x00000410 - 0x00000413
|
||||
NvU32 DecryptCopyAuthTagAddrHi; // 0x00000414 - 0x00000417
|
||||
NvU32 DecryptCopyAuthTagAddrLo; // 0x00000418 - 0x0000041B
|
||||
NvV32 DigestAddrHi; // 0x0000041C - 0x0000041F
|
||||
NvV32 DigestAddrLo; // 0x00000420 - 0x00000423
|
||||
NvV32 Reserved01[0x7];
|
||||
NvV32 SemaphoreA; // 0x00000440 - 0x00000443
|
||||
NvV32 SemaphoreB; // 0x00000444 - 0x00000447
|
||||
NvV32 SemaphoreSetPayloadLower; // 0x00000448 - 0x0000044B
|
||||
NvV32 SemaphoreSetPayloadUppper; // 0x0000044C - 0x0000044F
|
||||
NvV32 SemaphoreD; // 0x00000450 - 0x00000453
|
||||
NvU32 Reserved02[0x7];
|
||||
NvV32 Execute; // 0x00000470 - 0x00000473
|
||||
NvV32 Reserved03[0x23];
|
||||
} NVCBA2_HOPPER_SEC2_WORK_LAUNCH_AControlPio;
|
||||
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI (0x00000400)
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI_DATA 24:0
|
||||
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO (0x00000404)
|
||||
@@ -90,6 +112,45 @@ extern "C" {
#define NVCBA2_EXECUTE_TIMESTAMP 5:5
#define NVCBA2_EXECUTE_TIMESTAMP_DISABLE (0x00000000)
#define NVCBA2_EXECUTE_TIMESTAMP_ENABLE (0x00000001)
#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER 6:6
#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER_DISABLE (0x00000000)
#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER_ENABLE (0x00000001)

// Class definitions
#define NVCBA2_DECRYPT_COPY_SIZE_MAX_BYTES (2*1024*1024)
#define NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES (1024*1024*1024)

// Errors
#define NVCBA2_ERROR_NONE (0x00000000)
#define NVCBA2_ERROR_DECRYPT_COPY_SRC_ADDR_MISALIGNED_POINTER (0x00000001)
#define NVCBA2_ERROR_DECRYPT_COPY_DEST_ADDR_MISALIGNED_POINTER (0x00000002)
#define NVCBA2_ERROR_DECRYPT_COPY_AUTH_TAG_ADDR_MISALIGNED_POINTER (0x00000003)
#define NVCBA2_ERROR_DECRYPT_COPY_DMA_NACK (0x00000004)
#define NVCBA2_ERROR_DECRYPT_COPY_AUTH_TAG_MISMATCH (0x00000005)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_MISALIGNED_POINTER (0x00000006)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_DMA_NACK (0x00000007)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_CHECK_FAILURE (0x00000008)
#define NVCBA2_ERROR_MISALIGNED_SIZE (0x00000009)
#define NVCBA2_ERROR_MISSING_METHODS (0x0000000A)
#define NVCBA2_ERROR_SEMAPHORE_RELEASE_DMA_NACK (0x0000000B)
#define NVCBA2_ERROR_DECRYPT_SIZE_MAX_EXCEEDED (0x0000000C)
#define NVCBA2_ERROR_OS_APPLICATION (0x0000000D)
#define NVCBA2_ERROR_INVALID_CTXSW_REQUEST (0x0000000E)
#define NVCBA2_ERROR_BUFFER_OVERFLOW (0x0000000F)
#define NVCBA2_ERROR_IV_OVERFLOW (0x00000010)
#define NVCBA2_ERROR_INTERNAL_SETUP_FAILURE (0x00000011)
#define NVCBA2_ERROR_DECRYPT_COPY_INTERNAL_DMA_FAILURE (0x00000012)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_INTERNAL_DMA_FAILURE (0x00000013)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_HMAC_CALC_FAILURE (0x00000014)
#define NVCBA2_ERROR_NONCE_OVERFLOW (0x00000015)
#define NVCBA2_ERROR_AES_GCM_DECRYPTION_FAILURE (0x00000016)
#define NVCBA2_ERROR_SEMAPHORE_RELEASE_INTERNAL_DMA_FAILURE (0x00000017)
#define NVCBA2_ERROR_KEY_DERIVATION_FAILURE (0x00000018)
#define NVCBA2_ERROR_SCRUBBER_FAILURE (0x00000019)
#define NVCBA2_ERROR_SCRUBBER_INVALD_ADDRESS (0x0000001a)
#define NVCBA2_ERROR_SCRUBBER_INSUFFICIENT_PERMISSIONS (0x0000001b)
#define NVCBA2_ERROR_SCRUBBER_MUTEX_ACQUIRE_FAILURE (0x0000001c)
#define NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED (0x0000001d)

#ifdef __cplusplus
}; /* extern "C" */

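The error codes above are what the SEC2 work-launch class reports back, so when poking at this class it helps to log them by name. The helper below is an illustrative sketch only, not part of the diff: it assumes the defines above are reachable through a header named "clcba2.h", and the function name is made up.

#include <stdio.h>
#include "clcba2.h"  /* assumed header name for the NVCBA2_* defines above */

/* Map a handful of the NVCBA2_ERROR_* codes to printable names. */
static const char *nvcba2_error_str(unsigned int err)
{
    switch (err) {
        case NVCBA2_ERROR_NONE:                           return "NONE";
        case NVCBA2_ERROR_DECRYPT_COPY_AUTH_TAG_MISMATCH: return "DECRYPT_COPY_AUTH_TAG_MISMATCH";
        case NVCBA2_ERROR_MISALIGNED_SIZE:                return "MISALIGNED_SIZE";
        case NVCBA2_ERROR_DECRYPT_SIZE_MAX_EXCEEDED:      return "DECRYPT_SIZE_MAX_EXCEEDED";
        case NVCBA2_ERROR_IV_OVERFLOW:                    return "IV_OVERFLOW";
        case NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED:        return "SCRUB_SIZE_MAX_EXCEEDED";
        default:                                          return "UNKNOWN";
    }
}

int main(void)
{
    /* 0x0C is NVCBA2_ERROR_DECRYPT_SIZE_MAX_EXCEEDED per the list above. */
    printf("0x0c -> %s\n", nvcba2_error_str(0x0000000Cu));
    return 0;
}
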
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2013-2023 NVIDIA Corporation
    Copyright (c) 2013-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -35,6 +35,7 @@
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100 (0x00000180)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100 (0x00000190)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 (0x000001A0)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 (0x000001B0)

/* valid ARCHITECTURE_GP10x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100 (0x00000000)

@@ -462,6 +462,7 @@
#define NV_PFAULT_CLIENT_HUB_SCC3 0x00000044 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB3 0x00000045 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD1 0x00000046 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X8 0x00000046 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD2 0x00000047 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD3 0x00000048 /* */
#define NV_PFAULT_CLIENT_HUB_GSPLITE1 0x00000049 /* */

@@ -1,560 +0,0 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2003-2016 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#ifndef __gb100_dev_mmu_h__
|
||||
#define __gb100_dev_mmu_h__
|
||||
/* This file is autogenerated. Do not edit */
|
||||
#define NV_MMU_PDE /* ----G */
|
||||
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
|
||||
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_PDE__SIZE 8
|
||||
#define NV_MMU_PTE /* ----G */
|
||||
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
|
||||
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
|
||||
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_PTE__SIZE 8
|
||||
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_PTE_KIND (1*32+7):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_KIND_INVALID 0x07 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE 0x8 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC 0x9 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC 0xA /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC 0xB /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC 0xC /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC 0xD /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC 0xE /* R---V */
|
||||
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xF /* R---V */
|
||||
#define NV_MMU_VER1_PDE /* ----G */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PDE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE /* ----G */
|
||||
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PTE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_NEW_PDE /* ----G */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PDE__SIZE 8
|
||||
#define NV_MMU_NEW_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_NEW_PTE /* ----G */
|
||||
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PTE__SIZE 8
|
||||
#define NV_MMU_VER2_PDE /* ----G */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PDE__SIZE 8
|
||||
#define NV_MMU_VER2_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER2_PTE /* ----G */
|
||||
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PTE__SIZE 8
|
||||
#define NV_MMU_VER3_PDE /* ----G */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PDE__SIZE 8
|
||||
#define NV_MMU_VER3_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG 51:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL 69:67 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SMALL 115:76 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER3_PTE /* ----G */
|
||||
#define NV_MMU_VER3_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF 7:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PCF_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_SPARSE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_MAPPING_NOWHERE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_NO_VALID_4KB_PAGE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACE 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACE 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACE 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACE 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE 0x00000008 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE 0x00000009 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE 0x0000000A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE 0x0000000B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE 0x0000000C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE 0x0000000E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD 0x00000010 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD 0x00000011 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACD 0x00000012 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACD 0x00000013 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACD 0x00000014 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACD 0x00000015 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACD 0x00000016 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACD 0x00000017 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD 0x00000018 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD 0x00000019 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACD 0x0000001A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACD 0x0000001B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD 0x0000001C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACD 0x0000001E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_KIND 11:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SYS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_PEER 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_VID 39:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID 63:(64-3) /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PTE__SIZE 8
|
||||
#define NV_MMU_CLIENT /* ----G */
|
||||
#define NV_MMU_CLIENT_KIND 2:0 /* RWXVF */
|
||||
#define NV_MMU_CLIENT_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_INVALID 0x7 /* R---V */
|
||||
#endif // __gb100_dev_mmu_h__
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016 NVIDIA Corporation
    Copyright (c) 2016-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -81,7 +81,7 @@
#define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS)

// This exists in order to have a function to place a breakpoint on:
void on_nvq_assert(void)
static void on_nvq_assert(void)
{
    (void)NULL;
}

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

        // Ran out of attempts - return thread even if its stack may not be
        // allocated on the preferred node
        if ((i == (attempts - 1)))
        if (i == (attempts - 1))
            break;

        // Get the NUMA node where the first page of the stack is resident. If

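The comments in the hunk above describe the surrounding loop: the test keeps recreating the kthread until the first page of its stack is resident on the preferred NUMA node, and on the final attempt it keeps whatever it got. A rough sketch of that loop follows; it is not the driver's code, it assumes a directly mapped stack (the real code must also handle CONFIG_VMAP_STACK), and the function name and attempt budget are made up.

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/mm.h>

static struct task_struct *create_thread_on_preferred_node(int (*threadfn)(void *),
                                                           void *data, int node)
{
    const int attempts = 3;  /* made-up retry budget */
    struct task_struct *thread = NULL;
    int i;

    for (i = 0; i < attempts; i++) {
        thread = kthread_create_on_node(threadfn, data, node, "nvq_test");
        if (IS_ERR(thread))
            return NULL;

        /* Ran out of attempts: keep the thread even if its stack is remote. */
        if (i == (attempts - 1))
            break;

        /* Check which NUMA node the first page of the stack landed on. */
        if (page_to_nid(virt_to_page(thread->stack)) == node)
            break;

        kthread_stop(thread);  /* wrong node: discard and try again */
    }

    return thread;
}
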
@@ -1,18 +1,8 @@
|
||||
NVIDIA_UVM_SOURCES ?=
|
||||
NVIDIA_UVM_SOURCES_CXX ?=
|
||||
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/nvstatus.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/nvCpuUuid.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/nv-kthread-q.c
|
||||
@@ -36,6 +26,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_range.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_range_device_p2p.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_policy.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group.c
|
||||
@@ -54,6 +45,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tracker.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_access_counter_buffer.c
|
||||
@@ -67,7 +59,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c
|
||||
@@ -82,8 +73,13 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
|
||||
@@ -101,6 +97,8 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_prefetch.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
|
||||
@@ -128,3 +126,4 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
|
||||
|
||||
@@ -13,19 +13,6 @@ NVIDIA_UVM_OBJECTS =
|
||||
include $(src)/nvidia-uvm/nvidia-uvm-sources.Kbuild
|
||||
NVIDIA_UVM_OBJECTS += $(patsubst %.c,%.o,$(NVIDIA_UVM_SOURCES))
|
||||
|
||||
# Some linux kernel functions rely on being built with optimizations on and
|
||||
# to work around this we put wrappers for them in a separate file that's built
|
||||
# with optimizations on in debug builds and skipped in other builds.
|
||||
# Notably gcc 4.4 supports per function optimization attributes that would be
|
||||
# easier to use, but is too recent to rely on for now.
|
||||
NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE := nvidia-uvm/uvm_debug_optimized.c
|
||||
NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT := $(patsubst %.c,%.o,$(NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE))
|
||||
|
||||
ifneq ($(UVM_BUILD_TYPE),debug)
|
||||
# Only build the wrappers on debug builds
|
||||
NVIDIA_UVM_OBJECTS := $(filter-out $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_OBJECTS))
|
||||
endif
|
||||
|
||||
obj-m += nvidia-uvm.o
|
||||
nvidia-uvm-y := $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
@@ -36,15 +23,14 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
|
||||
#
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
|
||||
else
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
# work:
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG
|
||||
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
|
||||
endif
|
||||
NVIDIA_UVM_CFLAGS += -O2
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -g
|
||||
endif
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
# work:
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG
|
||||
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
|
||||
endif
|
||||
|
||||
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_ENABLED
|
||||
@@ -56,30 +42,17 @@ NVIDIA_UVM_CFLAGS += -I$(src)/nvidia-uvm
|
||||
|
||||
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_OBJECTS), $(NVIDIA_UVM_CFLAGS))
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
# Force optimizations on for the wrappers
|
||||
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_CFLAGS) -O2)
|
||||
endif
|
||||
|
||||
#
|
||||
# Register the conftests needed by nvidia-uvm.ko
|
||||
#
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_bus_address
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
@@ -88,26 +61,14 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += for_each_sgtable_dma_page
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlbs
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
|
||||
@@ -115,6 +76,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += fault_flag_remote_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += sg_dma_page_iter
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
|
||||
|
||||
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation
    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -127,9 +127,9 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
        goto err;
    }

    old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                         UVM_FD_UNINITIALIZED,
                                         UVM_FD_INITIALIZING);
    old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                      UVM_FD_UNINITIALIZED,
                                      UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;
    if (old_fd_type != UVM_FD_UNINITIALIZED) {
        status = NV_ERR_IN_USE;
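The change above only swaps the driver's nv_atomic_long_cmpxchg() wrapper for the kernel's atomic_long_cmpxchg(); the claim protocol is unchanged. As a minimal sketch of that protocol (reusing the UVM_FD_* constants and status codes quoted in the hunk; the helper name is hypothetical), a single compare-and-exchange moves the fd from "uninitialized" to "initializing" so only one caller can win:

static NV_STATUS uvm_fd_claim_sketch(struct file *filp)
{
    long old_fd_type;

    /* Atomically claim the fd: UNINITIALIZED -> INITIALIZING. */
    old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                      UVM_FD_UNINITIALIZED,
                                      UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;

    /* Some other caller already claimed (or is claiming) this fd. */
    if (old_fd_type != UVM_FD_UNINITIALIZED)
        return NV_ERR_IN_USE;

    return NV_OK;
}
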
@@ -222,10 +222,6 @@ static int uvm_open(struct inode *inode, struct file *filp)
    // assigning f_mapping.
    mapping->a_ops = inode->i_mapping->a_ops;

#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
    mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
#endif

    filp->private_data = NULL;
    filp->f_mapping = mapping;

@@ -244,7 +240,7 @@ static void uvm_release_deferred(void *data)
    // Since this function is only scheduled to run when uvm_release() fails
    // to trylock-acquire the pm.lock, the following acquisition attempt
    // is expected to block this thread, and cause it to remain blocked until
    // uvm_resume() releases the lock.  As a result, the deferred release
    // uvm_resume() releases the lock. As a result, the deferred release
    // kthread queue may stall for long periods of time.
    uvm_down_read(&g_uvm_global.pm.lock);

@@ -296,14 +292,14 @@ static int uvm_release(struct inode *inode, struct file *filp)

    // Because the kernel discards the status code returned from this release
    // callback, early exit in case of a pm.lock acquisition failure is not
    // an option.  Instead, the teardown work normally performed synchronously
    // an option. Instead, the teardown work normally performed synchronously
    // needs to be scheduled to run after uvm_resume() releases the lock.
    if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
        uvm_va_space_destroy(va_space);
        uvm_up_read(&g_uvm_global.pm.lock);
    }
    else {
        // Remove references to this inode from the address_space.  This isn't
        // Remove references to this inode from the address_space. This isn't
        // strictly necessary, as any CPU mappings of this file have already
        // been destroyed, and va_space->mapping won't be used again. Still,
        // the va_space survives the inode if its destruction is deferred, in
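The comments above lay out a trylock-or-defer teardown: if pm.lock can be taken without blocking, the va_space is destroyed synchronously; otherwise the destroy is queued and the deferred worker blocks in uvm_down_read() until uvm_resume() drops the lock. A hedged sketch of that shape (not the driver's exact code; schedule_deferred_release() is a made-up name for the enqueue onto the deferred-release kthread queue):

static void uvm_release_sketch(uvm_va_space_t *va_space)
{
    if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
        /* No suspend in flight: tear down synchronously. */
        uvm_va_space_destroy(va_space);
        uvm_up_read(&g_uvm_global.pm.lock);
    }
    else {
        /* pm.lock is held for suspend: defer the destroy until uvm_resume(). */
        schedule_deferred_release(va_space);
    }
}
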
@@ -325,21 +321,21 @@ static int uvm_release_entry(struct inode *inode, struct file *filp)

static void uvm_destroy_vma_managed(struct vm_area_struct *vma, bool make_zombie)
{
    uvm_va_range_t *va_range, *va_range_next;
    uvm_va_range_managed_t *managed_range, *managed_range_next;
    NvU64 size = 0;

    uvm_assert_rwsem_locked_write(&uvm_va_space_get(vma->vm_file)->lock);
    uvm_for_each_va_range_in_vma_safe(va_range, va_range_next, vma) {
    uvm_for_each_va_range_managed_in_vma_safe(managed_range, managed_range_next, vma) {
        // On exit_mmap (process teardown), current->mm is cleared so
        // uvm_va_range_vma_current would return NULL.
        UVM_ASSERT(uvm_va_range_vma(va_range) == vma);
        UVM_ASSERT(va_range->node.start >= vma->vm_start);
        UVM_ASSERT(va_range->node.end < vma->vm_end);
        size += uvm_va_range_size(va_range);
        UVM_ASSERT(uvm_va_range_vma(managed_range) == vma);
        UVM_ASSERT(managed_range->va_range.node.start >= vma->vm_start);
        UVM_ASSERT(managed_range->va_range.node.end < vma->vm_end);
        size += uvm_va_range_size(&managed_range->va_range);
        if (make_zombie)
            uvm_va_range_zombify(va_range);
            uvm_va_range_zombify(managed_range);
        else
            uvm_va_range_destroy(va_range, NULL);
            uvm_va_range_destroy(&managed_range->va_range, NULL);
    }

    if (vma->vm_private_data) {
@@ -351,18 +347,17 @@ static void uvm_destroy_vma_managed(struct vm_area_struct *vma, bool make_zombie
|
||||
|
||||
static void uvm_destroy_vma_semaphore_pool(struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_range_t *va_range;
|
||||
|
||||
va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
va_range = uvm_va_range_find(va_space, vma->vm_start);
|
||||
UVM_ASSERT(va_range &&
|
||||
va_range->node.start == vma->vm_start &&
|
||||
va_range->node.end + 1 == vma->vm_end &&
|
||||
va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);
|
||||
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, vma->vm_start);
|
||||
UVM_ASSERT(semaphore_pool_range &&
|
||||
semaphore_pool_range->va_range.node.start == vma->vm_start &&
|
||||
semaphore_pool_range->va_range.node.end + 1 == vma->vm_end);
|
||||
|
||||
uvm_mem_unmap_cpu_user(va_range->semaphore_pool.mem);
|
||||
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
|
||||
}
|
||||
|
||||
// If a fault handler is not set, paths like handle_pte_fault in older kernels
|
||||
@@ -478,7 +473,7 @@ static void uvm_vm_open_failure(struct vm_area_struct *original,
|
||||
static void uvm_vm_open_managed(struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_managed_t *managed_range;
|
||||
struct vm_area_struct *original;
|
||||
NV_STATUS status;
|
||||
NvU64 new_end;
|
||||
@@ -534,13 +529,13 @@ static void uvm_vm_open_managed(struct vm_area_struct *vma)
|
||||
goto out;
|
||||
}
|
||||
|
||||
// There can be multiple va_ranges under the vma already. Check if one spans
|
||||
// There can be multiple ranges under the vma already. Check if one spans
|
||||
// the new split boundary. If so, split it.
|
||||
va_range = uvm_va_range_find(va_space, new_end);
|
||||
UVM_ASSERT(va_range);
|
||||
UVM_ASSERT(uvm_va_range_vma_current(va_range) == original);
|
||||
if (va_range->node.end != new_end) {
|
||||
status = uvm_va_range_split(va_range, new_end, NULL);
|
||||
managed_range = uvm_va_range_managed_find(va_space, new_end);
|
||||
UVM_ASSERT(managed_range);
|
||||
UVM_ASSERT(uvm_va_range_vma_current(managed_range) == original);
|
||||
if (managed_range->va_range.node.end != new_end) {
|
||||
status = uvm_va_range_split(managed_range, new_end, NULL);
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT("Failed to split VA range, destroying both: %s. "
|
||||
"original vma [0x%lx, 0x%lx) new vma [0x%lx, 0x%lx)\n",
|
||||
@@ -552,10 +547,10 @@ static void uvm_vm_open_managed(struct vm_area_struct *vma)
|
||||
}
|
||||
}
|
||||
|
||||
// Point va_ranges to the new vma
|
||||
uvm_for_each_va_range_in_vma(va_range, vma) {
|
||||
UVM_ASSERT(uvm_va_range_vma_current(va_range) == original);
|
||||
va_range->managed.vma_wrapper = vma->vm_private_data;
|
||||
// Point managed_ranges to the new vma
|
||||
uvm_for_each_va_range_managed_in_vma(managed_range, vma) {
|
||||
UVM_ASSERT(uvm_va_range_vma_current(managed_range) == original);
|
||||
managed_range->vma_wrapper = vma->vm_private_data;
|
||||
}
|
||||
|
||||
out:
|
||||
@@ -657,12 +652,12 @@ static struct vm_operations_struct uvm_vm_ops_managed =
|
||||
};
|
||||
|
||||
// vm operations on semaphore pool allocations only control CPU mappings. Unmapping GPUs,
|
||||
// freeing the allocation, and destroying the va_range are handled by UVM_FREE.
|
||||
// freeing the allocation, and destroying the range are handled by UVM_FREE.
|
||||
static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
|
||||
{
|
||||
struct vm_area_struct *origin_vma = (struct vm_area_struct *)vma->vm_private_data;
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(origin_vma->vm_file);
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
|
||||
bool is_fork = (vma->vm_mm != origin_vma->vm_mm);
|
||||
NV_STATUS status;
|
||||
|
||||
@@ -670,18 +665,24 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
|
||||
|
||||
uvm_va_space_down_write(va_space);
|
||||
|
||||
va_range = uvm_va_range_find(va_space, origin_vma->vm_start);
|
||||
UVM_ASSERT(va_range);
|
||||
UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL &&
|
||||
va_range->node.start == origin_vma->vm_start &&
|
||||
va_range->node.end + 1 == origin_vma->vm_end,
|
||||
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, origin_vma->vm_start);
|
||||
UVM_ASSERT(semaphore_pool_range);
|
||||
UVM_ASSERT_MSG(semaphore_pool_range &&
|
||||
semaphore_pool_range->va_range.node.start == origin_vma->vm_start &&
|
||||
semaphore_pool_range->va_range.node.end + 1 == origin_vma->vm_end,
|
||||
"origin vma [0x%llx, 0x%llx); va_range [0x%llx, 0x%llx) type %d\n",
|
||||
(NvU64)origin_vma->vm_start, (NvU64)origin_vma->vm_end, va_range->node.start,
|
||||
va_range->node.end + 1, va_range->type);
|
||||
(NvU64)origin_vma->vm_start,
|
||||
(NvU64)origin_vma->vm_end,
|
||||
semaphore_pool_range->va_range.node.start,
|
||||
semaphore_pool_range->va_range.node.end + 1,
|
||||
semaphore_pool_range->va_range.type);
|
||||
|
||||
// Semaphore pool vmas do not have vma wrappers, but some functions will
|
||||
// assume vm_private_data is a wrapper.
|
||||
vma->vm_private_data = NULL;
|
||||
#if defined(VM_WIPEONFORK)
|
||||
nv_vm_flags_set(vma, VM_WIPEONFORK);
|
||||
#endif
|
||||
|
||||
if (is_fork) {
|
||||
// If we forked, leave the parent vma alone.
|
||||
@@ -689,9 +690,9 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
|
||||
|
||||
// uvm_disable_vma unmaps in the parent as well; clear the uvm_mem CPU
|
||||
// user mapping metadata and then remap.
|
||||
uvm_mem_unmap_cpu_user(va_range->semaphore_pool.mem);
|
||||
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
|
||||
|
||||
status = uvm_mem_map_cpu_user(va_range->semaphore_pool.mem, va_range->va_space, origin_vma);
|
||||
status = uvm_mem_map_cpu_user(semaphore_pool_range->mem, semaphore_pool_range->va_range.va_space, origin_vma);
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT("Failed to remap semaphore pool to CPU for parent after fork; status = %d (%s)",
|
||||
status, nvstatusToString(status));
|
||||
@@ -702,7 +703,7 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
|
||||
origin_vma->vm_private_data = NULL;
|
||||
origin_vma->vm_ops = &uvm_vm_ops_disabled;
|
||||
vma->vm_ops = &uvm_vm_ops_disabled;
|
||||
uvm_mem_unmap_cpu_user(va_range->semaphore_pool.mem);
|
||||
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
|
||||
}
|
||||
|
||||
uvm_va_space_up_write(va_space);
|
||||
@@ -751,10 +752,84 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
|
||||
#endif
|
||||
};
|
||||
|
||||
static void uvm_vm_open_device_p2p(struct vm_area_struct *vma)
|
||||
{
|
||||
struct vm_area_struct *origin_vma = (struct vm_area_struct *)vma->vm_private_data;
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(origin_vma->vm_file);
|
||||
uvm_va_range_t *va_range;
|
||||
bool is_fork = (vma->vm_mm != origin_vma->vm_mm);
|
||||
|
||||
uvm_record_lock_mmap_lock_write(current->mm);
|
||||
|
||||
uvm_va_space_down_write(va_space);
|
||||
|
||||
va_range = uvm_va_range_find(va_space, origin_vma->vm_start);
|
||||
UVM_ASSERT(va_range);
|
||||
UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_DEVICE_P2P &&
|
||||
va_range->node.start == origin_vma->vm_start &&
|
||||
va_range->node.end + 1 == origin_vma->vm_end,
|
||||
"origin vma [0x%llx, 0x%llx); va_range [0x%llx, 0x%llx) type %d\n",
|
||||
(NvU64)origin_vma->vm_start, (NvU64)origin_vma->vm_end, va_range->node.start,
|
||||
va_range->node.end + 1, va_range->type);
|
||||
|
||||
// Device P2P vmas do not have vma wrappers, but some functions will
|
||||
// assume vm_private_data is a wrapper.
|
||||
vma->vm_private_data = NULL;
|
||||
#if defined(VM_WIPEONFORK)
|
||||
nv_vm_flags_set(vma, VM_WIPEONFORK);
|
||||
#endif
|
||||
|
||||
if (is_fork) {
|
||||
// If we forked, leave the parent vma alone.
|
||||
uvm_disable_vma(vma);
|
||||
|
||||
// uvm_disable_vma unmaps in the parent as well so remap the parent
|
||||
uvm_va_range_device_p2p_map_cpu(va_range->va_space, origin_vma, uvm_va_range_to_device_p2p(va_range));
|
||||
}
|
||||
else {
|
||||
// mremap will free the backing pages via unmap so we can't support it.
|
||||
origin_vma->vm_private_data = NULL;
|
||||
origin_vma->vm_ops = &uvm_vm_ops_disabled;
|
||||
vma->vm_ops = &uvm_vm_ops_disabled;
|
||||
unmap_mapping_range(va_space->mapping, va_range->node.start, va_range->node.end - va_range->node.start + 1, 1);
|
||||
}
|
||||
|
||||
uvm_va_space_up_write(va_space);
|
||||
|
||||
uvm_record_unlock_mmap_lock_write(current->mm);
|
||||
}
|
||||
|
||||
static void uvm_vm_open_device_p2p_entry(struct vm_area_struct *vma)
|
||||
{
|
||||
UVM_ENTRY_VOID(uvm_vm_open_device_p2p(vma));
|
||||
}
|
||||
|
||||
// Device P2P pages are only mapped on the CPU. Pages are allocated externally
|
||||
// to UVM but destroying the range must unpin the RM object.
|
||||
static void uvm_vm_close_device_p2p(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static void uvm_vm_close_device_p2p_entry(struct vm_area_struct *vma)
|
||||
{
|
||||
UVM_ENTRY_VOID(uvm_vm_close_device_p2p(vma));
|
||||
}
|
||||
|
||||
static struct vm_operations_struct uvm_vm_ops_device_p2p =
|
||||
{
|
||||
.open = uvm_vm_open_device_p2p_entry,
|
||||
.close = uvm_vm_close_device_p2p_entry,
|
||||
|
||||
#if defined(NV_VM_OPS_FAULT_REMOVED_VMA_ARG)
|
||||
.fault = uvm_vm_fault_sigbus_wrapper_entry,
|
||||
#else
|
||||
.fault = uvm_vm_fault_sigbus_entry,
|
||||
#endif
|
||||
};
|
||||
|
||||
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_range_t *va_range;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
int ret = 0;
|
||||
bool vma_wrapper_allocated = false;
|
||||
@@ -792,8 +867,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
}
|
||||
|
||||
// If the PM lock cannot be acquired, disable the VMA and report success
|
||||
// to the caller. The caller is expected to determine whether the
|
||||
// map operation succeeded via an ioctl() call. This is necessary to
|
||||
// to the caller. The caller is expected to determine whether the
|
||||
// map operation succeeded via an ioctl() call. This is necessary to
|
||||
// safely handle MAP_FIXED, which needs to complete atomically to prevent
|
||||
// the loss of the virtual address range.
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
|
||||
@@ -845,18 +920,28 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
status = uvm_va_range_create_mmap(va_space, current->mm, vma->vm_private_data, NULL);
|
||||
|
||||
if (status == NV_ERR_UVM_ADDRESS_IN_USE) {
|
||||
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
|
||||
uvm_va_range_device_p2p_t *device_p2p_range;
|
||||
// If the mmap is for a semaphore pool, the VA range will have been
|
||||
// allocated by a previous ioctl, and the mmap just creates the CPU
|
||||
// mapping.
|
||||
va_range = uvm_va_range_find(va_space, vma->vm_start);
|
||||
if (va_range && va_range->node.start == vma->vm_start &&
|
||||
va_range->node.end + 1 == vma->vm_end &&
|
||||
va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL) {
|
||||
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, vma->vm_start);
|
||||
device_p2p_range = uvm_va_range_device_p2p_find(va_space, vma->vm_start);
|
||||
if (semaphore_pool_range && semaphore_pool_range->va_range.node.start == vma->vm_start &&
|
||||
semaphore_pool_range->va_range.node.end + 1 == vma->vm_end) {
|
||||
uvm_vma_wrapper_destroy(vma->vm_private_data);
|
||||
vma_wrapper_allocated = false;
|
||||
vma->vm_private_data = vma;
|
||||
vma->vm_ops = &uvm_vm_ops_semaphore_pool;
|
||||
status = uvm_mem_map_cpu_user(va_range->semaphore_pool.mem, va_range->va_space, vma);
|
||||
status = uvm_mem_map_cpu_user(semaphore_pool_range->mem, semaphore_pool_range->va_range.va_space, vma);
|
||||
}
|
||||
else if (device_p2p_range && device_p2p_range->va_range.node.start == vma->vm_start &&
|
||||
device_p2p_range->va_range.node.end + 1 == vma->vm_end) {
|
||||
uvm_vma_wrapper_destroy(vma->vm_private_data);
|
||||
vma_wrapper_allocated = false;
|
||||
vma->vm_private_data = vma;
|
||||
vma->vm_ops = &uvm_vm_ops_device_p2p;
|
||||
status = uvm_va_range_device_p2p_map_cpu(va_space, vma, device_p2p_range);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -914,8 +999,9 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
// attempt to be made. This is safe because other threads will have only had
// a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
// case.
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                     UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                  UVM_FD_UNINITIALIZED,
                                  UVM_FD_INITIALIZING);
old_fd_type &= UVM_FD_TYPE_MASK;
if (old_fd_type == UVM_FD_UNINITIALIZED) {
    status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
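The change above only swaps the nv_ wrapper for the kernel's atomic_long_cmpxchg(); the first-caller-wins logic is unchanged. A minimal sketch of that pattern, with simplified stand-in values for the UVM_FD_* constants (illustrative only, not part of this diff):

#include <linux/atomic.h>
#include <linux/types.h>

#define FD_UNINITIALIZED 0L   /* stand-in for UVM_FD_UNINITIALIZED */
#define FD_INITIALIZING  1L   /* stand-in for UVM_FD_INITIALIZING  */

/* Exactly one caller observes FD_UNINITIALIZED and wins the right to
 * initialize; every later caller sees a non-uninitialized old value. */
static bool claim_fd_once(atomic_long_t *fd_type)
{
    long old = atomic_long_cmpxchg(fd_type, FD_UNINITIALIZED, FD_INITIALIZING);

    return old == FD_UNINITIALIZED;
}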
@@ -1001,6 +1087,9 @@ static long uvm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_CLEAN_UP_ZOMBIE_RESOURCES, uvm_api_clean_up_zombie_resources);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_POPULATE_PAGEABLE, uvm_api_populate_pageable);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_VALIDATE_VA_RANGE, uvm_api_validate_va_range);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_V2,uvm_api_tools_get_processor_uuid_table_v2);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_ALLOC_DEVICE_P2P, uvm_api_alloc_device_p2p);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_CLEAR_ALL_ACCESS_COUNTERS, uvm_api_clear_all_access_counters);
|
||||
}
|
||||
|
||||
// Try the test ioctls if none of the above matched
@@ -1144,19 +1233,8 @@ static int uvm_init(void)
goto error;
}

pr_info("Loaded the UVM driver, major device number %d.\n", MAJOR(g_uvm_base_dev));

if (uvm_enable_builtin_tests)
    pr_info("Built-in UVM tests are enabled. This is a security risk.\n");

// After Open RM is released, both the enclosing "#if" and this comment
// block should be removed, because the uvm_hmm_is_enabled_system_wide()
// check is both necessary and sufficient for reporting functionality.
// Until that time, however, we need to avoid advertising UVM's ability to
// enable HMM functionality.

if (uvm_hmm_is_enabled_system_wide())
    UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
UVM_INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");

return 0;

@@ -1185,8 +1263,6 @@ static void uvm_exit(void)
|
||||
uvm_global_exit();
|
||||
|
||||
uvm_test_unload_state_exit();
|
||||
|
||||
pr_info("Unloaded the UVM driver.\n");
|
||||
}
|
||||
|
||||
static void __exit uvm_exit_entry(void)
@@ -45,20 +45,20 @@
// #endif
// 3) Do the same thing for the function definition, and for any structs that
// are taken as arguments to these functions.
// 4) Let this change propagate over to cuda_a and dev_a, so that the CUDA and
// nvidia-cfg libraries can start using the new API by bumping up the API
// 4) Let this change propagate over to cuda_a and bugfix_main, so that the CUDA
// and nvidia-cfg libraries can start using the new API by bumping up the API
// version number it's using.
// Places where UVM_API_REVISION is defined are:
// drivers/gpgpu/cuda/cuda.nvmk (cuda_a)
// drivers/setup/linux/nvidia-cfg/makefile.nvmk (dev_a)
// 5) Once the dev_a and cuda_a changes have made it back into chips_a,
// drivers/setup/linux/nvidia-cfg/makefile.nvmk (bugfix_main)
// 5) Once the bugfix_main and cuda_a changes have made it back into chips_a,
// remove the old API declaration, definition, and any old structs that were
// in use.

#ifndef _UVM_H_
#define _UVM_H_

#define UVM_API_LATEST_REVISION 12
#define UVM_API_LATEST_REVISION 13

#if !defined(UVM_API_REVISION)
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
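Old and new prototypes coexist in this header behind the revision guard. A minimal sketch of how a revised entry point is typically gated (illustrative only, not part of this diff; the function and parameter struct are hypothetical names):

// Clients built against revision <= 12 keep the old prototype; newer clients
// see the extended one. UvmExampleQuery and UvmExampleParams are made-up names.
#if UVM_API_REV_IS_AT_MOST(12)
NV_STATUS UvmExampleQuery(void *base, NvLength length);
#else
NV_STATUS UvmExampleQuery(void *base, NvLength length, const UvmExampleParams *params);
#endif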
@@ -379,41 +379,24 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
|
||||
// OS state required to register the GPU is malformed, or the partition
|
||||
// identified by the user handles or its configuration changed.
|
||||
//
|
||||
// NV_ERR_NVLINK_FABRIC_NOT_READY:
|
||||
// (On NvSwitch-connected system) Indicates that the fabric has not been
|
||||
// configured yet. Caller must retry GPU registration.
|
||||
//
|
||||
// NV_ERR_NVLINK_FABRIC_FAILURE:
|
||||
// (On NvSwitch-connected systems) Indicates that the NvLink fabric
|
||||
// failed to be configured.
|
||||
//
|
||||
// NV_ERR_GPU_MEMORY_ONLINING_FAULURE:
|
||||
// (On coherent systems) The GPU's memory onlining failed.
|
||||
//
|
||||
// NV_ERR_GENERIC:
|
||||
// Unexpected error. We try hard to avoid returning this error code,
|
||||
// because it is not very informative.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(8)
|
||||
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid);
|
||||
#else
|
||||
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid,
|
||||
const UvmGpuPlatformParams *platformParams);
|
||||
#endif
|
||||
|
||||
#if UVM_API_REV_IS_AT_MOST(8)
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmRegisterGpuSmc
|
||||
//
|
||||
// The same as UvmRegisterGpu, but takes additional parameters to specify the
|
||||
// GPU partition being registered if SMC is enabled.
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the physical GPU of the SMC partition to register.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// User handles identifying the partition to register.
|
||||
//
|
||||
// Error codes (see UvmRegisterGpu also):
|
||||
//
|
||||
// NV_ERR_INVALID_STATE:
|
||||
// SMC was not enabled, or the partition identified by the user
|
||||
// handles or its configuration changed.
|
||||
//
|
||||
NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
|
||||
const UvmGpuPlatformParams *platformParams);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmUnregisterGpu
|
||||
@@ -1345,9 +1328,8 @@ NV_STATUS UvmCleanUpZombieResources(void);
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
|
||||
// or caching is requested on more than one GPU.
|
||||
// The Confidential Computing feature is enabled and the perGpuAttribs
|
||||
// list is empty.
|
||||
// or caching is requested on more than one GPU, or (in Confidential
|
||||
// Computing only) the perGpuAttribs list is empty.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The current process is not the one which called UvmInitialize, and
|
||||
@@ -1364,6 +1346,86 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
|
||||
const UvmGpuMappingAttributes *perGpuAttribs,
|
||||
NvLength gpuAttribsCount);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmAllocDeviceP2P
|
||||
//
|
||||
// Create a VA range within the process's address space reserved for use by
|
||||
// other devices to directly access GPU memory. The memory associated with the
|
||||
// RM handle is mapped into the user address space associated with the range for
|
||||
// direct access from the CPU.
|
||||
//
|
||||
// The VA range must not overlap with an existing VA range, irrespective of
|
||||
// whether the existing range corresponds to a UVM allocation or an external
|
||||
// allocation.
|
||||
//
|
||||
// Multiple VA ranges may be created mapping the same physical memory associated
|
||||
// with the RM handle. The associated GPU memory will not be freed until all VA
|
||||
// ranges have been destroyed either explicitly or implicitly and all non-UVM
|
||||
// users (eg. third party device drivers) have stopped using the associated
|
||||
// GPU memory.
|
||||
//
|
||||
// The VA range can be unmapped and freed by calling UvmFree.
|
||||
//
|
||||
// Destroying the final range mapping the RM handle may block until all third
|
||||
// party device drivers and other kernel users have stopped using the memory.
|
||||
//
|
||||
// These VA ranges are only associated with a single GPU.
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition containing the
|
||||
// memory to be mapped on the CPU.
|
||||
//
|
||||
// base: (INPUT)
|
||||
// Base address of the virtual address range.
|
||||
//
|
||||
// length: (INPUT)
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// offset: (INPUT)
|
||||
// Offset, in bytes, from the start of the externally allocated memory
|
||||
// to map from.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// Platform specific parameters that identify the allocation.
|
||||
// On Linux: RM ctrl fd, hClient and the handle (hMemory) of the
|
||||
// externally allocated memory to map.
|
||||
//
|
||||
// Errors:
|
||||
//
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// base is NULL or length is zero or at least one of base and length is
|
||||
// not aligned to 4K.
|
||||
//
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
// The gpuUuid was either not registered or has no GPU VA space
|
||||
// registered for it.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
// base + offset + length exceeds the end of the externally allocated
// memory handle or the externally allocated handle is not valid.
//
|
||||
// NV_ERR_UVM_ADDRESS_IN_USE:
|
||||
// The requested virtual address range overlaps with an existing
|
||||
// allocation.
|
||||
//
|
||||
// NV_ERR_NO_MEMORY:
|
||||
// Internal memory allocation failed.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The device peer-to-peer feature is not supported by the current
|
||||
// system configuration. This may be because the GPU doesn't support
|
||||
// the peer-to-peer feature or the kernel was not built with the correct
|
||||
// configuration options.
|
||||
//
|
||||
//------------------------------------------------------------------------------
NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
                            void *base,
                            NvLength length,
                            NvLength offset,
                            const UvmDeviceP2PPlatformParams *platformParams);

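A minimal usage sketch for the declaration above (illustrative only, not part of this diff; the UvmDeviceP2PPlatformParams field names and the 64 MB length are assumptions, and error handling is reduced to returning the status):

#include <string.h>
#include "uvm.h"

static NV_STATUS map_external_allocation(const NvProcessorUuid *gpuUuid,
                                         void *base,
                                         int rmCtrlFd,
                                         NvHandle hClient,
                                         NvHandle hMemory)
{
    UvmDeviceP2PPlatformParams params;

    memset(&params, 0, sizeof(params));
    params.rmCtrlFd = rmCtrlFd;   /* assumed field names for the Linux RM     */
    params.hClient  = hClient;    /* ctrl fd / hClient / hMemory triplet      */
    params.hMemory  = hMemory;    /* described in the documentation above     */

    /* Map 64 MB of the external allocation, starting at offset 0. */
    return UvmAllocDeviceP2P(*gpuUuid, base, 64 * 1024 * 1024, 0, &params);
}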
//------------------------------------------------------------------------------
|
||||
// UvmMigrate
|
||||
//
|
||||
@@ -1417,7 +1479,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
|
||||
// If read duplication is enabled on any pages in the VA range, then those pages
|
||||
// are read duplicated at the destination processor, leaving the source copy, if
|
||||
// present, intact with only its mapping changed to read-only if it wasn't
|
||||
// already mapped that way.
|
||||
// already mapped that way. The exception to this behavior is migrating pages
|
||||
// between different NUMA nodes, in which case the pages are migrated to the
|
||||
// destination node and a read-only mapping is created to the migrated pages.
|
||||
//
|
||||
// Pages in the VA range are migrated even if their preferred location is set to
|
||||
// a processor other than the destination processor.
|
||||
@@ -2160,7 +2224,9 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
|
||||
//
|
||||
// If UvmMigrate, UvmMigrateAsync or UvmMigrateRangeGroup is called on any pages
|
||||
// in this VA range, then those pages will also be read duplicated on the
|
||||
// destination processor for the migration.
|
||||
// destination processor for the migration unless the migration is between CPU
|
||||
// NUMA nodes, in which case the pages are migrated to the destination NUMA
|
||||
// node and a read-only mapping to the migrated pages is created.
|
||||
//
|
||||
// Enabling read duplication on a VA range requires the CPU and all GPUs with
|
||||
// registered VA spaces to be fault-capable. Otherwise, the migration and
|
||||
@@ -3276,7 +3342,7 @@ NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
|
||||
UvmEventQueueHandle queueHandle,
|
||||
UvmEventEntry_V1 *pBuffer,
|
||||
UvmEventEntry *pBuffer,
|
||||
NvU64 *nEntries);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -3472,32 +3538,21 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
|
||||
// 4. Destroy event Queue using UvmToolsDestroyEventQueue
|
||||
//
|
||||
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
// This is deprecated and replaced by sizeof(UvmToolsEventControlData).
|
||||
NvLength UvmToolsGetEventControlSize(void);
|
||||
|
||||
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
|
||||
// sizeof(UvmEventEntry_V2).
|
||||
NvLength UvmToolsGetEventEntrySize(void);
|
||||
#endif
|
||||
|
||||
NvLength UvmToolsGetNumberOfCounters(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsCreateEventQueue
|
||||
//
|
||||
// This call creates an event queue that can hold the given number of events.
|
||||
// All events are disabled by default. Event queue data persists lifetime of the
|
||||
// target process.
|
||||
// This function is deprecated. See UvmToolsCreateEventQueue_V2.
|
||||
//
|
||||
// This call creates an event queue that can hold the given number of
|
||||
// UvmEventEntry events. All events are disabled by default. Event queue data
|
||||
// persists lifetime of the target process.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// version: (INPUT)
|
||||
// Requested version for events or counters.
|
||||
// See UvmToolsEventQueueVersion.
|
||||
//
|
||||
// event_buffer: (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold at least event_buffer_size events. Gets pinned until queue is
|
||||
@@ -3520,8 +3575,55 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// Session handle does not refer to a valid session
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmToolsEventQueueVersion_V1 or
|
||||
// UvmToolsEventQueueVersion_V2.
|
||||
// One of the parameters: event_buffer, event_buffer_size, event_control
|
||||
// is not valid
|
||||
//
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES:
|
||||
// There could be multiple reasons for this error. One would be that
|
||||
// it's not possible to allocate a queue of requested size. Another
|
||||
// would be either event_buffer or event_control memory couldn't be
|
||||
// pinned (e.g. because of OS limitation of pinnable memory). Also it
|
||||
// could not have been possible to create UvmToolsEventQueueDescriptor.
|
||||
//
|
||||
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
|
||||
void *event_buffer,
|
||||
NvLength event_buffer_size,
|
||||
void *event_control,
|
||||
UvmToolsEventQueueHandle *queue);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsCreateEventQueue_V2
|
||||
//
|
||||
// This call creates an event queue that can hold the given number of
|
||||
// UvmEventEntry_V2 events. All events are disabled by default. Event queue data
|
||||
// persists beyond the lifetime of the target process.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// event_buffer: (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold at least event_buffer_size events. Gets pinned until queue is
|
||||
// destroyed.
|
||||
//
|
||||
// event_buffer_size: (INPUT)
|
||||
// Size of the event queue buffer in units of UvmEventEntry_V2's. Must
|
||||
// be a power of two, and greater than 1.
|
||||
//
|
||||
// event_control (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold UvmToolsEventControlData (although single page-size allocation
|
||||
// should be more than enough). Gets pinned until queue is destroyed.
|
||||
//
|
||||
// queue: (OUTPUT)
|
||||
// Handle to the created queue.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INSUFFICIENT_PERMISSIONS:
|
||||
// Session handle does not refer to a valid session
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// One of the parameters: event_buffer, event_buffer_size, event_control
|
||||
// is not valid
|
||||
//
|
||||
@@ -3538,20 +3640,11 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// could not have been possible to create UvmToolsEventQueueDescriptor.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
|
||||
void *event_buffer,
|
||||
NvLength event_buffer_size,
|
||||
void *event_control,
|
||||
UvmToolsEventQueueHandle *queue);
|
||||
#else
|
||||
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
void *event_buffer,
|
||||
NvLength event_buffer_size,
|
||||
void *event_control,
|
||||
UvmToolsEventQueueHandle *queue);
|
||||
#endif
NV_STATUS UvmToolsCreateEventQueue_V2(UvmToolsSessionHandle session,
                                      void *event_buffer,
                                      NvLength event_buffer_size,
                                      void *event_control,
                                      UvmToolsEventQueueHandle *queue);

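A minimal allocation sketch for the V2 queue described above (illustrative only, not part of this diff; the 256-entry depth is arbitrary and error handling is reduced to a single status):

#include <stdlib.h>
#include <unistd.h>
#include "uvm.h"

static NV_STATUS create_v2_queue(UvmToolsSessionHandle session,
                                 UvmToolsEventQueueHandle *queue)
{
    const NvLength n_entries = 256;  /* must be a power of two, greater than 1 */
    size_t page = (size_t)sysconf(_SC_PAGESIZE);
    void *events;
    void *control;

    /* Both buffers must be page-aligned; they stay pinned until the queue is
     * destroyed, so they must not be freed while the queue exists. */
    if (posix_memalign(&events, page, n_entries * sizeof(UvmEventEntry_V2)) != 0)
        return NV_ERR_NO_MEMORY;

    if (posix_memalign(&control, page, sizeof(UvmToolsEventControlData)) != 0) {
        free(events);
        return NV_ERR_NO_MEMORY;
    }

    return UvmToolsCreateEventQueue_V2(session, events, n_entries, control, queue);
}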
UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);
|
||||
|
||||
@@ -3866,9 +3959,7 @@ NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
|
||||
// In-process scenario when targetVa address + size overlaps with buffer + size.
|
||||
//
|
||||
// This is essentially a UVM version of RM ctrl call
|
||||
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
|
||||
// information), please refer to the documentation:
|
||||
// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
|
||||
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
@@ -3922,9 +4013,7 @@ NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle session,
|
||||
// buffer + size.
|
||||
//
|
||||
// This is essentially a UVM version of RM ctrl call
|
||||
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
|
||||
// information), please refer to the documentation:
|
||||
// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
|
||||
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
@@ -3967,6 +4056,8 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsGetProcessorUuidTable
|
||||
//
|
||||
// This function is deprecated. See UvmToolsGetProcessorUuidTable_V2.
|
||||
//
|
||||
// Populate a table with the UUIDs of all the currently registered processors
|
||||
// in the target process. When a GPU is registered, it is added to the table.
|
||||
// When a GPU is unregistered, it is removed. As long as a GPU remains
|
||||
@@ -3979,55 +4070,63 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// version: (INPUT)
|
||||
// Requested version for the UUID table returned. The version must
|
||||
// match the requested version of the event queue created with
|
||||
// UvmToolsCreateEventQueue(). See UvmToolsEventQueueVersion.
|
||||
// If the version of the event queue does not match the version of the
|
||||
// UUID table, the behavior is undefined.
|
||||
//
|
||||
// table: (OUTPUT)
|
||||
// Array of processor UUIDs, including the CPU's UUID which is always
|
||||
// at index zero. The number of elements in the array must be greater
|
||||
// or equal to UVM_MAX_PROCESSORS_V1 if the version is
|
||||
// UvmToolsEventQueueVersion_V1 and UVM_MAX_PROCESSORS if the version is
|
||||
// UvmToolsEventQueueVersion_V2.
|
||||
// or equal to UVM_MAX_PROCESSORS_V1.
|
||||
// The srcIndex and dstIndex fields of the UvmEventMigrationInfo struct
|
||||
// index this array. Unused indices will have a UUID of zero.
|
||||
// If version is UvmToolsEventQueueVersion_V1 then the reported UUID
|
||||
// will be that of the corresponding physical GPU, even if multiple SMC
|
||||
// partitions are registered under that physical GPU. If version is
|
||||
// UvmToolsEventQueueVersion_V2 then the reported UUID will be the GPU
|
||||
// instance UUID if SMC is enabled, otherwise it will be the UUID of
|
||||
// the physical GPU.
|
||||
// The reported UUID will be that of the corresponding physical GPU,
|
||||
// even if multiple SMC partitions are registered under that physical
|
||||
// GPU.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// writing to table failed.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmToolsEventQueueVersion_V1 or
|
||||
// UvmToolsEventQueueVersion_V2.
|
||||
// NV_ERR_NO_MEMORY:
|
||||
// Internal memory allocation failed.
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
NvProcessorUuid *table);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsGetProcessorUuidTable_V2
|
||||
//
|
||||
// Populate a table with the UUIDs of all the currently registered processors
|
||||
// in the target process. When a GPU is registered, it is added to the table.
|
||||
// When a GPU is unregistered, it is removed. As long as a GPU remains
|
||||
// registered, its index in the table does not change.
|
||||
// Note that the index in the table corresponds to the processor ID reported
|
||||
// in UvmEventEntry event records and that the table is not contiguously packed
|
||||
// with non-zero UUIDs even with no GPU unregistrations.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// table: (OUTPUT)
|
||||
// Array of processor UUIDs, including the CPU's UUID which is always
|
||||
// at index zero. The number of elements in the array must be greater
|
||||
// or equal to UVM_MAX_PROCESSORS.
|
||||
// The srcIndex and dstIndex fields of the UvmEventMigrationInfo struct
|
||||
// index this array. Unused indices will have a UUID of zero.
|
||||
// The reported UUID will be the GPU instance UUID if SMC is enabled,
|
||||
// otherwise it will be the UUID of the physical GPU.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// writing to table failed.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The kernel is not able to support the requested version
|
||||
// (i.e., the UVM kernel driver is older and doesn't support
|
||||
// UvmToolsEventQueueVersion_V2).
|
||||
// The UVM kernel driver is older and doesn't support
|
||||
// UvmToolsGetProcessorUuidTable_V2.
|
||||
//
|
||||
// NV_ERR_NO_MEMORY:
|
||||
// Internal memory allocation failed.
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(11)
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
NvProcessorUuid *table,
|
||||
NvLength table_size,
|
||||
NvLength *count);
|
||||
#else
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
NvProcessorUuid *table);
|
||||
#endif
NV_STATUS UvmToolsGetProcessorUuidTable_V2(UvmToolsSessionHandle session,
                                           NvProcessorUuid *table);

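A minimal call sketch for the V2 table (illustrative only, not part of this diff); index zero is always the CPU and unused slots hold a zero UUID:

#include "uvm.h"

static NV_STATUS fetch_uuid_table(UvmToolsSessionHandle session,
                                  NvProcessorUuid table[UVM_MAX_PROCESSORS])
{
    /* The array must hold at least UVM_MAX_PROCESSORS entries; srcIndex and
     * dstIndex in UvmEventMigrationInfo index directly into it. */
    return UvmToolsGetProcessorUuidTable_V2(session, table);
}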
//------------------------------------------------------------------------------
|
||||
// UvmToolsFlushEvents
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
Copyright (c) 2021-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -38,12 +38,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
|
||||
parent_gpu->utlb_per_gpc_count;
|
||||
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
|
||||
{
|
||||
uvm_fault_buffer_entry_t *dummy;
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
|
||||
8)));
|
||||
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
}
|
||||
|
||||
// A single top level PDE on Ada covers 128 TB and that's the minimum size
|
||||
@@ -51,6 +49,9 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
|
||||
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
@@ -79,8 +80,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = true;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = true;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = true;
|
||||
@@ -98,4 +97,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->plc_supported = true;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
|
||||
parent_gpu->conf_computing.per_channel_key_rotation = false;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
Copyright (c) 2018-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -38,12 +38,10 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
|
||||
parent_gpu->utlb_per_gpc_count;
|
||||
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
|
||||
{
|
||||
uvm_fault_buffer_entry_t *dummy;
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
|
||||
(sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
}
|
||||
|
||||
// A single top level PDE on Ampere covers 128 TB and that's the minimum
|
||||
@@ -51,6 +49,9 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
|
||||
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
@@ -83,8 +84,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = true;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = true;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = true;
|
||||
@@ -107,4 +106,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->plc_supported = true;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
|
||||
parent_gpu->conf_computing.per_channel_key_rotation = false;
|
||||
}
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
|
||||
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
UVM_ASSERT(push->channel);
|
||||
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
|
||||
@@ -116,6 +118,16 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
{
|
||||
NvU64 push_begin_gpu_va;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);
|
||||
|
||||
UVM_ASSERT(push->channel);
|
||||
|
||||
if (peer_copy && !uvm_channel_is_p2p(push->channel)) {
|
||||
UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
|
||||
src.address,
|
||||
dst.address);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
|
||||
return true;
|
||||
@@ -182,6 +194,8 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
|
||||
{
|
||||
uvm_pushbuffer_t *pushbuffer;
|
||||
|
||||
UVM_ASSERT(push->channel);
|
||||
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return;
|
||||
|
||||
|
||||
@@ -36,6 +36,8 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,
|
||||
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
|
||||
return true;
|
||||
|
||||
UVM_ASSERT(push->channel);
|
||||
|
||||
if (uvm_channel_is_privileged(push->channel)) {
|
||||
switch (method_address) {
|
||||
case NVC56F_SET_OBJECT:
|
||||
@@ -84,6 +86,8 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,
|
||||
|
||||
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
UVM_ASSERT(push->channel);
|
||||
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@
|
||||
{ \
|
||||
params_type params; \
|
||||
BUILD_BUG_ON(sizeof(params) > UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
|
||||
if (nv_copy_from_user(¶ms, (void __user*)arg, sizeof(params))) \
|
||||
if (copy_from_user(¶ms, (void __user*)arg, sizeof(params))) \
|
||||
return -EFAULT; \
|
||||
\
|
||||
params.rmStatus = uvm_global_get_status(); \
|
||||
@@ -60,7 +60,7 @@
|
||||
params.rmStatus = function_name(¶ms, filp); \
|
||||
} \
|
||||
\
|
||||
if (nv_copy_to_user((void __user*)arg, ¶ms, sizeof(params))) \
|
||||
if (copy_to_user((void __user*)arg, ¶ms, sizeof(params))) \
|
||||
return -EFAULT; \
|
||||
\
|
||||
return 0; \
|
||||
@@ -84,7 +84,7 @@
|
||||
if (!params) \
|
||||
return -ENOMEM; \
|
||||
BUILD_BUG_ON(sizeof(*params) <= UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
|
||||
if (nv_copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
|
||||
if (copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
|
||||
uvm_kvfree(params); \
|
||||
return -EFAULT; \
|
||||
} \
|
||||
@@ -99,7 +99,7 @@
params->rmStatus = function_name(params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, params, sizeof(*params))) \
if (copy_to_user((void __user*)arg, params, sizeof(*params))) \
    ret = -EFAULT; \
\
uvm_kvfree(params); \
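The hunks above swap the nv_copy_*_user wrappers for the kernel's copy_from_user/copy_to_user inside the ioctl routing macros. A sketch of the copy-in / dispatch / copy-out shape those macros expand to, written as a plain function for readability (illustrative only, not part of this diff; PARAMS_T and the handler are stand-ins):

#include <linux/fs.h>
#include <linux/uaccess.h>

typedef unsigned int NV_STATUS;                    /* stand-in for nvstatus.h */
typedef struct { NV_STATUS rmStatus; } PARAMS_T;   /* stand-in ioctl params   */

static long route_ioctl(unsigned long arg,
                        NV_STATUS (*handler)(PARAMS_T *params, struct file *filp),
                        struct file *filp)
{
    PARAMS_T params;

    /* Copy the parameter block in from user space. */
    if (copy_from_user(&params, (void __user *)arg, sizeof(params)))
        return -EFAULT;

    /* Run the command and report its status through the params block. */
    params.rmStatus = handler(&params, filp);

    /* Copy the (possibly updated) parameters back out. */
    if (copy_to_user((void __user *)arg, &params, sizeof(params)))
        return -EFAULT;

    return 0;
}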
@@ -244,6 +244,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_enable_system_wide_atomics(UVM_ENABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_disable_system_wide_atomics(UVM_DISABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_init_event_tracker_v2(UVM_TOOLS_INIT_EVENT_TRACKER_V2_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp);
|
||||
@@ -256,5 +257,7 @@ NV_STATUS uvm_api_unmap_external(UVM_UNMAP_EXTERNAL_PARAMS *params, struct file
|
||||
NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_populate_pageable(const UVM_POPULATE_PAGEABLE_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_alloc_device_p2p(UVM_ALLOC_DEVICE_P2P_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
|
||||
|
||||
#endif // __UVM_API_H__
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2023 NVIDIA Corporation
|
||||
Copyright (c) 2024-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/topology.h>
|
||||
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
#include <linux/hmm.h>
|
||||
@@ -55,8 +56,9 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 user_space_start;
|
||||
NvU64 user_space_length;
|
||||
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
|
||||
uvm_populate_permissions_t populate_permissions = fault_service_type ?
|
||||
bool is_fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
|
||||
bool is_prefetch_faults = (is_fault_service_type && (access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH));
|
||||
uvm_populate_permissions_t populate_permissions = is_fault_service_type ?
|
||||
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
|
||||
UVM_POPULATE_PERMISSIONS_INHERIT;
|
||||
|
||||
@@ -90,18 +92,27 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
uvm_migrate_args_t uvm_migrate_args =
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.dst_id = ats_context->residency_id,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = populate_permissions,
|
||||
.touch = fault_service_type,
|
||||
.skip_mapped = fault_service_type,
|
||||
.populate_on_cpu_alloc_failures = fault_service_type,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.dst_id = ats_context->residency_id,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = populate_permissions,
|
||||
.touch = is_fault_service_type,
|
||||
.skip_mapped = is_fault_service_type,
|
||||
.populate_on_cpu_alloc_failures = is_fault_service_type,
|
||||
.populate_on_migrate_vma_failures = is_fault_service_type,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
|
||||
// Potential STO NVLINK errors cannot be resolved in fault or access
|
||||
// counter handlers. If there are GPUs to check for STO, it's either
|
||||
// a) a false positive, and the migration went through ok, or
|
||||
// b) a true positive, and the destination is all zeros, and the
|
||||
// application will be terminated soon.
|
||||
.gpus_to_check_for_nvlink_errors = NULL,
|
||||
.fail_on_unresolved_sto_errors = !is_fault_service_type || is_prefetch_faults,
|
||||
};
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
@@ -112,7 +123,7 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// set skip_mapped to true. For pages already mapped, this will only handle
|
||||
// PTE upgrades if needed.
|
||||
status = uvm_migrate_pageable(&uvm_migrate_args);
|
||||
if (status == NV_WARN_NOTHING_TO_DO)
|
||||
if (is_fault_service_type && (status == NV_WARN_NOTHING_TO_DO))
|
||||
status = NV_OK;
|
||||
|
||||
UVM_ASSERT(status != NV_ERR_MORE_PROCESSING_REQUIRED);
|
||||
@@ -128,9 +139,9 @@ static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate;
|
||||
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.replayable.ats_invalidate;
|
||||
else
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.non_replayable.ats_invalidate;
|
||||
|
||||
if (!ats_invalidate->tlb_batch_pending) {
|
||||
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
|
||||
@@ -145,7 +156,10 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
int residency = uvm_gpu_numa_node(gpu);
|
||||
int residency;
|
||||
|
||||
UVM_ASSERT(gpu->mem_info.numa.enabled);
|
||||
residency = uvm_gpu_numa_node(gpu);
|
||||
|
||||
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
|
||||
struct mempolicy *vma_policy = vma_policy(vma);
|
||||
@@ -278,6 +292,27 @@ static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
|
||||
|
||||
#endif
|
||||
|
||||
static bool resident_policy_match(struct vm_area_struct *vma, int dst_nid, int src_nid)
|
||||
{
|
||||
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
|
||||
struct mempolicy *vma_policy = vma_policy(vma);
|
||||
|
||||
// TODO: Bug 4981209: When migrations between CPU numa nodes are supported,
|
||||
// add (dst_nid != closest_cpu_numa_node) to allow migrations between CPU
|
||||
// NUMA nodes when destination is the closest_cpu_numa_node.
|
||||
if (vma_policy &&
|
||||
node_isset(src_nid, vma_policy->nodes) &&
|
||||
node_isset(dst_nid, vma_policy->nodes) &&
|
||||
!cpumask_empty(cpumask_of_node(src_nid)) &&
|
||||
!cpumask_empty(cpumask_of_node(dst_nid))) {
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
@@ -357,9 +392,23 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
if (pfn & HMM_PFN_VALID) {
|
||||
struct page *page = hmm_pfn_to_page(pfn);
|
||||
int resident_node = page_to_nid(page);
|
||||
|
||||
if (page_to_nid(page) == ats_context->residency_node)
|
||||
// Set the residency_mask if:
|
||||
// - The page is already resident at the intended destination.
|
||||
// or
|
||||
// - If both the source and destination nodes are CPU nodes and
|
||||
// source node is already in the list of preferred nodes for
|
||||
// the vma. On multi-CPU NUMA node architectures, this avoids
|
||||
// unnecessary migrations between CPU nodes. Since the
|
||||
// specific ats_context->residency_node selected by
|
||||
// ats_batch_select_residency() is just a guess among the list
|
||||
// of preferred nodes, paying the cost of migration across the
|
||||
// CPU preferred nodes in this case can't be justified.
|
||||
if ((resident_node == ats_context->residency_node) ||
|
||||
resident_policy_match(vma, ats_context->residency_node, resident_node)) {
|
||||
uvm_page_mask_set(residency_mask, page_index);
|
||||
}
|
||||
|
||||
ats_context->prefetch_state.first_touch = false;
|
||||
}
|
||||
@@ -379,14 +428,20 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_va_block_region_t max_prefetch_region)
|
||||
{
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *accessed_mask;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
|
||||
|
||||
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS)
|
||||
accessed_mask = &ats_context->faults.accessed_mask;
|
||||
else
|
||||
accessed_mask = &ats_context->access_counters.accessed_mask;
|
||||
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return;
|
||||
|
||||
@@ -406,7 +461,7 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *accessed_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
@@ -420,6 +475,11 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS)
|
||||
accessed_mask = &ats_context->faults.accessed_mask;
|
||||
else
|
||||
accessed_mask = &ats_context->access_counters.accessed_mask;
|
||||
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return status;
|
||||
|
||||
@@ -432,12 +492,12 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
|
||||
}
|
||||
else {
|
||||
ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
|
||||
ats_compute_prefetch_mask(gpu_va_space, vma, service_type, ats_context, max_prefetch_region);
|
||||
}
|
||||
|
||||
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
@@ -451,6 +511,65 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_ats_service_faults_region(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_page_mask_t *faults_serviced_mask)
|
||||
{
|
||||
NvU64 start = base + (region.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_size(region);
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_requests(gpu_va_space,
|
||||
vma,
|
||||
start,
|
||||
length,
|
||||
access_type,
|
||||
UVM_ATS_SERVICE_TYPE_FAULTS,
|
||||
ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
uvm_page_mask_region_fill(faults_serviced_mask, region);
|
||||
|
||||
// WAR for older kernel versions missing an SMMU invalidate on RO -> RW
|
||||
// transition. The SMMU and GPU could have the stale RO copy cached in their
|
||||
// TLBs, which could have caused this write fault. This operation
|
||||
// invalidates the SMMU TLBs but not the GPU TLBs. That will happen below as
|
||||
// necessary.
|
||||
if (access_type == UVM_FAULT_ACCESS_TYPE_WRITE)
|
||||
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
|
||||
|
||||
// The Linux kernel does not invalidate TLB entries on an invalid to valid
|
||||
// PTE transition. The GPU might have the invalid PTE cached in its TLB.
|
||||
// The GPU will re-fetch an entry on access if the PTE is invalid and the
|
||||
// page size is not 4K, but if the page size is 4K no re-fetch will happen
|
||||
// and the GPU will fault despite the CPU PTE being valid. We don't know
|
||||
// whether these faults happened due to stale entries after a transition,
|
||||
// so use the hammer of always invalidating the GPU's TLB on each fault.
|
||||
//
|
||||
// The second case is similar and handles missing ATS invalidations on RO ->
|
||||
// RW transitions for all page sizes. See the uvm_ats_smmu_invalidate_tlbs()
|
||||
// call above.
|
||||
if (PAGE_SIZE == UVM_PAGE_SIZE_4K || (UVM_ATS_SMMU_WAR_REQUIRED() && access_type == UVM_FAULT_ACCESS_TYPE_WRITE)) {
|
||||
flush_tlb_va_region(gpu_va_space, start, length, ats_context->client_type);
|
||||
}
|
||||
else {
|
||||
// ARM requires TLB invalidations on RO -> RW, but not all architectures
|
||||
// do. If we implement ATS support on other architectures, we might need
|
||||
// to issue GPU invalidates.
|
||||
UVM_ASSERT(NVCPU_IS_AARCH64);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
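For readability, the invalidation policy implemented in the branch above can be restated as a single predicate. This is only a paraphrase of the code shown here, with a hypothetical helper name, not a separate driver API:

static bool ats_fault_needs_gpu_tlb_invalidate(uvm_fault_access_type_t access_type)
{
    // 4K pages are never re-fetched by the GPU on an invalid PTE, and the
    // SMMU WAR additionally requires a GPU invalidate on RO -> RW upgrades.
    return (PAGE_SIZE == UVM_PAGE_SIZE_4K) ||
           (UVM_ATS_SMMU_WAR_REQUIRED() && access_type == UVM_FAULT_ACCESS_TYPE_WRITE);
}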
|
||||
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
@@ -459,12 +578,11 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_region_t subregion;
|
||||
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_fault_client_type_t client_type = ats_context->client_type;
|
||||
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
|
||||
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
|
||||
uvm_page_mask_t *reads_serviced_mask = &ats_context->faults.reads_serviced_mask;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
@@ -480,7 +598,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_mask_zero(reads_serviced_mask);
|
||||
|
||||
if (!(vma->vm_flags & VM_READ))
|
||||
return status;
|
||||
return NV_OK;
|
||||
|
||||
if (!(vma->vm_flags & VM_WRITE)) {
|
||||
// If VMA doesn't have write permissions, all write faults are fatal.
|
||||
@@ -496,72 +614,65 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
// There are no pending faults beyond write faults to RO region.
|
||||
if (uvm_page_mask_empty(read_fault_mask))
|
||||
return status;
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
|
||||
ats_compute_prefetch(gpu_va_space, vma, base, UVM_ATS_SERVICE_TYPE_FAULTS, ats_context);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = (vma->vm_flags & VM_WRITE) ?
|
||||
UVM_FAULT_ACCESS_TYPE_WRITE :
|
||||
UVM_FAULT_ACCESS_TYPE_READ;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
uvm_fault_access_type_t access_type;
|
||||
uvm_page_mask_t *serviced_mask;
|
||||
|
||||
if (vma->vm_flags & VM_WRITE) {
|
||||
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
|
||||
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
|
||||
|
||||
// The Linux kernel never invalidates TLB entries on mapping
|
||||
// permission upgrade. This is a problem if the GPU has cached
|
||||
// entries with the old permission. The GPU will re-fetch the entry
|
||||
// if the PTE is invalid and page size is not 4K (this is the case
|
||||
// on P9). However, if a page gets upgraded from R/O to R/W and GPU
|
||||
// has the PTEs cached with R/O permissions we will enter an
|
||||
// infinite loop because we just forward the fault to the Linux
|
||||
// kernel and it will see that the permissions in the page table are
|
||||
// correct. Therefore, we flush TLB entries on ATS write faults.
|
||||
flush_tlb_va_region(gpu_va_space, start, length, client_type);
|
||||
access_type = UVM_FAULT_ACCESS_TYPE_WRITE;
|
||||
serviced_mask = faults_serviced_mask;
|
||||
}
|
||||
else {
|
||||
uvm_page_mask_region_fill(reads_serviced_mask, subregion);
|
||||
// write_fault_mask contains just the addresses with both read and
|
||||
// fatal write faults, so we need to service the read component.
|
||||
access_type = UVM_FAULT_ACCESS_TYPE_READ;
|
||||
serviced_mask = reads_serviced_mask;
|
||||
}
|
||||
|
||||
status = uvm_ats_service_faults_region(gpu_va_space,
|
||||
vma,
|
||||
base,
|
||||
subregion,
|
||||
access_type,
|
||||
ats_context,
|
||||
serviced_mask);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
// Remove write faults from read_fault_mask
|
||||
// Remove write faults from read_fault_mask to avoid double-service
|
||||
uvm_page_mask_andnot(read_fault_mask, read_fault_mask, write_fault_mask);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
status = uvm_ats_service_faults_region(gpu_va_space,
|
||||
vma,
|
||||
base,
|
||||
subregion,
|
||||
UVM_FAULT_ACCESS_TYPE_READ,
|
||||
ats_context,
|
||||
faults_serviced_mask);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
|
||||
|
||||
// Similarly to permission upgrade scenario, discussed above, GPU
|
||||
// will not re-fetch the entry if the PTE is invalid and page size
|
||||
// is 4K. To avoid infinite faulting loop, invalidate TLB for every
|
||||
// new translation written explicitly like in the case of permission
|
||||
// upgrade.
|
||||
if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
|
||||
flush_tlb_va_region(gpu_va_space, start, length, client_type);
|
||||
|
||||
// Handle HW prefetch only faults
|
||||
for_each_va_block_subregion_in_mask(subregion, prefetch_only_fault_mask, region) {
|
||||
status = uvm_ats_service_faults_region(gpu_va_space,
|
||||
vma,
|
||||
base,
|
||||
subregion,
|
||||
UVM_FAULT_ACCESS_TYPE_PREFETCH,
|
||||
ats_context,
|
||||
faults_serviced_mask);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return status;
|
||||
@@ -637,6 +748,8 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
uvm_page_mask_zero(&ats_context->access_counters.migrated_mask);
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
|
||||
@@ -650,21 +763,27 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
// Remove pages which are already resident at the intended destination from
|
||||
// the accessed_mask.
|
||||
uvm_page_mask_andnot(&ats_context->accessed_mask,
|
||||
&ats_context->accessed_mask,
|
||||
uvm_page_mask_andnot(&ats_context->access_counters.accessed_mask,
|
||||
&ats_context->access_counters.accessed_mask,
|
||||
&ats_context->prefetch_state.residency_mask);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
|
||||
for_each_va_block_subregion_in_mask(subregion, &ats_context->access_counters.accessed_mask, region) {
|
||||
NV_STATUS status;
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
|
||||
uvm_page_mask_t *migrated_mask = &ats_context->access_counters.migrated_mask;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
|
||||
// clear access counters if pages were migrated or migration needs to
|
||||
// be retried
|
||||
if (status == NV_OK || status == NV_ERR_BUSY_RETRY)
|
||||
uvm_page_mask_region_fill(migrated_mask, subregion);
|
||||
else if (status != NV_WARN_NOTHING_TO_DO)
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@@ -29,18 +29,19 @@
|
||||
|
||||
// Service ATS faults in the range (base, base + UVM_VA_BLOCK_SIZE) with service
|
||||
// type for individual pages in the range requested by page masks set in
|
||||
// ats_context->read_fault_mask/write_fault_mask. base must be aligned to
|
||||
// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that faulting
|
||||
// addresses fall completely within the VMA. The caller is also responsible for
|
||||
// ensuring that the faulting addresses don't overlap a GMMU region. (See
|
||||
// uvm_ats_check_in_gmmu_region). The caller is also responsible for handling
|
||||
// any errors returned by this function (fault cancellations etc.).
|
||||
// ats_context->faults.read_fault_mask/write_fault_mask/prefetch_only_fault_mask.
|
||||
// base must be aligned to UVM_VA_BLOCK_SIZE. The caller is responsible for
|
||||
// ensuring that faulting addresses fall completely within the VMA. The caller
|
||||
// is also responsible for ensuring that the faulting addresses don't overlap
|
||||
// a GMMU region. (See uvm_ats_check_in_gmmu_region). The caller is also
|
||||
// responsible for handling any errors returned by this function (fault
|
||||
// cancellations etc.).
|
||||
//
|
||||
// Returns the fault service status in ats_context->faults_serviced_mask. In
|
||||
// addition, ats_context->reads_serviced_mask returns whether read servicing
|
||||
// worked on write faults iff the read service was also requested in the
|
||||
// corresponding bit in read_fault_mask. These returned masks are only valid if
|
||||
// the return status is NV_OK. Status other than NV_OK indicate system global
|
||||
// Returns the fault service status in ats_context->faults.faults_serviced_mask.
// In addition, ats_context->faults.reads_serviced_mask returns whether read
|
||||
// servicing worked on write faults iff the read service was also requested in
|
||||
// the corresponding bit in read_fault_mask. These returned masks are only valid
|
||||
// if the return status is NV_OK. Status other than NV_OK indicate system global
|
||||
// fault servicing failures.
|
||||
//
|
||||
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
|
||||
@@ -52,9 +53,9 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
// Service access counter notifications on ATS regions in the range (base, base
|
||||
// + UVM_VA_BLOCK_SIZE) for individual pages in the range requested by page_mask
|
||||
// set in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
|
||||
// The caller is responsible for ensuring that the addresses in the
|
||||
// accessed_mask is completely covered by the VMA. The caller is also
|
||||
// set in ats_context->access_counters.accessed_mask. base must be aligned to
|
||||
// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that the addresses
|
||||
// in the accessed_mask are completely covered by the VMA. The caller is also
|
||||
// responsible for handling any errors returned by this function.
|
||||
//
|
||||
// Returns NV_OK if servicing was successful. Any other error indicates an error
|
||||
|
||||
@@ -127,12 +127,12 @@ static NvU32 smmu_vintf_read32(void __iomem *smmu_cmdqv_base, int reg)
|
||||
|
||||
// We always use VCMDQ127 for the WAR
|
||||
#define VCMDQ 127
|
||||
void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
|
||||
static void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
|
||||
{
|
||||
iowrite32(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
|
||||
}
|
||||
|
||||
NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
|
||||
static NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
|
||||
{
|
||||
return ioread32(SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2022-2023 NVIDIA Corporation
|
||||
Copyright (c) 2022-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -38,12 +38,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
|
||||
parent_gpu->utlb_per_gpc_count;
|
||||
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
|
||||
{
|
||||
uvm_fault_buffer_entry_t *dummy;
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
|
||||
(sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
}
|
||||
|
||||
// A single top level PDE on Blackwell covers 64 PB and that's the minimum
|
||||
@@ -51,14 +49,16 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;
|
||||
|
||||
parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
|
||||
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
// See uvm_mmu.h for mapping placement
|
||||
parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);
|
||||
|
||||
// TODO: Bug 3953852: Set this to true pending Blackwell changes
|
||||
parent_gpu->ce_phys_vidmem_write_supported = !uvm_parent_gpu_is_coherent(parent_gpu);
|
||||
parent_gpu->ce_phys_vidmem_write_supported = true;
|
||||
|
||||
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
|
||||
|
||||
@@ -83,8 +83,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = true;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = true;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = true;
|
||||
@@ -102,4 +100,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->plc_supported = true;
|
||||
|
||||
parent_gpu->no_ats_range_required = true;
|
||||
|
||||
parent_gpu->conf_computing.per_channel_key_rotation = true;
|
||||
}
|
||||
|
||||
@@ -37,7 +37,6 @@
|
||||
#include "uvm_hal_types.h"
|
||||
#include "uvm_blackwell_fault_buffer.h"
|
||||
#include "hwref/blackwell/gb100/dev_fault.h"
|
||||
#include "hwref/blackwell/gb100/dev_mmu.h"
|
||||
|
||||
static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;
|
||||
|
||||
|
||||
@@ -855,6 +855,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
const UvmCslIv *decrypt_iv,
|
||||
NvU32 key_version,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
@@ -869,6 +870,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
|
||||
dst_plain + i * copy_size,
|
||||
src_cipher + i * copy_size,
|
||||
decrypt_iv + i,
|
||||
key_version,
|
||||
copy_size,
|
||||
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
|
||||
}
|
||||
@@ -879,6 +881,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
const UvmCslIv *decrypt_iv,
|
||||
NvU32 key_version,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
@@ -896,6 +899,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
|
||||
dst_plain + i * copy_size,
|
||||
src_cipher + i * copy_size,
|
||||
decrypt_iv + i,
|
||||
key_version,
|
||||
copy_size,
|
||||
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
|
||||
}
|
||||
@@ -959,7 +963,7 @@ static void gpu_encrypt(uvm_push_t *push,
|
||||
i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
|
||||
dst_cipher);
|
||||
|
||||
uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
|
||||
uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);
|
||||
|
||||
if (i > 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
@@ -1020,6 +1024,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
|
||||
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
|
||||
UvmCslIv *decrypt_iv = NULL;
|
||||
UvmCslIv *encrypt_iv = NULL;
|
||||
NvU32 key_version;
|
||||
uvm_tracker_t tracker;
|
||||
size_t src_plain_size;
|
||||
|
||||
@@ -1089,6 +1094,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
|
||||
|
||||
gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);
|
||||
|
||||
// There shouldn't be any key rotation between the end of the push and the
|
||||
// CPU decryption(s), but it is more robust against test changes to force
|
||||
// decryption to use the saved key.
|
||||
key_version = uvm_channel_pool_key_version(push.channel->pool);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
|
||||
|
||||
TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
|
||||
@@ -1101,6 +1111,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
|
||||
dst_plain,
|
||||
dst_cipher,
|
||||
decrypt_iv,
|
||||
key_version,
|
||||
auth_tag_mem,
|
||||
size,
|
||||
copy_size),
|
||||
@@ -1111,6 +1122,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
|
||||
dst_plain,
|
||||
dst_cipher,
|
||||
decrypt_iv,
|
||||
key_version,
|
||||
auth_tag_mem,
|
||||
size,
|
||||
copy_size),
|
||||
|
||||
File diff suppressed because it is too large
@@ -200,6 +200,7 @@ typedef struct
|
||||
// num_tsgs is 1. Pre-Volta GPUs also have a single TSG object, but since HW
|
||||
// does not support TSG for CE engines, a HW TSG is not created, but a TSG
|
||||
// object is required to allocate channels.
|
||||
//
|
||||
// When Confidential Computing mode is enabled, the WLC and LCIC channel
|
||||
// types require one TSG for each WLC/LCIC pair of channels. In this case,
|
||||
// we do not use a TSG per channel pool, but instead a TSG per WLC/LCIC
|
||||
@@ -228,21 +229,65 @@ typedef struct
|
||||
// variant is required when the thread holding the pool lock must sleep
|
||||
// (ex: acquire another mutex) deeper in the call stack, either in UVM or
|
||||
// RM.
|
||||
union {
|
||||
union
|
||||
{
|
||||
uvm_spinlock_t spinlock;
|
||||
uvm_mutex_t mutex;
|
||||
};
|
||||
|
||||
// Secure operations require that uvm_push_begin order matches
|
||||
// uvm_push_end order, because the engine's state is used in its internal
|
||||
// operation and each push may modify this state. push_locks is protected by
|
||||
// the channel pool lock.
|
||||
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
struct
|
||||
{
|
||||
// Secure operations require that uvm_push_begin order matches
|
||||
// uvm_push_end order, because the engine's state is used in its
|
||||
// internal operation and each push may modify this state.
|
||||
// push_locks is protected by the channel pool lock.
|
||||
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
|
||||
// Counting semaphore for available and unlocked channels, it must be
|
||||
// acquired before submitting work to a channel when the Confidential
|
||||
// Computing feature is enabled.
|
||||
uvm_semaphore_t push_sem;
|
||||
// Counting semaphore for available and unlocked channels, it must be
|
||||
// acquired before submitting work to a channel when the Confidential
|
||||
// Computing feature is enabled.
|
||||
uvm_semaphore_t push_sem;
|
||||
|
||||
// Per channel buffers in unprotected sysmem.
|
||||
uvm_rm_mem_t *pool_sysmem;
|
||||
|
||||
// Per channel buffers in protected vidmem.
|
||||
uvm_rm_mem_t *pool_vidmem;
|
||||
|
||||
struct
|
||||
{
|
||||
// Current encryption key version, incremented upon key rotation.
|
||||
// While there are separate keys for encryption and decryption, the
|
||||
// two keys are rotated at once, so the versioning applies to both.
|
||||
NvU32 version;
|
||||
|
||||
// Lock used to ensure mutual exclusion during key rotation.
|
||||
uvm_mutex_t mutex;
|
||||
|
||||
// CSL contexts passed to RM for key rotation. This is usually an
|
||||
// array containing the CSL contexts associated with the channels in
|
||||
// the pool. In the case of the WLC pool, the array also includes
|
||||
// CSL contexts associated with LCIC channels.
|
||||
UvmCslContext **csl_contexts;
|
||||
|
||||
// Number of elements in the CSL context array.
|
||||
unsigned num_csl_contexts;
|
||||
|
||||
// Number of bytes encrypted, or decrypted, on the engine associated
|
||||
// with the pool since the last key rotation. Only used during
|
||||
// testing, to force key rotations after a certain encryption size,
|
||||
// see UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD.
|
||||
//
|
||||
// Encryptions on a LCIC pool are accounted for in the paired WLC
|
||||
// pool.
|
||||
//
|
||||
// TODO: Bug 4612912: these accounting variables can be removed once
|
||||
// RM exposes an API to set the key rotation lower threshold.
|
||||
atomic64_t encrypted;
|
||||
atomic64_t decrypted;
|
||||
} key_rotation;
|
||||
|
||||
} conf_computing;
|
||||
} uvm_channel_pool_t;
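A minimal sketch of how byte counters like key_rotation.encrypted and key_rotation.decrypted are typically compared against a lower threshold when deciding whether a rotation is due. The helper name and the threshold parameter are illustrative assumptions; the driver's actual decision logic is not part of this diff:

static bool key_rotation_threshold_reached(uvm_channel_pool_t *pool, NvU64 threshold_bytes)
{
    // Encryptions and decryptions both count toward the rotation threshold.
    NvU64 total = (NvU64)atomic64_read(&pool->conf_computing.key_rotation.encrypted) +
                  (NvU64)atomic64_read(&pool->conf_computing.key_rotation.decrypted);

    return total >= threshold_bytes;
}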
|
||||
|
||||
struct uvm_channel_struct
|
||||
@@ -322,43 +367,14 @@ struct uvm_channel_struct
|
||||
// work launches to match the order of push end-s that triggered them.
|
||||
volatile NvU32 gpu_put;
|
||||
|
||||
// Static pushbuffer for channels with static schedule (WLC/LCIC)
|
||||
uvm_rm_mem_t *static_pb_protected_vidmem;
|
||||
|
||||
// Static pushbuffer staging buffer for WLC
|
||||
uvm_rm_mem_t *static_pb_unprotected_sysmem;
|
||||
void *static_pb_unprotected_sysmem_cpu;
|
||||
void *static_pb_unprotected_sysmem_auth_tag_cpu;
|
||||
|
||||
// The above static locations are required by the WLC (and LCIC)
|
||||
// schedule. Protected sysmem location completes WLC's independence
|
||||
// from the pushbuffer allocator.
|
||||
// Protected sysmem location makes WLC independent from the pushbuffer
|
||||
// allocator. Unprotected sysmem and protected vidmem counterparts
|
||||
// are allocated from the channel pool (sysmem, vidmem).
|
||||
void *static_pb_protected_sysmem;
|
||||
|
||||
// Static tracking semaphore notifier values
|
||||
// Because of LCIC's fixed schedule, the secure semaphore release
|
||||
// mechanism uses two additional static locations for incrementing the
|
||||
// notifier values. See:
|
||||
// . channel_semaphore_secure_release()
|
||||
// . setup_lcic_schedule()
|
||||
// . internal_channel_submit_work_wlc()
|
||||
uvm_rm_mem_t *static_notifier_unprotected_sysmem;
|
||||
NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
|
||||
NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
|
||||
uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
|
||||
uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;
|
||||
|
||||
// Explicit location for push launch tag used by WLC.
|
||||
// Encryption auth tags have to be located in unprotected sysmem.
|
||||
void *launch_auth_tag_cpu;
|
||||
NvU64 launch_auth_tag_gpu_va;
|
||||
|
||||
// Used to decrypt the push back to protected sysmem.
|
||||
// This happens when profilers register callbacks for migration data.
|
||||
uvm_push_crypto_bundle_t *push_crypto_bundles;
|
||||
|
||||
// Accompanying authentication tags for the crypto bundles
|
||||
uvm_rm_mem_t *push_crypto_bundle_auth_tags;
|
||||
} conf_computing;
|
||||
|
||||
// RM channel information
|
||||
@@ -401,6 +417,8 @@ struct uvm_channel_struct
|
||||
struct list_head channel_list_node;
|
||||
NvU32 pending_event_count;
|
||||
} tools;
|
||||
|
||||
bool suspended_p2p;
|
||||
};
|
||||
|
||||
struct uvm_channel_manager_struct
|
||||
@@ -451,8 +469,24 @@ struct uvm_channel_manager_struct
|
||||
UVM_BUFFER_LOCATION gpput_loc;
|
||||
UVM_BUFFER_LOCATION pushbuffer_loc;
|
||||
} conf;
|
||||
|
||||
struct
|
||||
{
|
||||
// Flag indicating that the WLC/LCIC mechanism is ready/setup; should
|
||||
// only be false during (de)initialization.
|
||||
bool wlc_ready;
|
||||
|
||||
// True indicates that key rotation is enabled (UVM-wise).
|
||||
bool key_rotation_enabled;
|
||||
} conf_computing;
|
||||
};
|
||||
|
||||
// Index of a channel pool within the manager
|
||||
static unsigned uvm_channel_pool_index_in_channel_manager(const uvm_channel_pool_t *pool)
|
||||
{
|
||||
return pool - pool->manager->channel_pools;
|
||||
}
|
||||
|
||||
// Create a channel manager for the GPU
|
||||
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
|
||||
|
||||
@@ -501,6 +535,14 @@ uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel);
|
||||
|
||||
uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel);
|
||||
|
||||
NvU64 uvm_channel_get_static_pb_protected_vidmem_gpu_va(uvm_channel_t *channel);
|
||||
|
||||
NvU64 uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(uvm_channel_t *channel);
|
||||
|
||||
char* uvm_channel_get_static_pb_unprotected_sysmem_cpu(uvm_channel_t *channel);
|
||||
|
||||
bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool);
|
||||
|
||||
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
|
||||
@@ -518,6 +560,11 @@ static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
|
||||
return !uvm_channel_pool_is_sec2(pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_p2p(uvm_channel_t *channel)
|
||||
{
|
||||
return uvm_channel_pool_is_p2p(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_ce(uvm_channel_t *channel)
|
||||
{
|
||||
return uvm_channel_pool_is_ce(channel->pool);
|
||||
@@ -532,6 +579,17 @@ static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
|
||||
return UVM_CHANNEL_TYPE_MEMOPS;
|
||||
}
|
||||
|
||||
// Force key rotation in the engine associated with the given channel pool.
|
||||
// Rotation may still not happen if RM cannot acquire the necessary locks (in
|
||||
// which case the function returns NV_ERR_STATE_IN_USE).
|
||||
//
|
||||
// This function should be only invoked in pools in which key rotation is
|
||||
// enabled.
|
||||
NV_STATUS uvm_channel_pool_rotate_key(uvm_channel_pool_t *pool);
|
||||
|
||||
// Retrieve the current encryption key version associated with the channel pool.
|
||||
NvU32 uvm_channel_pool_key_version(uvm_channel_pool_t *pool);
|
||||
|
||||
// Privileged channels support all the Host and engine methods, while
|
||||
// non-privileged channels don't support privileged methods.
|
||||
//
|
||||
@@ -542,14 +600,25 @@ bool uvm_channel_is_privileged(uvm_channel_t *channel);
|
||||
// Destroy the channel manager
|
||||
void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager);
|
||||
|
||||
// Suspend p2p traffic on channels used for p2p operations.
|
||||
// This is used in STO recovery sequence to quiet nvlink traffic before the
|
||||
// links can be restored.
|
||||
NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager);
|
||||
|
||||
// Resume p2p traffic on channels used for p2p operations.
|
||||
// This is used at the end of the STO recovery sequence to resume suspended p2p
|
||||
// traffic on p2p channels.
|
||||
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager);
|
||||
|
||||
// Get the current status of the channel
|
||||
// Returns NV_OK if the channel is in a good state and NV_ERR_RC_ERROR
|
||||
// otherwise. Notably this never sets the global fatal error.
|
||||
// Returns NV_OK if the channel is in a good state,
|
||||
// NV_ERR_RC_ERROR otherwise.
|
||||
// Notably this never sets the global fatal error.
|
||||
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel);
|
||||
|
||||
// Check for channel errors
|
||||
// Checks for channel errors by calling uvm_channel_get_status(). If an error
|
||||
// occurred, sets the global fatal error and prints errors.
|
||||
// Checks for channel errors by calling uvm_channel_get_status().
|
||||
// If a fatal error occurred, sets the global fatal error and prints errors.
|
||||
NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel);
|
||||
|
||||
// Check errors on all channels in the channel manager
|
||||
@@ -579,13 +648,11 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
|
||||
// beginning.
|
||||
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);
|
||||
|
||||
// Check if WLC/LCIC mechanism is ready/setup
|
||||
// Should only return false during initialization
|
||||
static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
|
||||
{
|
||||
return (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] != NULL) &&
|
||||
(manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL);
|
||||
return manager->conf_computing.wlc_ready;
|
||||
}
|
||||
|
||||
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
|
||||
// associated with access_channel.
|
||||
//
|
||||
|
||||
@@ -206,9 +206,10 @@ static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
|
||||
uvm_for_each_pool(pool, manager) {
|
||||
uvm_channel_t *channel;
|
||||
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
continue;
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
continue;
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
for (i = 0; i < 512; ++i) {
|
||||
@@ -796,11 +797,8 @@ done:
|
||||
static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_channel_pool_t *pool;
|
||||
uvm_push_t *pushes;
|
||||
uvm_gpu_t *gpu;
|
||||
NvU32 i;
|
||||
NvU32 num_pushes;
|
||||
uvm_push_t *pushes = NULL;
|
||||
uvm_gpu_t *gpu = NULL;
|
||||
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
@@ -810,9 +808,19 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_type_t channel_type;
|
||||
|
||||
// Key rotation is disabled because this test relies on nested pushes,
|
||||
// which is illegal. If any push other than the first one triggers key
|
||||
// rotation, the test won't complete. This is because key rotation
|
||||
// depends on waiting for ongoing pushes to end, which doesn't happen
|
||||
// if those pushes are ended after the current one begins.
|
||||
uvm_conf_computing_disable_key_rotation(gpu);
|
||||
|
||||
for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
|
||||
pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
|
||||
TEST_CHECK_RET(pool != NULL);
|
||||
NvU32 i;
|
||||
NvU32 num_pushes;
|
||||
uvm_channel_pool_t *pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
|
||||
|
||||
TEST_CHECK_GOTO(pool != NULL, error);
|
||||
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
@@ -824,7 +832,7 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);
|
||||
|
||||
pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
|
||||
TEST_CHECK_RET(pushes != NULL);
|
||||
TEST_CHECK_GOTO(pushes != NULL, error);
|
||||
|
||||
for (i = 0; i < num_pushes; i++) {
|
||||
uvm_push_t *push = &pushes[i];
|
||||
@@ -841,12 +849,18 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
|
||||
uvm_kvfree(pushes);
|
||||
}
|
||||
|
||||
uvm_conf_computing_enable_key_rotation(gpu);
|
||||
}
|
||||
|
||||
uvm_thread_context_lock_enable_tracking();
|
||||
|
||||
return status;
|
||||
|
||||
error:
|
||||
if (gpu != NULL)
|
||||
uvm_conf_computing_enable_key_rotation(gpu);
|
||||
|
||||
uvm_thread_context_lock_enable_tracking();
|
||||
uvm_kvfree(pushes);
|
||||
|
||||
@@ -948,6 +962,318 @@ release:
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS force_key_rotations(uvm_channel_pool_t *pool, unsigned num_rotations)
|
||||
{
|
||||
unsigned num_tries;
|
||||
unsigned max_num_tries = 20;
|
||||
unsigned num_rotations_completed = 0;
|
||||
|
||||
if (num_rotations == 0)
|
||||
return NV_OK;
|
||||
|
||||
// The number of accepted rotations is kept low, so failed rotation
|
||||
// invocations due to RM not acquiring the necessary locks (which imply a
|
||||
// sleep in the test) do not balloon the test execution time.
|
||||
UVM_ASSERT(num_rotations <= 10);
|
||||
|
||||
for (num_tries = 0; (num_tries < max_num_tries) && (num_rotations_completed < num_rotations); num_tries++) {
|
||||
// Force key rotation, irrespective of encryption usage.
|
||||
NV_STATUS status = uvm_channel_pool_rotate_key(pool);
|
||||
|
||||
// Key rotation may not be able to complete due to RM failing to acquire
|
||||
// the necessary locks. Detect the situation, sleep for a bit, and then
|
||||
// try again.
|
||||
//
|
||||
// The maximum time spent sleeping in a single rotation call is
|
||||
// (max_num_tries * max_sleep_us)
|
||||
if (status == NV_ERR_STATE_IN_USE) {
|
||||
NvU32 min_sleep_us = 1000;
|
||||
NvU32 max_sleep_us = 10000;
|
||||
|
||||
usleep_range(min_sleep_us, max_sleep_us);
|
||||
continue;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_RET(status);
|
||||
|
||||
num_rotations_completed++;
|
||||
}
|
||||
|
||||
// If not a single key rotation occurred, the dependent tests still pass,
|
||||
// but there is not much value to them. Instead, return an error so that the
// maximum number of tries, or the maximum sleep time, can be adjusted to
|
||||
// ensure that at least one rotation completes.
|
||||
if (num_rotations_completed > 0)
|
||||
return NV_OK;
|
||||
else
|
||||
return NV_ERR_STATE_IN_USE;
|
||||
}
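With the values used above, the worst-case sleep in a single forced rotation is bounded by max_num_tries * max_sleep_us = 20 * 10,000 us, i.e. roughly 200 ms.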
|
||||
|
||||
static NV_STATUS force_key_rotation(uvm_channel_pool_t *pool)
|
||||
{
|
||||
return force_key_rotations(pool, 1);
|
||||
}
|
||||
|
||||
// Test key rotation in all pools. This is useful because key rotation may not
|
||||
// happen otherwise on certain engines during UVM test execution. For example,
|
||||
// if the MEMOPS channel type is mapped to a CE not shared with any other
|
||||
// channel type, then the only encryption taking place in the engine is due to
|
||||
// semaphore releases (4 bytes each). This small encryption size makes it
|
||||
// unlikely to exceed even small rotation thresholds.
|
||||
static NV_STATUS test_channel_key_rotation_basic(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
uvm_for_each_pool(pool, gpu->channel_manager) {
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
|
||||
continue;
|
||||
|
||||
TEST_NV_CHECK_RET(force_key_rotation(pool));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
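To put the comment above in numbers: a semaphore release encrypts only 4 bytes, so with a hypothetical lower threshold of 1 MiB an engine that sees nothing but semaphore releases would need about 262,144 releases before rotation could trigger on its own, which is why the test forces rotations explicitly. The actual threshold value is not part of this diff.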
|
||||
|
||||
// Interleave GPU encryptions and decryptions, and their CPU counterparts, with
|
||||
// key rotations.
|
||||
static NV_STATUS test_channel_key_rotation_interleave(uvm_gpu_t *gpu)
|
||||
{
|
||||
int i;
|
||||
uvm_channel_pool_t *gpu_to_cpu_pool;
|
||||
uvm_channel_pool_t *cpu_to_gpu_pool;
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
|
||||
void *initial_plain_cpu = NULL;
|
||||
void *final_plain_cpu = NULL;
|
||||
uvm_mem_t *plain_gpu = NULL;
|
||||
uvm_gpu_address_t plain_gpu_address;
|
||||
|
||||
cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
|
||||
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));
|
||||
|
||||
gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
|
||||
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));
|
||||
|
||||
initial_plain_cpu = uvm_kvmalloc_zero(size);
|
||||
if (initial_plain_cpu == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
final_plain_cpu = uvm_kvmalloc_zero(size);
|
||||
if (final_plain_cpu == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
|
||||
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
|
||||
|
||||
memset(initial_plain_cpu, 1, size);
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
|
||||
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
|
||||
plain_gpu_address,
|
||||
initial_plain_cpu,
|
||||
size,
|
||||
NULL,
|
||||
"CPU > GPU"),
|
||||
out);
|
||||
|
||||
TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
|
||||
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
|
||||
final_plain_cpu,
|
||||
plain_gpu_address,
|
||||
size,
|
||||
NULL,
|
||||
"GPU > CPU"),
|
||||
out);
|
||||
|
||||
TEST_CHECK_GOTO(!memcmp(initial_plain_cpu, final_plain_cpu, size), out);
|
||||
|
||||
memset(final_plain_cpu, 0, size);
|
||||
}
|
||||
|
||||
out:
|
||||
uvm_mem_free(plain_gpu);
|
||||
uvm_kvfree(final_plain_cpu);
|
||||
uvm_kvfree(initial_plain_cpu);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS memset_vidmem(uvm_mem_t *mem, NvU8 val)
|
||||
{
|
||||
uvm_push_t push;
|
||||
uvm_gpu_address_t gpu_address;
|
||||
uvm_gpu_t *gpu = mem->backing_gpu;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));
|
||||
|
||||
gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
|
||||
gpu->parent->ce_hal->memset_1(&push, gpu_address, val, mem->size);
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Custom version of uvm_conf_computing_util_memcopy_gpu_to_cpu that allows
// the test to insert key rotations between the push end and the CPU
// decryption.
|
||||
static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
|
||||
void *dst_plain,
|
||||
uvm_gpu_address_t src_gpu_address,
|
||||
size_t size,
|
||||
unsigned num_rotations_to_insert)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_conf_computing_dma_buffer_t *dma_buffer;
|
||||
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
|
||||
void *src_cipher, *auth_tag;
|
||||
uvm_channel_t *channel;
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
|
||||
|
||||
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Small GPU > CPU encryption");
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
channel = push.channel;
|
||||
uvm_conf_computing_log_gpu_encryption(channel, size, dma_buffer->decrypt_iv);
|
||||
dma_buffer->key_version[0] = uvm_channel_pool_key_version(channel->pool);
|
||||
|
||||
dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
|
||||
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
|
||||
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
TEST_NV_CHECK_GOTO(force_key_rotations(channel->pool, num_rotations_to_insert), out);
|
||||
|
||||
// If num_rotations_to_insert is not zero, the current encryption key will
|
||||
// be different from the one used during CE encryption.
|
||||
|
||||
src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
|
||||
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
|
||||
status = uvm_conf_computing_cpu_decrypt(channel,
|
||||
dst_plain,
|
||||
src_cipher,
|
||||
dma_buffer->decrypt_iv,
|
||||
dma_buffer->key_version[0],
|
||||
size,
|
||||
auth_tag);
|
||||
|
||||
out:
|
||||
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_channel_key_rotation_cpu_decryption(uvm_gpu_t *gpu,
|
||||
unsigned num_repetitions,
|
||||
unsigned num_rotations_to_insert)
|
||||
{
|
||||
unsigned i;
|
||||
uvm_channel_pool_t *gpu_to_cpu_pool;
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
|
||||
NvU8 *plain_cpu = NULL;
|
||||
uvm_mem_t *plain_gpu = NULL;
|
||||
uvm_gpu_address_t plain_gpu_address;
|
||||
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
|
||||
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));
|
||||
|
||||
plain_cpu = (NvU8 *) uvm_kvmalloc_zero(size);
|
||||
if (plain_cpu == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
|
||||
TEST_NV_CHECK_GOTO(memset_vidmem(plain_gpu, 1), out);
|
||||
|
||||
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
|
||||
|
||||
for (i = 0; i < num_repetitions; i++) {
|
||||
unsigned j;
|
||||
|
||||
TEST_NV_CHECK_GOTO(encrypted_memcopy_gpu_to_cpu(gpu,
|
||||
plain_cpu,
|
||||
plain_gpu_address,
|
||||
size,
|
||||
num_rotations_to_insert),
|
||||
out);
|
||||
|
||||
for (j = 0; j < size; j++)
|
||||
TEST_CHECK_GOTO(plain_cpu[j] == 1, out);
|
||||
|
||||
memset(plain_cpu, 0, size);
|
||||
|
||||
}
|
||||
out:
|
||||
uvm_mem_free(plain_gpu);
|
||||
uvm_kvfree(plain_cpu);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Test that CPU decryptions can use old keys i.e. previous versions of the keys
|
||||
// that are no longer the current key, due to key rotation. Given that SEC2
|
||||
// does not expose encryption capabilities, the "decrypt-after-rotation" problem
|
||||
// is exclusive of CE encryptions.
|
||||
static NV_STATUS test_channel_key_rotation_decrypt_after_key_rotation(uvm_gpu_t *gpu)
|
||||
{
|
||||
// Instruct encrypted_memcopy_gpu_to_cpu to insert several key rotations
|
||||
// between the GPU encryption, and the associated CPU decryption.
|
||||
unsigned num_rotations_to_insert = 8;
|
||||
|
||||
TEST_NV_CHECK_RET(test_channel_key_rotation_cpu_decryption(gpu, 1, num_rotations_to_insert));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_channel_key_rotation(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
|
||||
break;
|
||||
|
||||
TEST_NV_CHECK_RET(test_channel_key_rotation_basic(gpu));
|
||||
|
||||
TEST_NV_CHECK_RET(test_channel_key_rotation_interleave(gpu));
|
||||
|
||||
TEST_NV_CHECK_RET(test_channel_key_rotation_decrypt_after_key_rotation(gpu));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
@@ -967,6 +1293,7 @@ static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
// after their schedule is set up
|
||||
if (uvm_channel_pool_is_wlc(pool))
|
||||
continue;
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
|
||||
@@ -1006,6 +1333,7 @@ static NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
|
||||
// after their schedule is set up
|
||||
if (uvm_channel_pool_is_wlc(pool))
|
||||
continue;
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
uvm_push_t push;
|
||||
@@ -1094,7 +1422,7 @@ static NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
TEST_NV_CHECK_GOTO(uvm_channel_write_ctrl_gpfifo(channel, entry), error);
|
||||
|
||||
// Release the semaphore.
|
||||
UVM_WRITE_ONCE(*cpu_ptr, 1);
|
||||
WRITE_ONCE(*cpu_ptr, 1);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
|
||||
|
||||
@@ -1148,6 +1476,7 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
continue;
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
NvU32 i;
|
||||
uvm_push_t push;
|
||||
@@ -1203,6 +1532,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
status = test_channel_key_rotation(va_space);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// The following tests have side effects, they reset the GPU's
|
||||
// channel_manager.
|
||||
status = test_channel_pushbuffer_extension_base(va_space);
|
||||
@@ -1338,6 +1671,126 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_stress_key_rotation_cpu_encryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
int i;
|
||||
uvm_channel_pool_t *cpu_to_gpu_pool;
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
|
||||
void *initial_plain_cpu = NULL;
|
||||
uvm_mem_t *plain_gpu = NULL;
|
||||
uvm_gpu_address_t plain_gpu_address;
|
||||
|
||||
UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU);
|
||||
|
||||
cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
|
||||
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));
|
||||
|
||||
initial_plain_cpu = uvm_kvmalloc_zero(size);
|
||||
if (initial_plain_cpu == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
|
||||
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
|
||||
|
||||
memset(initial_plain_cpu, 1, size);
|
||||
|
||||
for (i = 0; i < params->iterations; i++) {
|
||||
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
|
||||
plain_gpu_address,
|
||||
initial_plain_cpu,
|
||||
size,
|
||||
NULL,
|
||||
"CPU > GPU"),
|
||||
out);
|
||||
}
|
||||
|
||||
out:
|
||||
uvm_mem_free(plain_gpu);
|
||||
uvm_kvfree(initial_plain_cpu);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_stress_key_rotation_cpu_decryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
unsigned num_rotations_to_insert = 0;
|
||||
|
||||
UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU);
|
||||
|
||||
return test_channel_key_rotation_cpu_decryption(gpu, params->iterations, num_rotations_to_insert);
|
||||
}
|
||||
|
||||
static NV_STATUS channel_stress_key_rotation_rotate(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE);
|
||||
|
||||
for (i = 0; i < params->iterations; ++i) {
|
||||
NV_STATUS status;
|
||||
uvm_channel_pool_t *pool;
|
||||
uvm_channel_type_t type;
|
||||
|
||||
if ((i % 3) == 0)
|
||||
type = UVM_CHANNEL_TYPE_CPU_TO_GPU;
|
||||
else if ((i % 3) == 1)
|
||||
type = UVM_CHANNEL_TYPE_GPU_TO_CPU;
|
||||
else
|
||||
type = UVM_CHANNEL_TYPE_WLC;
|
||||
|
||||
pool = gpu->channel_manager->pool_to_use.default_for_type[type];
|
||||
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
|
||||
return NV_ERR_INVALID_STATE;
|
||||
|
||||
status = force_key_rotation(pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// The objective of this test is documented in the user-level function
|
||||
static NV_STATUS uvm_test_channel_stress_key_rotation(uvm_va_space_t *va_space, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
uvm_test_rng_t rng;
|
||||
uvm_gpu_t *gpu;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
uvm_test_rng_init(&rng, params->seed);
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// Key rotation should be enabled, or disabled, in all GPUs. Pick a random
|
||||
// one.
|
||||
gpu = random_va_space_gpu(&rng, va_space);
|
||||
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
|
||||
goto out;
|
||||
|
||||
if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU)
|
||||
status = channel_stress_key_rotation_cpu_encryption(gpu, params);
|
||||
else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU)
|
||||
status = channel_stress_key_rotation_cpu_decryption(gpu, params);
|
||||
else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE)
|
||||
status = channel_stress_key_rotation_rotate(gpu, params);
|
||||
else
|
||||
status = NV_ERR_INVALID_PARAMETER;
|
||||
|
||||
out:
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
@@ -1349,6 +1802,8 @@ NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct
|
||||
return uvm_test_channel_stress_update_channels(va_space, params);
|
||||
case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
|
||||
return uvm_test_channel_noop_push(va_space, params);
|
||||
case UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION:
|
||||
return uvm_test_channel_stress_key_rotation(va_space, params);
|
||||
default:
|
||||
return NV_ERR_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
@@ -281,29 +281,6 @@ NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// This formats a GPU UUID, in a UVM-friendly way. That is, nearly the same as
|
||||
// what nvidia-smi reports. It will always prefix the UUID with UVM-GPU so
|
||||
// that we know that we have a real, binary formatted UUID that will work in
|
||||
// the UVM APIs.
|
||||
//
|
||||
// It comes out like this:
|
||||
//
|
||||
// UVM-GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
|
||||
//
|
||||
// This routine will always null-terminate the string for you. This is true
|
||||
// even if the buffer was too small!
|
||||
//
|
||||
// Return value is the number of non-null characters written.
|
||||
//
|
||||
// Note that if you were to let the NV2080_CTRL_CMD_GPU_GET_GID_INFO command
|
||||
// return it's default format, which is ascii, not binary, then you would get
|
||||
// this back:
|
||||
//
|
||||
// GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
|
||||
//
|
||||
// ...which is actually a character string, and won't work for UVM API calls.
|
||||
// So it's very important to be able to see the difference.
|
||||
//
|
||||
static char uvm_digit_to_hex(unsigned value)
|
||||
{
|
||||
if (value >= 10)
|
||||
@@ -312,27 +289,19 @@ static char uvm_digit_to_hex(unsigned value)
|
||||
return value + '0';
|
||||
}
|
||||
|
||||
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pUuidStruct)
|
||||
void uvm_uuid_string(char *buffer, const NvProcessorUuid *pUuidStruct)
|
||||
{
|
||||
char *str = buffer+8;
|
||||
char *str = buffer;
|
||||
unsigned i;
|
||||
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
|
||||
|
||||
if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
|
||||
return *buffer = 0;
|
||||
|
||||
memcpy(buffer, "UVM-GPU-", 8);
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
|
||||
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
|
||||
|
||||
if (dashMask & (1 << (i+1)))
|
||||
if (dashMask & (1 << (i + 1)))
|
||||
*str++ = '-';
|
||||
}
|
||||
|
||||
*str = 0;
|
||||
|
||||
return (int)(str-buffer);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2023 NVIDIA Corporation
|
||||
Copyright (c) 2013-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -50,10 +50,14 @@ enum {
|
||||
NVIDIA_UVM_NUM_MINOR_DEVICES
|
||||
};
|
||||
|
||||
#define UVM_GPU_UUID_TEXT_BUFFER_LENGTH (8+16*2+4+1)
|
||||
// UUID has the format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
||||
#define UVM_UUID_STRING_LENGTH ((8 + 1) + 3 * (4 + 1) + 12 + 1)
|
||||
|
||||
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pGpuUuid);
|
||||
// Writes UVM_UUID_STRING_LENGTH characters into buffer, including a terminating
|
||||
// NULL.
|
||||
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
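// Illustrative usage sketch (not part of the change): UVM_UUID_STRING_LENGTH
// evaluates to 37 characters, i.e. the 8-character first group plus its dash,
// three 4-character groups each followed by a dash, the final 12-character
// group, and the terminating NULL, so a buffer of that size is sufficient.
//
//     char uuid_buffer[UVM_UUID_STRING_LENGTH];
//
//     uvm_uuid_string(uuid_buffer, uuid);
//     UVM_DBG_PRINT("GPU UUID: %s\n", uuid_buffer);
//
// Here "uuid" stands for any NvProcessorUuid pointer available to the caller.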
|
||||
|
||||
// Long prefix - typically for debugging and tests.
|
||||
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
|
||||
func(prefix "%s:%u %s[pid:%d]" fmt, \
|
||||
kbasename(__FILE__), \
|
||||
@@ -62,10 +66,15 @@ int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessor
|
||||
current->pid, \
|
||||
##__VA_ARGS__)
|
||||
|
||||
// Short prefix - typically for information.
|
||||
#define UVM_PRINT_FUNC_SHORT_PREFIX(func, prefix, fmt, ...) \
|
||||
func(prefix fmt, ##__VA_ARGS__)
|
||||
|
||||
// No prefix - used by kernel panic messages.
|
||||
#define UVM_PRINT_FUNC(func, fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)
|
||||
|
||||
// Check whether UVM_{ERR,DBG,INFO)_PRINT* should be enabled
|
||||
// Check whether UVM_{ERR,DBG)_PRINT* should be enabled.
|
||||
bool uvm_debug_prints_enabled(void);
|
||||
|
||||
// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
|
||||
@@ -77,10 +86,10 @@ bool uvm_debug_prints_enabled(void);
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define UVM_ASSERT_PRINT(fmt, ...) \
|
||||
#define UVM_ERR_PRINT_ALWAYS(fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
|
||||
|
||||
#define UVM_ASSERT_PRINT_RL(fmt, ...) \
|
||||
#define UVM_ERR_PRINT_ALWAYS_RL(fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
|
||||
|
||||
#define UVM_ERR_PRINT(fmt, ...) \
|
||||
@@ -92,33 +101,18 @@ bool uvm_debug_prints_enabled(void);
|
||||
#define UVM_DBG_PRINT(fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
|
||||
|
||||
#define UVM_DBG_PRINT_RL(fmt, ...) \
|
||||
#define UVM_DBG_PRINT_RL(fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
|
||||
|
||||
// UVM_INFO_PRINT prints in all builds, including release builds. It is used
// for relaying driver-level information, rather than detailed debugging
// information; therefore, it does not add the long "pretty-printing" prefix.
#define UVM_INFO_PRINT(fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
|
||||
UVM_PRINT_FUNC_SHORT_PREFIX(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
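// Illustrative call (the message text and variable are hypothetical, not taken
// from this change):
//
//     UVM_INFO_PRINT("Confidential Computing: %s\n",
//                    enabled ? "enabled" : "disabled");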
|
||||
|
||||
//
|
||||
// Please see the documentation of format_uuid_to_buffer, for details on what
|
||||
// this routine prints for you.
|
||||
//
|
||||
#define UVM_DBG_PRINT_UUID(msg, uuidPtr) \
|
||||
do { \
|
||||
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
|
||||
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
|
||||
UVM_DBG_PRINT("%s: %s\n", msg, uuidBuffer); \
|
||||
} while (0)
|
||||
|
||||
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
|
||||
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
|
||||
UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)
|
||||
|
||||
#define UVM_ERR_PRINT_UUID(msg, uuidPtr, ...) \
|
||||
do { \
|
||||
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
|
||||
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
|
||||
UVM_ERR_PRINT("ERROR: %s : " msg "\n", uuidBuffer, ##__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
|
||||
#define UVM_PANIC_MSG(fmt, ...) UVM_PRINT_FUNC(panic, ": " fmt, ##__VA_ARGS__)
|
||||
|
||||
@@ -149,13 +143,13 @@ void on_uvm_test_fail(void);
|
||||
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
|
||||
void on_uvm_assert(void);
|
||||
|
||||
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) { \
|
||||
UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
|
||||
dump_stack(); \
|
||||
on_uvm_assert(); \
|
||||
} \
|
||||
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) { \
|
||||
UVM_ERR_PRINT_ALWAYS("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
|
||||
dump_stack(); \
|
||||
on_uvm_assert(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// Prevent function calls in expr and the print argument list from being
|
||||
@@ -166,7 +160,8 @@ void on_uvm_assert(void);
|
||||
UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
|
||||
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
|
||||
// builds.
|
||||
#if UVM_IS_DEBUG() || defined __COVERITY__
|
||||
#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
|
||||
#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")
|
||||
@@ -189,16 +184,16 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
|
||||
// Given these are enabled for release builds, we need to be more cautious than
|
||||
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
|
||||
// param is enabled.
|
||||
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
|
||||
do { \
|
||||
if (uvm_release_asserts && unlikely(!(expr))) { \
|
||||
UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
|
||||
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
|
||||
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
|
||||
if (uvm_release_asserts_dump_stack) \
|
||||
dump_stack(); \
|
||||
on_uvm_assert(); \
|
||||
} \
|
||||
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
|
||||
do { \
|
||||
if (uvm_release_asserts && unlikely(!(expr))) { \
|
||||
UVM_ERR_PRINT_ALWAYS_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
|
||||
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
|
||||
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
|
||||
if (uvm_release_asserts_dump_stack) \
|
||||
dump_stack(); \
|
||||
on_uvm_assert(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
|
||||
@@ -395,7 +390,7 @@ static inline void uvm_touch_page(struct page *page)
|
||||
UVM_ASSERT(page);
|
||||
|
||||
mapping = (char *) kmap(page);
|
||||
(void)UVM_READ_ONCE(*mapping);
|
||||
(void)READ_ONCE(*mapping);
|
||||
kunmap(page);
|
||||
}
|
||||
|
||||
@@ -423,7 +418,9 @@ static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *
|
||||
UVM_ASSERT(first);
|
||||
UVM_ASSERT(outer);
|
||||
|
||||
if (uvm_platform_uses_canonical_form_address()) {
|
||||
// Maxwell GPUs (num_va_bits == 40b) do not support canonical form address
|
||||
// even when plugged into platforms using it.
|
||||
if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) {
|
||||
*first = 1ULL << (num_va_bits - 1);
|
||||
*outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
|
||||
}
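// Worked example (not part of the change): with num_va_bits == 49 on a
// platform using canonical form addresses, the assignments above yield
//
//     *first = 1ULL << 48                         = 0x0001000000000000
//     *outer = (NvU64)((NvS64)(1ULL << 63) >> 15) = 0xFFFF000000000000
//
// i.e. the unaddressable hole spans [*first, *outer).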
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
Copyright (c) 2021-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -33,6 +33,15 @@
|
||||
#include "nv_uvm_interface.h"
|
||||
#include "uvm_va_block.h"
|
||||
|
||||
// Amount of encrypted data on a given engine that triggers key rotation. This
// is a UVM-internal threshold, different from that of RM, and used only during
// testing.
//
// Key rotation is triggered when the total encryption size or the total
// decryption size (whichever comes first) reaches this lower threshold on the
// engine.
#define UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD (UVM_SIZE_1MB * 8)
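// For example (illustrative, assuming UVM_SIZE_1MB is 1 MiB): the threshold is
// 8 MiB, so when built-in tests are enabled a key rotation becomes pending on
// an engine once more than 8 MiB has been encrypted, or more than 8 MiB has
// been decrypted, on that engine since the last rotation; see
// conf_computing_is_key_rotation_pending_use_stats() below.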
|
||||
|
||||
// The maximum number of secure operations per push is:
|
||||
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
|
||||
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
|
||||
@@ -56,32 +65,13 @@ static ulong uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_I
|
||||
|
||||
module_param(uvm_conf_computing_channel_iv_rotation_limit, ulong, S_IRUGO);
|
||||
|
||||
static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
|
||||
{
|
||||
return parent->rm_info.gpuConfComputeCaps.mode;
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
|
||||
{
|
||||
return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
|
||||
}
|
||||
|
||||
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
|
||||
{
|
||||
uvm_parent_gpu_t *other_parent;
|
||||
UvmGpuConfComputeMode parent_mode = uvm_conf_computing_get_mode(parent);
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
// The Confidential Computing state of the GPU should match that of the
|
||||
// system.
|
||||
UVM_ASSERT((parent_mode != UVM_GPU_CONF_COMPUTE_MODE_NONE) == g_uvm_global.conf_computing_enabled);
|
||||
|
||||
// All GPUs derive Confidential Computing status from their parent. By
|
||||
// current policy all parent GPUs have identical Confidential Computing
|
||||
// status.
|
||||
for_each_parent_gpu(other_parent)
|
||||
UVM_ASSERT(parent_mode == uvm_conf_computing_get_mode(other_parent));
|
||||
// Confidential Computing enablement on the system should match enablement
|
||||
// on the GPU.
|
||||
UVM_ASSERT(parent->rm_info.gpuConfComputeCaps.bConfComputingEnabled == g_uvm_global.conf_computing_enabled);
|
||||
}
|
||||
|
||||
static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
|
||||
@@ -334,9 +324,6 @@ static NV_STATUS dummy_iv_mem_init(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
if (!uvm_conf_computing_mode_is_hcc(gpu))
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_mem_alloc_sysmem_dma(sizeof(UvmCslIv), gpu, NULL, &gpu->conf_computing.iv_mem);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@@ -352,6 +339,19 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
// The production key rotation defaults are such that key rotations rarely
// happen. During UVM testing, more frequent rotations are triggered by relying
// on internal encryption usage accounting. When key rotations are triggered by
// UVM, the driver does not rely on channel key rotation notifiers.
//
|
||||
// TODO: Bug 4612912: UVM should be able to programmatically set the rotation
|
||||
// lower threshold. This function, and all the metadata associated with it
|
||||
// (per-pool encryption accounting, for example) can be removed at that point.
|
||||
static bool key_rotation_is_notifier_driven(void)
|
||||
{
|
||||
return !uvm_enable_builtin_tests;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@@ -394,17 +394,35 @@ void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
|
||||
conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
|
||||
}
|
||||
|
||||
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
|
||||
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
if (uvm_channel_is_lcic(channel))
|
||||
pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
|
||||
else
|
||||
pool = channel->pool;
|
||||
|
||||
uvm_mutex_lock(&channel->csl.ctx_lock);
|
||||
|
||||
if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
|
||||
status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, size);
|
||||
|
||||
// Informing RM of an encryption/decryption should not fail
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
if (!key_rotation_is_notifier_driven())
|
||||
atomic64_add(size, &pool->conf_computing.key_rotation.encrypted);
|
||||
}
|
||||
|
||||
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
|
||||
// IV rotation is done preemptively as needed, so the above
|
||||
// call cannot return failure.
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
}
|
||||
|
||||
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
|
||||
@@ -428,27 +446,46 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
|
||||
void *auth_tag_buffer)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
UVM_ASSERT(size);
|
||||
|
||||
if (uvm_channel_is_lcic(channel))
|
||||
pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
|
||||
else
|
||||
pool = channel->pool;
|
||||
|
||||
uvm_mutex_lock(&channel->csl.ctx_lock);
|
||||
|
||||
status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
|
||||
size,
|
||||
(NvU8 const *) src_plain,
|
||||
encrypt_iv,
|
||||
(NvU8 *) dst_cipher,
|
||||
(NvU8 *) auth_tag_buffer);
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
|
||||
// IV rotation is done preemptively as needed, so the above
|
||||
// call cannot return failure.
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
|
||||
status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, size);
|
||||
|
||||
// Informing RM of an encryption/decryption should not fail
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
if (!key_rotation_is_notifier_driven())
|
||||
atomic64_add(size, &pool->conf_computing.key_rotation.decrypted);
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
void *dst_plain,
|
||||
const void *src_cipher,
|
||||
const UvmCslIv *src_iv,
|
||||
NvU32 key_version,
|
||||
size_t size,
|
||||
const void *auth_tag_buffer)
|
||||
{
|
||||
@@ -469,11 +506,19 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
size,
|
||||
(const NvU8 *) src_cipher,
|
||||
src_iv,
|
||||
NV_U32_MAX,
|
||||
key_version,
|
||||
(NvU8 *) dst_plain,
|
||||
NULL,
|
||||
0,
|
||||
(const NvU8 *) auth_tag_buffer);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, channel %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
channel->name,
|
||||
uvm_gpu_name(uvm_channel_get_gpu(channel)));
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
|
||||
return status;
|
||||
@@ -487,7 +532,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
|
||||
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
|
||||
|
||||
// There is no dedicated lock for the CSL context associated with replayable
|
||||
// faults. The mutual exclusion required by the RM CSL API is enforced by
|
||||
@@ -526,7 +571,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
|
||||
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
|
||||
|
||||
// See comment in uvm_conf_computing_fault_decrypt
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
@@ -640,3 +685,231 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
|
||||
{
|
||||
return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, true);
|
||||
}
|
||||
|
||||
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu)
|
||||
{
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return;
|
||||
|
||||
// Key rotation cannot be enabled on UVM if it is disabled on RM
|
||||
if (!gpu->parent->rm_info.gpuConfComputeCaps.bKeyRotationEnabled)
|
||||
return;
|
||||
|
||||
gpu->channel_manager->conf_computing.key_rotation_enabled = true;
|
||||
}
|
||||
|
||||
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
|
||||
{
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return;
|
||||
|
||||
gpu->channel_manager->conf_computing.key_rotation_enabled = false;
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
|
||||
{
|
||||
return gpu->channel_manager->conf_computing.key_rotation_enabled;
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled(pool->manager->gpu))
|
||||
return false;
|
||||
|
||||
// TODO: Bug 4586447: key rotation must be disabled in the SEC2 engine,
|
||||
// because currently the encryption key is shared between UVM and RM, but
|
||||
// UVM is not able to idle SEC2 channels owned by RM.
|
||||
if (uvm_channel_pool_is_sec2(pool))
|
||||
return false;
|
||||
|
||||
// Key rotation happens as part of channel reservation, and LCIC channels
|
||||
// are never reserved directly. Rotation of keys in LCIC channels happens
|
||||
// as the result of key rotation in WLC channels.
|
||||
//
|
||||
// Return false even if there is nothing fundamental prohibiting direct key
|
||||
// rotation on LCIC pools
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool conf_computing_is_key_rotation_pending_use_stats(uvm_channel_pool_t *pool)
|
||||
{
|
||||
NvU64 decrypted, encrypted;
|
||||
|
||||
UVM_ASSERT(!key_rotation_is_notifier_driven());
|
||||
|
||||
decrypted = atomic64_read(&pool->conf_computing.key_rotation.decrypted);
|
||||
|
||||
if (decrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
|
||||
return true;
|
||||
|
||||
encrypted = atomic64_read(&pool->conf_computing.key_rotation.encrypted);
|
||||
|
||||
if (encrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool conf_computing_is_key_rotation_pending_use_notifier(uvm_channel_pool_t *pool)
|
||||
{
|
||||
// If key rotation is pending for the pool's engine, then the key rotation
|
||||
// notifier in any of the engine channels can be used by UVM to detect the
|
||||
// situation. Note that RM doesn't update all the notifiers in a single
|
||||
// atomic operation, so it is possible that the channel read by UVM (the
|
||||
// first one in the pool) indicates that a key rotation is pending, but
|
||||
// another channel in the pool (temporarily) indicates the opposite, or vice
|
||||
// versa.
|
||||
uvm_channel_t *first_channel = pool->channels;
|
||||
|
||||
UVM_ASSERT(key_rotation_is_notifier_driven());
|
||||
UVM_ASSERT(first_channel != NULL);
|
||||
|
||||
return first_channel->channel_info.keyRotationNotifier->status == UVM_KEY_ROTATION_STATUS_PENDING;
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
|
||||
return false;
|
||||
|
||||
if (key_rotation_is_notifier_driven())
|
||||
return conf_computing_is_key_rotation_pending_use_notifier(pool);
|
||||
else
|
||||
return conf_computing_is_key_rotation_pending_use_stats(pool);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_is_key_rotation_enabled_in_pool(pool));
|
||||
UVM_ASSERT(pool->conf_computing.key_rotation.csl_contexts != NULL);
|
||||
UVM_ASSERT(pool->conf_computing.key_rotation.num_csl_contexts > 0);
|
||||
|
||||
// NV_ERR_STATE_IN_USE indicates that RM was not able to acquire the
|
||||
// required locks at this time. This status is not interpreted as an error,
|
||||
// but as a sign for UVM to try again later. This is the same "protocol"
|
||||
// used in IV rotation.
|
||||
status = nvUvmInterfaceCslRotateKey(pool->conf_computing.key_rotation.csl_contexts,
|
||||
pool->conf_computing.key_rotation.num_csl_contexts);
|
||||
|
||||
if (status == NV_OK) {
|
||||
pool->conf_computing.key_rotation.version++;
|
||||
|
||||
if (!key_rotation_is_notifier_driven()) {
|
||||
atomic64_set(&pool->conf_computing.key_rotation.decrypted, 0);
|
||||
atomic64_set(&pool->conf_computing.key_rotation.encrypted, 0);
|
||||
}
|
||||
}
|
||||
else if (status != NV_ERR_STATE_IN_USE) {
|
||||
UVM_DBG_PRINT("nvUvmInterfaceCslRotateKey() failed in engine %u: %s\n",
|
||||
pool->engine_index,
|
||||
nvstatusToString(status));
|
||||
}
|
||||
|
||||
return status;
|
||||
}
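// A minimal caller sketch (illustrative only; the real callers are in the
// channel code and coordinate this with channel reservation).
// NV_ERR_STATE_IN_USE is treated as "try again later" rather than as a failure:
//
//     status = uvm_conf_computing_rotate_pool_key(pool);
//     if (status == NV_ERR_STATE_IN_USE) {
//         // RM could not take the required locks; leave the rotation pending
//         // and retry on a later reservation.
//         status = NV_OK;
//     }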
|
||||
|
||||
__attribute__ ((format(printf, 6, 7)))
|
||||
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
|
||||
uvm_gpu_address_t dst_gpu_address,
|
||||
void *src_plain,
|
||||
size_t size,
|
||||
uvm_tracker_t *tracker,
|
||||
const char *format,
|
||||
...)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_conf_computing_dma_buffer_t *dma_buffer;
|
||||
uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
|
||||
void *dst_cipher, *auth_tag;
|
||||
va_list args;
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
|
||||
|
||||
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
va_start(args, format);
|
||||
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
|
||||
va_end(args);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
|
||||
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
|
||||
uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);
|
||||
|
||||
src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
|
||||
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
|
||||
gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
|
||||
out:
|
||||
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
|
||||
return status;
|
||||
}
|
||||
|
||||
__attribute__ ((format(printf, 6, 7)))
|
||||
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
|
||||
void *dst_plain,
|
||||
uvm_gpu_address_t src_gpu_address,
|
||||
size_t size,
|
||||
uvm_tracker_t *tracker,
|
||||
const char *format,
|
||||
...)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_conf_computing_dma_buffer_t *dma_buffer;
|
||||
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
|
||||
void *src_cipher, *auth_tag;
|
||||
va_list args;
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
|
||||
|
||||
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
va_start(args, format);
|
||||
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
|
||||
va_end(args);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
uvm_conf_computing_log_gpu_encryption(push.channel, size, dma_buffer->decrypt_iv);
|
||||
dma_buffer->key_version[0] = uvm_channel_pool_key_version(push.channel->pool);
|
||||
|
||||
dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
|
||||
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
|
||||
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
|
||||
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
|
||||
status = uvm_conf_computing_cpu_decrypt(push.channel,
|
||||
dst_plain,
|
||||
src_cipher,
|
||||
dma_buffer->decrypt_iv,
|
||||
dma_buffer->key_version[0],
|
||||
size,
|
||||
auth_tag);
|
||||
|
||||
out:
|
||||
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
Copyright (c) 2021-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -62,8 +62,6 @@
|
||||
|
||||
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);
|
||||
|
||||
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// List of free DMA buffers (uvm_conf_computing_dma_buffer_t).
|
||||
@@ -87,9 +85,9 @@ typedef struct
|
||||
// a free buffer.
|
||||
uvm_tracker_t tracker;
|
||||
|
||||
// When the DMA buffer is used as the destination of a GPU encryption, SEC2
|
||||
// writes the authentication tag here. Later when the buffer is decrypted
|
||||
// on the CPU the authentication tag is used again (read) for CSL to verify
|
||||
// When the DMA buffer is used as the destination of a GPU encryption, the
|
||||
// engine (CE or SEC2) writes the authentication tag here. When the buffer
|
||||
// is decrypted on the CPU the authentication tag is used by CSL to verify
|
||||
// the authenticity. The allocation is big enough for one authentication
|
||||
// tag per PAGE_SIZE page in the alloc buffer.
|
||||
uvm_mem_t *auth_tag;
|
||||
@@ -98,7 +96,12 @@ typedef struct
|
||||
// to the authentication tag. The allocation is big enough for one IV per
|
||||
// PAGE_SIZE page in the alloc buffer. The granularity between the decrypt
|
||||
// IV and authentication tag must match.
|
||||
UvmCslIv decrypt_iv[(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE)];
|
||||
UvmCslIv decrypt_iv[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];
|
||||
|
||||
// When the DMA buffer is used as the destination of a GPU encryption, the
|
||||
// key version used during GPU encryption of each PAGE_SIZE page can be
|
||||
// saved here, so CPU decryption uses the correct decryption key.
|
||||
NvU32 key_version[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];
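// Illustrative indexing sketch (the offset variable is hypothetical): both
// arrays are indexed by the PAGE_SIZE page within the backing allocation, so a
// caller decrypting the page at a given byte offset would use
//
//     size_t page_index = offset_in_alloc / PAGE_SIZE;
//     const UvmCslIv *iv = &dma_buffer->decrypt_iv[page_index];
//     NvU32 key_version  = dma_buffer->key_version[page_index];
//
// A single-encryption case can be seen in
// uvm_conf_computing_util_memcopy_gpu_to_cpu(), which only uses index 0.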
|
||||
|
||||
// Bitmap of the encrypted pages in the backing allocation
|
||||
uvm_page_mask_t encrypted_page_mask;
|
||||
@@ -147,7 +150,7 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu);
|
||||
void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu);
|
||||
|
||||
// Logs encryption information from the GPU and returns the IV.
|
||||
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv);
|
||||
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv);
|
||||
|
||||
// Acquires next CPU encryption IV and returns it.
|
||||
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv);
|
||||
@@ -167,10 +170,14 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
|
||||
// CPU side decryption helper. Decrypts data from src_cipher and writes the
// plain text in dst_plain. src_cipher and dst_plain can't overlap. The IV
// obtained from uvm_conf_computing_log_gpu_encryption() needs to be passed in
// src_iv.
//
// The caller must indicate which key to use for decryption by passing the
// appropriate key version number.
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
void *dst_plain,
|
||||
const void *src_cipher,
|
||||
const UvmCslIv *src_iv,
|
||||
NvU32 key_version,
|
||||
size_t size,
|
||||
const void *auth_tag_buffer);
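// Typical usage sketch (illustrative; uvm_conf_computing_util_memcopy_gpu_to_cpu()
// in this change is a complete example of the same flow):
//
//     UvmCslIv iv;
//     NvU32 key_version;
//
//     uvm_conf_computing_log_gpu_encryption(channel, size, &iv);
//     key_version = uvm_channel_pool_key_version(channel->pool);
//
//     // ... GPU-side CE encryption writes src_cipher and auth_tag ...
//
//     status = uvm_conf_computing_cpu_decrypt(channel, dst_plain, src_cipher,
//                                             &iv, key_version, size, auth_tag);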
|
||||
|
||||
@@ -214,4 +221,71 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
|
||||
// Check if there are fewer than 'limit' messages available in either direction
|
||||
// and rotate if not.
|
||||
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy);
|
||||
|
||||
// Rotate the engine key associated with the given channel pool.
|
||||
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool);
|
||||
|
||||
// Returns true if key rotation is allowed in the channel pool.
|
||||
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool);
|
||||
|
||||
// Returns true if key rotation is pending in the channel pool.
|
||||
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool);
|
||||
|
||||
// Enable/disable key rotation on the given GPU. Note that UVM enablement is
// dependent on RM enablement: key rotation may still be disabled upon calling
// this function, if it is disabled in RM. On the other hand, key rotation can
// be disabled in UVM even if it is enabled in RM.
//
// Enablement/disablement affects only kernel key rotation of keys owned by UVM.
// It affects neither user key rotation (CUDA, video, etc.) nor RM kernel key
// rotation.
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu);
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu);

// Returns true if key rotation is enabled in UVM for the given GPU. Key
// rotation can be enabled on the GPU but disabled for some of its engines
// (LCEs or SEC2); see uvm_conf_computing_is_key_rotation_enabled_in_pool().
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu);

// Launch a synchronous, encrypted copy between CPU and GPU.
|
||||
//
|
||||
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
|
||||
//
|
||||
// The source CPU buffer pointed by src_plain contains the unencrypted (plain
|
||||
// text) contents; the function internally performs a CPU-side encryption step
|
||||
// before launching the GPU-side CE decryption. The source buffer can be in
|
||||
// protected or unprotected sysmem, while the destination buffer must be in
|
||||
// protected vidmem.
|
||||
//
|
||||
// The input tracker, if not NULL, is internally acquired by the push
|
||||
// responsible for the encrypted copy.
|
||||
__attribute__ ((format(printf, 6, 7)))
|
||||
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
|
||||
uvm_gpu_address_t dst_gpu_address,
|
||||
void *src_plain,
|
||||
size_t size,
|
||||
uvm_tracker_t *tracker,
|
||||
const char *format,
|
||||
...);
|
||||
|
||||
// Launch a synchronous, encrypted copy between GPU and CPU.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The source GPU buffer pointed to by src_gpu_address contains the plain text
// contents; the function internally performs a GPU-side CE encryption step
// before the CPU-side decryption, and writes the decrypted result to the CPU
// buffer pointed to by dst_plain. The source buffer must be in protected
// vidmem, while the destination buffer can be in protected or unprotected
// sysmem.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
|
||||
__attribute__ ((format(printf, 6, 7)))
|
||||
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
|
||||
void *dst_plain,
|
||||
uvm_gpu_address_t src_gpu_address,
|
||||
size_t size,
|
||||
uvm_tracker_t *tracker,
|
||||
const char *format,
|
||||
...);
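// Illustrative call (the destination struct and GPU address are hypothetical):
//
//     status = uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
//                                                         &payload,
//                                                         payload_gpu_address,
//                                                         sizeof(payload),
//                                                         NULL,
//                                                         "Decrypt %s payload",
//                                                         "semaphore");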
|
||||
#endif // __UVM_CONF_COMPUTING_H__
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
// This file provides simple wrappers that are always built with optimizations
|
||||
// turned on to WAR issues with functions that don't build correctly otherwise.
|
||||
|
||||
#include "uvm_linux.h"
|
||||
|
||||
int nv_atomic_xchg(atomic_t *val, int new)
|
||||
{
|
||||
return atomic_xchg(val, new);
|
||||
}
|
||||
|
||||
int nv_atomic_cmpxchg(atomic_t *val, int old, int new)
|
||||
{
|
||||
return atomic_cmpxchg(val, old, new);
|
||||
}
|
||||
|
||||
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new)
|
||||
{
|
||||
return atomic_long_cmpxchg(val, old, new);
|
||||
}
|
||||
|
||||
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
return copy_from_user(to, from, n);
|
||||
}
|
||||
|
||||
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
return copy_to_user(to, from, n);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -42,7 +42,6 @@ typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
|
||||
typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
|
||||
typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;
|
||||
typedef struct uvm_gpu_semaphore_pool_page_struct uvm_gpu_semaphore_pool_page_t;
|
||||
typedef struct uvm_gpu_peer_struct uvm_gpu_peer_t;
|
||||
typedef struct uvm_mmu_mode_hal_struct uvm_mmu_mode_hal_t;
|
||||
|
||||
typedef struct uvm_channel_manager_struct uvm_channel_manager_t;
|
||||
@@ -57,6 +56,12 @@ typedef struct uvm_gpfifo_entry_struct uvm_gpfifo_entry_t;
|
||||
|
||||
typedef struct uvm_va_policy_struct uvm_va_policy_t;
|
||||
typedef struct uvm_va_range_struct uvm_va_range_t;
|
||||
typedef struct uvm_va_range_managed_struct uvm_va_range_managed_t;
|
||||
typedef struct uvm_va_range_external_struct uvm_va_range_external_t;
|
||||
typedef struct uvm_va_range_channel_struct uvm_va_range_channel_t;
|
||||
typedef struct uvm_va_range_sked_reflected_struct uvm_va_range_sked_reflected_t;
|
||||
typedef struct uvm_va_range_semaphore_pool_struct uvm_va_range_semaphore_pool_t;
|
||||
typedef struct uvm_va_range_device_p2p_struct uvm_va_range_device_p2p_t;
|
||||
typedef struct uvm_va_block_struct uvm_va_block_t;
|
||||
typedef struct uvm_va_block_test_struct uvm_va_block_test_t;
|
||||
typedef struct uvm_va_block_wrapper_struct uvm_va_block_wrapper_t;
|
||||
@@ -88,11 +93,11 @@ typedef struct uvm_service_block_context_struct uvm_service_block_context_t;
|
||||
|
||||
typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;
|
||||
|
||||
typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
|
||||
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
|
||||
typedef struct uvm_replayable_fault_buffer_struct uvm_replayable_fault_buffer_t;
|
||||
typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_buffer_t;
|
||||
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
|
||||
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
|
||||
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
|
||||
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;
|
||||
|
||||
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;
|
||||
|
||||
|
||||
@@ -115,8 +115,8 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
|
||||
|
||||
TEST_CHECK_RET(skip);
|
||||
|
||||
memory_owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &memory_info->uuid);
|
||||
if (memory_owning_gpu == NULL)
|
||||
memory_owning_gpu = uvm_va_space_get_gpu_by_mem_info(va_space, memory_info);
|
||||
if (!memory_owning_gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
aperture = get_aperture(va_space, memory_owning_gpu, memory_mapping_gpu, memory_info, sli_supported);
|
||||
@@ -129,7 +129,8 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
|
||||
phys_offset = mapping_offset;
|
||||
|
||||
// Add the physical offset for nvswitch connected peer mappings
|
||||
if (uvm_aperture_is_peer(aperture) && uvm_gpus_are_nvswitch_connected(memory_mapping_gpu, memory_owning_gpu))
|
||||
if (uvm_aperture_is_peer(aperture) &&
|
||||
uvm_parent_gpus_are_nvswitch_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
|
||||
phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
|
||||
|
||||
for (index = 0; index < ext_mapping_info->numWrittenPtes; index++) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "uvm_mmu.h"
|
||||
#include "uvm_perf_heuristics.h"
|
||||
#include "uvm_pmm_sysmem.h"
|
||||
#include "uvm_pmm_gpu.h"
|
||||
#include "uvm_migrate.h"
|
||||
#include "uvm_gpu_access_counters.h"
|
||||
#include "uvm_va_space_mm.h"
|
||||
@@ -54,6 +55,9 @@ static NV_STATUS uvm_register_callbacks(void)
|
||||
g_exported_uvm_ops.stopDevice = NULL;
|
||||
g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;
|
||||
|
||||
g_exported_uvm_ops.drainP2P = uvm_suspend_and_drainP2P_entry;
|
||||
g_exported_uvm_ops.resumeP2P = uvm_resumeP2P_entry;
|
||||
|
||||
// Register the UVM callbacks with the main GPU driver:
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
|
||||
if (status != NV_OK)
|
||||
@@ -87,6 +91,8 @@ NV_STATUS uvm_global_init(void)
|
||||
uvm_spin_lock_irqsave_init(&g_uvm_global.gpu_table_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_mutex_init(&g_uvm_global.va_spaces.lock, UVM_LOCK_ORDER_VA_SPACES_LIST);
|
||||
INIT_LIST_HEAD(&g_uvm_global.va_spaces.list);
|
||||
uvm_mutex_init(&g_uvm_global.devmem_ranges.lock, UVM_LOCK_ORDER_LEAF);
|
||||
INIT_LIST_HEAD(&g_uvm_global.devmem_ranges.list);
|
||||
|
||||
status = uvm_kvmalloc_init();
|
||||
if (status != NV_OK) {
|
||||
@@ -100,12 +106,6 @@ NV_STATUS uvm_global_init(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = uvm_procfs_init();
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
|
||||
@@ -194,15 +194,28 @@ NV_STATUS uvm_global_init(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
// This sets up the ISR (interrupt service routine), by hooking into RM's top-half ISR callback. As soon as this
|
||||
// call completes, GPU interrupts will start arriving, so it's important to be prepared to receive interrupts before
|
||||
// this point:
|
||||
status = uvm_access_counters_init();
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_access_counters_init failed: %s\n", nvstatusToString(status));
|
||||
goto error;
|
||||
}
|
||||
|
||||
// This sets up the ISR (interrupt service routine), by hooking into RM's
|
||||
// top-half ISR callback. As soon as this call completes, GPU interrupts
|
||||
// will start arriving, so it's important to be prepared to receive
|
||||
// interrupts before this point.
|
||||
status = uvm_register_callbacks();
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
|
||||
goto error;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
||||
error:
|
||||
@@ -214,13 +227,11 @@ void uvm_global_exit(void)
|
||||
{
|
||||
uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);
|
||||
|
||||
// Guarantee completion of any release callbacks scheduled after the flush
|
||||
// in uvm_resume().
|
||||
nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
|
||||
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
|
||||
|
||||
uvm_unregister_callbacks();
|
||||
uvm_access_counters_exit();
|
||||
uvm_service_block_context_exit();
|
||||
|
||||
uvm_perf_heuristics_exit();
|
||||
uvm_perf_events_exit();
|
||||
uvm_migrate_exit();
|
||||
@@ -229,6 +240,7 @@ void uvm_global_exit(void)
|
||||
uvm_va_policy_exit();
|
||||
uvm_mem_global_exit();
|
||||
uvm_pmm_sysmem_exit();
|
||||
uvm_pmm_devmem_exit();
|
||||
uvm_gpu_exit();
|
||||
uvm_processor_mask_cache_exit();
|
||||
|
||||
@@ -237,7 +249,6 @@ void uvm_global_exit(void)
|
||||
|
||||
uvm_procfs_exit();
|
||||
|
||||
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
|
||||
nv_kthread_q_stop(&g_uvm_global.global_q);
|
||||
|
||||
uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
|
||||
@@ -282,7 +293,7 @@ static NV_STATUS uvm_suspend(void)
|
||||
// * Flush relevant kthread queues (bottom half, etc.)
|
||||
|
||||
// Some locks acquired by this function, such as pm.lock, are released
|
||||
// by uvm_resume(). This is contrary to the lock tracking code's
|
||||
// by uvm_resume(). This is contrary to the lock tracking code's
|
||||
// expectations, so lock tracking is disabled.
|
||||
uvm_thread_context_lock_disable_tracking();
|
||||
|
||||
@@ -299,7 +310,7 @@ static NV_STATUS uvm_suspend(void)
|
||||
gpu = uvm_gpu_get(gpu_id);
|
||||
|
||||
// Since fault buffer state may be lost across sleep cycles, UVM must
|
||||
// ensure any outstanding replayable faults are dismissed. The RM
|
||||
// ensure any outstanding replayable faults are dismissed. The RM
|
||||
// guarantees that all user channels have been preempted before
|
||||
// uvm_suspend() is called, which implies that no user channels can be
|
||||
// stalled on faults when this point is reached.
|
||||
@@ -325,7 +336,7 @@ static NV_STATUS uvm_suspend(void)
|
||||
}
|
||||
|
||||
// Acquire each VA space's lock in write mode to lock out VMA open and
|
||||
// release callbacks. These entry points do not have feasible early exit
|
||||
// release callbacks. These entry points do not have feasible early exit
|
||||
// options, and so aren't suitable for synchronization with pm.lock.
|
||||
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
|
||||
|
||||
@@ -355,7 +366,7 @@ static NV_STATUS uvm_resume(void)
|
||||
g_uvm_global.pm.is_suspended = false;
|
||||
|
||||
// Some locks released by this function, such as pm.lock, were acquired
|
||||
// by uvm_suspend(). This is contrary to the lock tracking code's
|
||||
// by uvm_suspend(). This is contrary to the lock tracking code's
|
||||
// expectations, so lock tracking is disabled.
|
||||
uvm_thread_context_lock_disable_tracking();
|
||||
|
||||
@@ -387,7 +398,7 @@ static NV_STATUS uvm_resume(void)
|
||||
uvm_thread_context_lock_enable_tracking();
|
||||
|
||||
// Force completion of any release callbacks successfully queued for
|
||||
// deferred completion while suspended. The deferred release
|
||||
// deferred completion while suspended. The deferred release
|
||||
// queue is not guaranteed to remain empty following this flush since
|
||||
// some threads that failed to acquire pm.lock in uvm_release() may
|
||||
// not have scheduled their handlers yet.
|
||||
@@ -412,15 +423,18 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
|
||||
|
||||
UVM_ASSERT(error != NV_OK);
|
||||
|
||||
previous_error = nv_atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
|
||||
previous_error = atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
|
||||
|
||||
if (previous_error == NV_OK) {
|
||||
UVM_ERR_PRINT("Encountered a global fatal error: %s\n", nvstatusToString(error));
|
||||
}
|
||||
else {
|
||||
UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
|
||||
nvstatusToString(error), nvstatusToString(previous_error));
|
||||
nvstatusToString(error),
|
||||
nvstatusToString(previous_error));
|
||||
}
|
||||
|
||||
nvUvmInterfaceReportFatalError(error);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_reset_fatal_error(void)
|
||||
@@ -430,7 +444,7 @@ NV_STATUS uvm_global_reset_fatal_error(void)
|
||||
return NV_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
|
||||
return atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
|
||||
}
|
||||
|
||||
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask)
|
||||
@@ -470,3 +484,68 @@ NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS suspend_and_drainP2P(const NvProcessorUuid *parent_uuid)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
// NVLINK STO recovery is not supported in combination with MIG
|
||||
parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
|
||||
if (!parent_gpu || parent_gpu->smc.enabled) {
|
||||
status = NV_ERR_INVALID_DEVICE;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
status = uvm_channel_manager_suspend_p2p(parent_gpu->gpus[0]->channel_manager);
|
||||
|
||||
unlock:
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS resumeP2P(const NvProcessorUuid *parent_uuid)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
// NVLINK STO recovery is not supported in combination with MIG
|
||||
parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
|
||||
if (!parent_gpu || parent_gpu->smc.enabled) {
|
||||
status = NV_ERR_INVALID_DEVICE;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
uvm_channel_manager_resume_p2p(parent_gpu->gpus[0]->channel_manager);
|
||||
|
||||
unlock:
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid)
|
||||
{
|
||||
UVM_ENTRY_RET(suspend_and_drainP2P(uuid));
|
||||
}
|
||||
|
||||
NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid)
|
||||
{
|
||||
UVM_ENTRY_RET(resumeP2P(uuid));
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_gpu_in_mask(gpu, gpus) {
|
||||
NV_STATUS status = uvm_gpu_check_nvlink_error(gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -52,19 +52,23 @@ struct uvm_global_struct
|
||||
// Created on module load and destroyed on module unload
|
||||
uvmGpuSessionHandle rm_session_handle;
|
||||
|
||||
// peer-to-peer table
|
||||
// peer info is added and removed from this table when usermode
|
||||
// driver calls UvmEnablePeerAccess and UvmDisablePeerAccess
|
||||
// respectively.
|
||||
uvm_gpu_peer_t peers[UVM_MAX_UNIQUE_GPU_PAIRS];
|
||||
// Peer-to-peer table for storing parent GPU and MIG instance peer info.
// Note that MIG instances can be peers within a single parent GPU or
// be peers in different parent GPUs if NVLINK or PCIe peer access is enabled.
// PCIe and MIG peer info is added to and removed from this table when the
// usermode driver calls UvmEnablePeerAccess() and UvmDisablePeerAccess(),
// respectively. NVLINK and MIG peers are updated when UvmRegisterGpu() and
// UvmUnregisterGpu() are called. Peer-to-peer state for MIG instances
// within the same parent GPU is not stored here.
uvm_parent_gpu_peer_t parent_gpu_peers[UVM_MAX_UNIQUE_PARENT_GPU_PAIRS];
|
||||
|
||||
// peer-to-peer copy mode
|
||||
// Pascal+ GPUs support virtual addresses in p2p copies.
|
||||
// Ampere+ GPUs add support for physical addresses in p2p copies.
|
||||
uvm_gpu_peer_copy_mode_t peer_copy_mode;
|
||||
|
||||
// Stores an NV_STATUS, once it becomes != NV_OK, the driver should refuse to
|
||||
// do most anything other than try and clean up as much as possible.
|
||||
// Stores an NV_STATUS. Once it becomes != NV_OK, the driver should refuse
// to do almost anything other than try to clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
|
||||
// GPUs.
|
||||
atomic_t fatal_error;
|
||||
@@ -153,6 +157,12 @@ struct uvm_global_struct
|
||||
// This field is set once during global initialization (uvm_global_init),
|
||||
// and can be read afterwards without acquiring any locks.
|
||||
bool conf_computing_enabled;
|
||||
|
||||
// List of all devmem ranges allocated on this GPU
|
||||
struct {
|
||||
uvm_mutex_t lock;
|
||||
struct list_head list;
|
||||
} devmem_ranges;
|
||||
};
|
||||
|
||||
// Initialize global uvm state
|
||||
@@ -167,6 +177,12 @@ NV_STATUS uvm_suspend_entry(void);
|
||||
// Recover after exit from a system sleep state
|
||||
NV_STATUS uvm_resume_entry(void);
|
||||
|
||||
// Block all P2P traffic on the GPU's channels
|
||||
NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid);
|
||||
|
||||
// Resume P2P traffic on the GPU's channels
|
||||
NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid);
|
||||
|
||||
// Add parent GPU to the global table.
|
||||
//
|
||||
// LOCKING: requires that you hold the global lock and gpu_table_lock
|
||||
@@ -232,12 +248,12 @@ static uvmGpuSessionHandle uvm_global_session_handle(void)
|
||||
// suspended.
|
||||
#define UVM_GPU_WRITE_ONCE(x, val) do { \
|
||||
UVM_ASSERT(!uvm_global_is_suspended()); \
|
||||
UVM_WRITE_ONCE(x, val); \
|
||||
WRITE_ONCE(x, val); \
|
||||
} while (0)
|
||||
|
||||
#define UVM_GPU_READ_ONCE(x) ({ \
|
||||
UVM_ASSERT(!uvm_global_is_suspended()); \
|
||||
UVM_READ_ONCE(x); \
|
||||
READ_ONCE(x); \
|
||||
})
|
||||
|
||||
static bool global_is_fatal_error_assert_disabled(void)
|
||||
@@ -296,7 +312,7 @@ static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *
|
||||
return gpu;
|
||||
}
|
||||
|
||||
static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
|
||||
static uvm_gpu_t *uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_gpu_id_t gpu_id;
|
||||
|
||||
@@ -318,7 +334,45 @@ static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t
|
||||
#define for_each_gpu_in_mask(gpu, mask) \
|
||||
for (gpu = uvm_processor_mask_find_first_gpu(mask); \
|
||||
gpu != NULL; \
|
||||
gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))
|
||||
gpu = uvm_processor_mask_find_next_gpu(mask, gpu))
|
||||
|
||||
static uvm_parent_gpu_t *uvm_parent_processor_mask_find_first_gpu(const uvm_parent_processor_mask_t *mask)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
uvm_parent_gpu_id_t parent_id = uvm_parent_processor_mask_find_first_gpu_id(mask);
|
||||
|
||||
if (UVM_PARENT_ID_IS_INVALID(parent_id))
|
||||
return NULL;
|
||||
|
||||
parent_gpu = uvm_parent_gpu_get(parent_id);
|
||||
|
||||
// See comment in uvm_processor_mask_find_first_gpu().
|
||||
UVM_ASSERT_MSG(parent_gpu, "parent_id %u\n", uvm_parent_id_value(parent_id));
|
||||
return parent_gpu;
|
||||
}
|
||||
|
||||
static uvm_parent_gpu_t *uvm_parent_processor_mask_find_next_gpu(const uvm_parent_processor_mask_t *mask,
|
||||
uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_parent_gpu_id_t parent_id;
|
||||
|
||||
UVM_ASSERT(parent_gpu);
|
||||
parent_id = uvm_parent_processor_mask_find_next_gpu_id(mask, uvm_parent_id_next(parent_gpu->id));
|
||||
if (UVM_PARENT_ID_IS_INVALID(parent_id))
|
||||
return NULL;
|
||||
|
||||
parent_gpu = uvm_parent_gpu_get(parent_id);
|
||||
|
||||
// See comment in uvm_processor_mask_find_first_gpu().
|
||||
UVM_ASSERT_MSG(parent_gpu, "parent_id %u\n", uvm_parent_id_value(parent_id));
|
||||
return parent_gpu;
|
||||
}
|
||||
|
||||
// Helper to iterate over all parent GPUs in the input mask
|
||||
#define for_each_parent_gpu_in_mask(parent_gpu, mask) \
|
||||
for ((parent_gpu) = uvm_parent_processor_mask_find_first_gpu((mask)); \
|
||||
(parent_gpu); \
|
||||
(parent_gpu) = uvm_parent_processor_mask_find_next_gpu((mask), (parent_gpu)))
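// Illustrative usage (the mask variable is hypothetical):
//
//     uvm_parent_gpu_t *parent_gpu;
//     NvU32 count = 0;
//
//     for_each_parent_gpu_in_mask(parent_gpu, &parent_mask)
//         count++;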
|
||||
|
||||
// Helper to iterate over all GPUs retained by the UVM driver
|
||||
// (across all va spaces).
|
||||
@@ -326,7 +380,7 @@ static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t
|
||||
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
|
||||
gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
|
||||
gpu != NULL; \
|
||||
gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
|
||||
gpu = uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
|
||||
|
||||
// LOCKING: Must hold either the global_lock or the gpu_table_lock
|
||||
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
@@ -384,7 +438,7 @@ static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent
|
||||
(parent_gpu) = uvm_global_find_next_parent_gpu((parent_gpu)))
|
||||
|
||||
// LOCKING: Must hold the global_lock
|
||||
#define for_each_gpu_in_parent(parent_gpu, gpu) \
|
||||
#define for_each_gpu_in_parent(gpu, parent_gpu) \
|
||||
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
|
||||
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), NULL);}); \
|
||||
(gpu) != NULL; \
|
||||
@@ -403,6 +457,10 @@ void uvm_global_gpu_release(const uvm_processor_mask_t *mask);
|
||||
// Notably this check cannot be performed where it's not safe to call into RM.
|
||||
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);
|
||||
|
||||
// Check for nvlink errors for all GPUs in a mask
|
||||
// Notably this check cannot be performed where it's not safe to call into RM.
|
||||
NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus);
|
||||
|
||||
// Pre-allocate fault service contexts.
|
||||
NV_STATUS uvm_service_block_context_init(void);
File diff suppressed because it is too large
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -49,9 +49,13 @@
#include <linux/mmu_notifier.h>
#include "uvm_conf_computing.h"

// Buffer length to store uvm gpu id, RM device name and gpu uuid.
#define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
    UVM_GPU_NAME_LENGTH + UVM_GPU_UUID_TEXT_BUFFER_LENGTH)
#define UVM_PARENT_GPU_UUID_PREFIX "GPU-"
#define UVM_GPU_UUID_PREFIX "GI-"

// UVM_UUID_STRING_LENGTH already includes NULL, don't double-count it with
// sizeof()
#define UVM_PARENT_GPU_UUID_STRING_LENGTH (sizeof(UVM_PARENT_GPU_UUID_PREFIX) - 1 + UVM_UUID_STRING_LENGTH)
#define UVM_GPU_UUID_STRING_LENGTH (sizeof(UVM_GPU_UUID_PREFIX) - 1 + UVM_UUID_STRING_LENGTH)

#define UVM_GPU_MAGIC_VALUE 0xc001d00d12341993ULL
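A hedged sketch of how the new prefix and length macros compose into a printable "GPU-<uuid>" string; the uvm_uuid_string() helper and its signature are assumed here for illustration only and may not match the driver's actual formatting routine:

// Build a parent GPU UUID string sized with the macros above.
static void example_format_parent_uuid(char buf[UVM_PARENT_GPU_UUID_STRING_LENGTH],
                                       const NvProcessorUuid *uuid)
{
    char uuid_text[UVM_UUID_STRING_LENGTH];

    // Assumed helper: fills uuid_text with the canonical UUID text.
    uvm_uuid_string(uuid_text, uuid);
    snprintf(buf, UVM_PARENT_GPU_UUID_STRING_LENGTH, UVM_PARENT_GPU_UUID_PREFIX "%s", uuid_text);
}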
@@ -93,6 +97,11 @@ struct uvm_service_block_context_struct
|
||||
// been serviced
|
||||
uvm_processor_mask_t resident_processors;
|
||||
|
||||
// A mask of GPUs that need to be checked for NVLINK errors before the
|
||||
// handler returns, but after the VA space lock has been unlocked
|
||||
// to avoid RM/UVM VA space lock deadlocks.
|
||||
uvm_processor_mask_t gpus_to_check_for_nvlink_errors;
|
||||
|
||||
// VA block region that contains all the pages affected by the operation
|
||||
uvm_va_block_region_t region;
|
||||
|
||||
@@ -180,33 +189,56 @@ struct uvm_service_block_context_struct
|
||||
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
|
||||
|
||||
// Access counters notification buffer index.
|
||||
NvU32 access_counters_buffer_index;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t read_fault_mask;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
|
||||
// region of a SAM VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t prefetch_only_fault_mask;
|
||||
|
||||
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t write_fault_mask;
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t read_fault_mask;
|
||||
|
||||
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used to return ATS fault status. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t faults_serviced_mask;
|
||||
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t write_fault_mask;
|
||||
|
||||
// Mask of successfully serviced read faults on pages in write_fault_mask.
|
||||
// This is unused for access counter service requests.
|
||||
uvm_page_mask_t reads_serviced_mask;
|
||||
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. This is a logical or of read_fault_mask and
|
||||
// write_mask and prefetch_only_fault_mask.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
|
||||
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. This is used as input for access counter service requests and output
|
||||
// of fault service requests.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE
|
||||
// aligned region of a SAM VMA. Used to return ATS fault status.
|
||||
uvm_page_mask_t faults_serviced_mask;
|
||||
|
||||
// Mask of successfully serviced read faults on pages in
|
||||
// write_fault_mask.
|
||||
uvm_page_mask_t reads_serviced_mask;
|
||||
|
||||
} faults;
|
||||
|
||||
struct
|
||||
{
|
||||
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
|
||||
// Mask of successfully migrated pages in a UVM_VA_BLOCK_SIZE
|
||||
// aligned region of a SAM VMA.
|
||||
uvm_page_mask_t migrated_mask;
|
||||
|
||||
} access_counters;
|
||||
};
|
||||
|
||||
// Client type of the service requestor.
|
||||
uvm_fault_client_type_t client_type;
|
||||
@@ -249,7 +281,6 @@ typedef struct
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
|
||||
} prefetch_state;
|
||||
|
||||
} uvm_ats_fault_context_t;
|
||||
|
||||
struct uvm_fault_service_batch_context_struct
|
||||
@@ -322,7 +353,7 @@ typedef struct
|
||||
// entries from the GPU buffer
|
||||
NvU32 max_batch_size;
|
||||
|
||||
struct uvm_replayable_fault_buffer_info_struct
|
||||
struct uvm_replayable_fault_buffer_struct
|
||||
{
|
||||
// Maximum number of faults entries that can be stored in the buffer
|
||||
NvU32 max_faults;
|
||||
@@ -386,7 +417,7 @@ typedef struct
|
||||
uvm_ats_fault_invalidate_t ats_invalidate;
|
||||
} replayable;
|
||||
|
||||
struct uvm_non_replayable_fault_buffer_info_struct
|
||||
struct uvm_non_replayable_fault_buffer_struct
|
||||
{
|
||||
// Maximum number of faults entries that can be stored in the buffer
|
||||
NvU32 max_faults;
|
||||
@@ -440,7 +471,7 @@ typedef struct
|
||||
|
||||
// Timestamp when prefetch faults where disabled last time
|
||||
NvU64 disable_prefetch_faults_timestamp;
|
||||
} uvm_fault_buffer_info_t;
|
||||
} uvm_fault_buffer_t;
|
||||
|
||||
struct uvm_access_counter_service_batch_context_struct
|
||||
{
|
||||
@@ -448,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
|
||||
NvU32 num_cached_notifications;
|
||||
|
||||
struct
|
||||
{
|
||||
uvm_access_counter_buffer_entry_t **notifications;
|
||||
uvm_access_counter_buffer_entry_t **notifications;
|
||||
|
||||
NvU32 num_notifications;
|
||||
NvU32 num_notifications;
|
||||
|
||||
// Boolean used to avoid sorting the fault batch by instance_ptr if we
|
||||
// determine at fetch time that all the access counter notifications in
|
||||
// the batch report the same instance_ptr
|
||||
bool is_single_instance_ptr;
|
||||
} virt;
|
||||
|
||||
struct
|
||||
{
|
||||
uvm_access_counter_buffer_entry_t **notifications;
|
||||
uvm_reverse_map_t *translations;
|
||||
|
||||
NvU32 num_notifications;
|
||||
|
||||
// Boolean used to avoid sorting the fault batch by aperture if we
|
||||
// determine at fetch time that all the access counter notifications in
|
||||
// the batch report the same aperture
|
||||
bool is_single_aperture;
|
||||
} phys;
|
||||
// Boolean used to avoid sorting the fault batch by instance_ptr if we
|
||||
// determine at fetch time that all the access counter notifications in
|
||||
// the batch report the same instance_ptr
|
||||
bool is_single_instance_ptr;
|
||||
|
||||
// Helper page mask to compute the accessed pages within a VA block
|
||||
uvm_page_mask_t accessed_pages;
|
||||
@@ -486,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
NvU32 batch_id;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
struct uvm_access_counter_buffer_struct
|
||||
{
|
||||
// Values used to configure access counters in RM
|
||||
struct
|
||||
{
|
||||
UVM_ACCESS_COUNTER_GRANULARITY granularity;
|
||||
UVM_ACCESS_COUNTER_USE_LIMIT use_limit;
|
||||
} rm;
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
|
||||
// The following values are precomputed by the access counter notification
|
||||
// handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
|
||||
// uvm_gpu_access_counters.c for more details.
|
||||
NvU64 translation_size;
|
||||
|
||||
NvU64 translations_per_counter;
|
||||
|
||||
NvU64 sub_granularity_region_size;
|
||||
|
||||
NvU64 sub_granularity_regions_per_translation;
|
||||
} uvm_gpu_access_counter_type_config_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UvmGpuAccessCntrInfo rm_info;
|
||||
|
||||
// Access counters may have multiple notification buffers.
|
||||
NvU32 index;
|
||||
|
||||
NvU32 max_notifications;
|
||||
|
||||
NvU32 max_batch_size;
|
||||
@@ -532,10 +531,22 @@ typedef struct
|
||||
// may override it to try different configuration values.
|
||||
struct
|
||||
{
|
||||
uvm_gpu_access_counter_type_config_t mimc;
|
||||
uvm_gpu_access_counter_type_config_t momc;
|
||||
// Values used to configure access counters in RM
|
||||
struct
|
||||
{
|
||||
UVM_ACCESS_COUNTER_GRANULARITY granularity;
|
||||
} rm;
|
||||
|
||||
NvU32 threshold;
|
||||
// The following values are precomputed by the access counter
|
||||
// notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
|
||||
// in uvm_gpu_access_counters.c for more details.
|
||||
NvU64 translation_size;
|
||||
|
||||
NvU64 sub_granularity_region_size;
|
||||
|
||||
NvU64 sub_granularity_regions_per_translation;
|
||||
|
||||
NvU32 threshold;
|
||||
} current_config;
|
||||
|
||||
// Access counter statistics
|
||||
@@ -547,7 +558,7 @@ typedef struct
|
||||
} stats;
|
||||
|
||||
// Ignoring access counters means that notifications are left in the HW
|
||||
// buffer without being serviced. Requests to ignore access counters
|
||||
// buffer without being serviced. Requests to ignore access counters
|
||||
// are counted since the suspend path inhibits access counter interrupts,
|
||||
// and the resume path needs to know whether to reenable them.
|
||||
NvU32 notifications_ignored_count;
|
||||
@@ -555,13 +566,25 @@ typedef struct
|
||||
// Context structure used to service a GPU access counter batch
|
||||
uvm_access_counter_service_batch_context_t batch_service_context;
|
||||
|
||||
// VA space that reconfigured the access counters configuration, if any.
|
||||
// Used in builtin tests only, to avoid reconfigurations from different
|
||||
// processes
|
||||
//
|
||||
// Locking: both readers and writers must hold the access counters ISR lock
|
||||
uvm_va_space_t *reconfiguration_owner;
|
||||
} uvm_access_counter_buffer_info_t;
|
||||
struct
|
||||
{
|
||||
// VA space that reconfigured the access counters configuration, if any.
|
||||
// Used in builtin tests only, to avoid reconfigurations from different
|
||||
// processes.
|
||||
//
|
||||
// Locking: both readers and writers must hold the access counters ISR
|
||||
// lock.
|
||||
uvm_va_space_t *reconfiguration_owner;
|
||||
|
||||
// The service access counters loop breaks after processing the first
|
||||
// batch. It will be retriggered if there are pending notifications, but
|
||||
// it releases the ISR service lock to check certain races that would be
|
||||
// difficult to hit otherwise.
|
||||
bool one_iteration_per_batch;
|
||||
NvU32 sleep_per_iteration_us;
|
||||
} test;
|
||||
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -633,9 +656,10 @@ struct uvm_gpu_struct
|
||||
NvProcessorUuid uuid;
|
||||
|
||||
// Nice printable name in the format:
|
||||
// ID: 999: GPU-<parent_uuid> UVM-GI-<gi_uuid>.
|
||||
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
|
||||
char name[9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
// ID: 999: GPU-<parent_uuid> GI-<gi_uuid>
|
||||
// UVM_PARENT_GPU_UUID_STRING_LENGTH includes a NULL character but will be
|
||||
// used for a space instead.
|
||||
char name[sizeof("ID: 999: ") - 1 + UVM_PARENT_GPU_UUID_STRING_LENGTH - 1 + 1 + UVM_GPU_UUID_STRING_LENGTH];
|
||||
|
||||
// Refcount of the gpu, i.e. how many times it has been retained. This is
|
||||
// roughly a count of how many times it has been registered with a VA space,
|
||||
@@ -667,6 +691,11 @@ struct uvm_gpu_struct
|
||||
// ZeroFB testing mode, this will be 0.
|
||||
NvU64 size;
|
||||
|
||||
// Physical start of heap, for SMC enabled GPUs, this is useful to
|
||||
// partition PMM, it is used by HMM to figure out the right translation
|
||||
// between HMM ranges and PMM offsets.
|
||||
NvU64 phys_start;
|
||||
|
||||
// Max (inclusive) physical address of this GPU's memory that the driver
|
||||
// can allocate through PMM (PMA).
|
||||
NvU64 max_allocatable_address;
|
||||
@@ -680,8 +709,14 @@ struct uvm_gpu_struct
|
||||
// True if the platform supports HW coherence and the GPU's memory
|
||||
// is exposed as a NUMA node to the kernel.
|
||||
bool enabled;
|
||||
unsigned int node_id;
|
||||
int node_id;
|
||||
} numa;
|
||||
|
||||
// Physical address of the start of statically mapped fb memory in BAR1
|
||||
NvU64 static_bar1_start;
|
||||
|
||||
// Size of statically mapped fb memory in BAR1.
|
||||
NvU64 static_bar1_size;
|
||||
} mem_info;
|
||||
|
||||
struct
|
||||
@@ -705,18 +740,11 @@ struct uvm_gpu_struct
|
||||
|
||||
struct
|
||||
{
|
||||
// Mask of peer_gpus set
|
||||
//
|
||||
// We can use a regular processor id because P2P is not allowed between
|
||||
// partitioned GPUs when SMC is enabled
|
||||
// Mask of peer_gpus set.
|
||||
uvm_processor_mask_t peer_gpu_mask;
|
||||
|
||||
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
|
||||
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
|
||||
|
||||
// Leaf spinlock used to synchronize access to the peer_gpus table so
|
||||
// that it can be safely accessed from the access counters bottom half
|
||||
uvm_spinlock_t peer_gpus_lock;
|
||||
// Leaf spinlock used to synchronize access to peer_gpu_mask.
|
||||
uvm_spinlock_t peer_gpu_lock;
|
||||
} peer_info;
|
||||
|
||||
// Maximum number of subcontexts supported
|
||||
@@ -791,14 +819,6 @@ struct uvm_gpu_struct
|
||||
uvm_bit_locks_t bitlocks;
|
||||
} sysmem_mappings;
|
||||
|
||||
// Reverse lookup table used to query the user mapping associated with a
|
||||
// sysmem (DMA) physical address.
|
||||
//
|
||||
// The system memory mapping information referred to by this field is
|
||||
// different from that of sysmem_mappings, because it relates to user
|
||||
// mappings (instead of kernel), and it is used in most configurations.
|
||||
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
|
||||
|
||||
struct
|
||||
{
|
||||
uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
|
||||
@@ -847,6 +867,38 @@ struct uvm_gpu_struct
|
||||
NvBool *error_notifier;
|
||||
} ecc;
|
||||
|
||||
// NVLINK STO recovery handling
|
||||
// In order to trap STO errors as soon as possible the driver has the hw
|
||||
// interrupt register mapped directly. If an STO interrupt is ever noticed
|
||||
// to be pending, then the UVM driver needs to:
|
||||
//
|
||||
// 1) ask RM to service interrupts, and then
|
||||
// 2) inspect the NVLINK error notifier state.
|
||||
//
|
||||
// Notably, checking for channel errors is not enough, because STO errors
|
||||
// can be pending, even after a channel has become idle.
|
||||
//
|
||||
// See more details in uvm_gpu_check_nvlink_error().
|
||||
struct
|
||||
{
|
||||
// Does the GPU have NVLINK STO recovery enabled?
|
||||
bool enabled;
|
||||
|
||||
// Artificially injected error for testing
|
||||
atomic_t injected_error;
|
||||
|
||||
// Direct mapping of the 32-bit part of the hw interrupt tree that has
|
||||
// the NVLINK error bits.
|
||||
volatile NvU32 *hw_interrupt_tree_location;
|
||||
|
||||
// Mask to get the NVLINK error interrupt bits from the 32-bits above.
|
||||
NvU32 mask;
|
||||
|
||||
// Set to true by RM when a fatal NVLINK error is encountered (requires
|
||||
// asking RM to service pending interrupts to be current).
|
||||
NvBool *error_notifier;
|
||||
} nvlink_status;
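A minimal sketch (an assumption, not the driver's actual implementation) of the "no RM" style check described in the comment above: peek at the directly mapped interrupt tree and report that a full RM-assisted check is required when the NVLINK error bits are set.

// Hypothetical helper illustrating the nvlink_status fields.
static NV_STATUS example_nvlink_check_no_rm(uvm_gpu_t *gpu)
{
    if (!gpu->nvlink_status.enabled)
        return NV_OK;

    // If any masked error bit is pending, the caller must ask RM to service
    // interrupts and then inspect the error notifier.
    if (READ_ONCE(*gpu->nvlink_status.hw_interrupt_tree_location) & gpu->nvlink_status.mask)
        return NV_WARN_MORE_PROCESSING_REQUIRED;

    return NV_OK;
}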
|
||||
|
||||
struct
|
||||
{
|
||||
NvU32 swizz_id;
|
||||
@@ -859,16 +911,19 @@ struct uvm_gpu_struct
|
||||
|
||||
struct
|
||||
{
|
||||
// "gpus/UVM-GPU-${physical-UUID}/${sub_processor_index}/"
|
||||
struct proc_dir_entry *dir;
|
||||
|
||||
// "gpus/${gpu_id}" -> "UVM-GPU-${physical-UUID}/${sub_processor_index}"
|
||||
struct proc_dir_entry *dir_symlink;
|
||||
|
||||
// The GPU instance UUID symlink if SMC is enabled.
|
||||
// The GPU instance UUID symlink.
|
||||
// "gpus/UVM-GI-${GI-UUID}" ->
|
||||
// "UVM-GPU-${physical-UUID}/${sub_processor_index}"
|
||||
struct proc_dir_entry *gpu_instance_uuid_symlink;
|
||||
|
||||
// "gpus/UVM-GPU-${physical-UUID}/${sub_processor_index}/info"
|
||||
struct proc_dir_entry *info_file;
|
||||
|
||||
struct proc_dir_entry *dir_peers;
|
||||
} procfs;
|
||||
|
||||
// Placeholder for per-GPU performance heuristics information
|
||||
@@ -876,8 +931,25 @@ struct uvm_gpu_struct
|
||||
|
||||
// Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
|
||||
bool uvm_test_force_upper_pushbuffer_segment;
|
||||
|
||||
// Have we initialised device p2p pages.
|
||||
bool device_p2p_initialised;
|
||||
|
||||
// Used to protect allocation of p2p_mem and assignment of the page
|
||||
// zone_device_data fields.
|
||||
uvm_mutex_t device_p2p_lock;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool access_counters_alloc_buffer;
|
||||
bool access_counters_alloc_block_context;
|
||||
bool isr_access_counters_alloc;
|
||||
bool isr_access_counters_alloc_stats_cpu;
|
||||
bool access_counters_batch_context_notifications;
|
||||
bool access_counters_batch_context_notification_cache;
|
||||
} uvm_test_parent_gpu_inject_error_t;
|
||||
|
||||
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
|
||||
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
|
||||
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
|
||||
@@ -886,8 +958,8 @@ struct uvm_gpu_struct
|
||||
struct uvm_parent_gpu_struct
|
||||
{
|
||||
// Reference count for how many places are holding on to a parent GPU
|
||||
// (internal to the UVM driver). This includes any GPUs we know about, not
|
||||
// just GPUs that are registered with a VA space. Most GPUs end up being
|
||||
// (internal to the UVM driver). This includes any GPUs we know about, not
|
||||
// just GPUs that are registered with a VA space. Most GPUs end up being
|
||||
// registered, but there are brief periods when they are not registered,
|
||||
// such as during interrupt handling, and in add_gpu() or remove_gpu().
|
||||
nv_kref_t gpu_kref;
|
||||
@@ -897,7 +969,7 @@ struct uvm_parent_gpu_struct
|
||||
|
||||
uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];
|
||||
|
||||
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
|
||||
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
|
||||
// usable child GPU in bottom-halves.
|
||||
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
|
||||
|
||||
@@ -905,7 +977,7 @@ struct uvm_parent_gpu_struct
|
||||
NvProcessorUuid uuid;
|
||||
|
||||
// Nice printable name including the uvm gpu id, ascii name from RM and uuid
|
||||
char name[UVM_GPU_NICE_NAME_BUFFER_LENGTH];
|
||||
char name[sizeof("ID 999: : ") - 1 + UVM_GPU_NAME_LENGTH + UVM_PARENT_GPU_UUID_STRING_LENGTH];
|
||||
|
||||
// GPU information provided by RM (architecture, implementation,
|
||||
// hardware classes, etc.).
|
||||
@@ -941,6 +1013,13 @@ struct uvm_parent_gpu_struct
|
||||
// Do not read this field directly, use uvm_gpu_device_handle instead.
|
||||
uvmGpuDeviceHandle rm_device;
|
||||
|
||||
// Total amount of physical memory available on the parent GPU.
|
||||
NvU64 max_allocatable_address;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
uvm_pmm_gpu_devmem_t *devmem;
|
||||
#endif
|
||||
|
||||
// The physical address range addressable by the GPU
|
||||
//
|
||||
// The GPU has its NV_PFB_XV_UPPER_ADDR register set by RM to
|
||||
@@ -967,6 +1046,8 @@ struct uvm_parent_gpu_struct
|
||||
// Whether CE supports physical addressing mode for writes to vidmem
|
||||
bool ce_phys_vidmem_write_supported;
|
||||
|
||||
// Addressing mode(s) supported for CE transfers between this GPU and its
|
||||
// peers: none, physical only, physical and virtual, etc.
|
||||
uvm_gpu_peer_copy_mode_t peer_copy_mode;
|
||||
|
||||
// Virtualization mode of the GPU.
|
||||
@@ -991,11 +1072,6 @@ struct uvm_parent_gpu_struct
|
||||
|
||||
bool access_counters_supported;
|
||||
|
||||
// If this is true, physical address based access counter notifications are
|
||||
// potentially generated. If false, only virtual address based notifications
|
||||
// are generated (assuming access_counters_supported is true too).
|
||||
bool access_counters_can_use_physical_addresses;
|
||||
|
||||
bool fault_cancel_va_supported;
|
||||
|
||||
// True if the GPU has hardware support for scoped atomics
|
||||
@@ -1056,6 +1132,15 @@ struct uvm_parent_gpu_struct
|
||||
// Indicates whether the GPU can map sysmem with pages larger than 4k
|
||||
bool can_map_sysmem_with_large_pages;
|
||||
|
||||
struct
|
||||
{
|
||||
// If true, the granularity of key rotation is a single channel. If
|
||||
// false, the key replacement affects all channels on the engine. The
|
||||
// supported granularity is dependent on the number of key slots
|
||||
// available in HW.
|
||||
bool per_channel_key_rotation;
|
||||
} conf_computing;
|
||||
|
||||
// VA base and size of the RM managed part of the internal UVM VA space.
|
||||
//
|
||||
// The internal UVM VA is shared with RM by RM controlling some of the top
|
||||
@@ -1068,6 +1153,11 @@ struct uvm_parent_gpu_struct
|
||||
NvU64 rm_va_base;
|
||||
NvU64 rm_va_size;
|
||||
|
||||
// Base and size of the GPU VA space used for peer identity mappings,
|
||||
// it is used only if peer_copy_mode is UVM_GPU_PEER_COPY_MODE_VIRTUAL.
|
||||
NvU64 peer_va_base;
|
||||
NvU64 peer_va_size;
|
||||
|
||||
// Base and size of the GPU VA used for uvm_mem_t allocations mapped in the
|
||||
// internal address_space_tree.
|
||||
NvU64 uvm_mem_va_base;
|
||||
@@ -1087,27 +1177,33 @@ struct uvm_parent_gpu_struct
|
||||
|
||||
struct
|
||||
{
|
||||
// "gpus/UVM-GPU-${physical-UUID}/"
|
||||
struct proc_dir_entry *dir;
|
||||
|
||||
// "gpus/UVM-GPU-${physical-UUID}/fault_stats"
|
||||
struct proc_dir_entry *fault_stats_file;
|
||||
|
||||
// "gpus/UVM-GPU-${physical-UUID}/access_counters"
|
||||
struct proc_dir_entry *access_counters_file;
|
||||
|
||||
// "gpus/UVM-GPU-${physical-UUID}/peers/"
|
||||
struct proc_dir_entry *dir_peers;
|
||||
} procfs;
|
||||
|
||||
// Interrupt handling state and locks
|
||||
uvm_isr_info_t isr;
|
||||
|
||||
// Fault buffer info. This is only valid if supports_replayable_faults is
|
||||
// set to true.
|
||||
uvm_fault_buffer_info_t fault_buffer_info;
|
||||
// This is only valid if supports_replayable_faults is set to true.
|
||||
uvm_fault_buffer_t fault_buffer;
|
||||
|
||||
// PMM lazy free processing queue.
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
nv_kthread_q_t lazy_free_q;
|
||||
|
||||
// Access counter buffer info. This is only valid if
|
||||
// supports_access_counters is set to true.
|
||||
uvm_access_counter_buffer_info_t access_counter_buffer_info;
|
||||
// This is only valid if supports_access_counters is set to true. This array
|
||||
// has rm_info.accessCntrBufferCount entries.
|
||||
uvm_access_counter_buffer_t *access_counter_buffer;
|
||||
uvm_mutex_t access_counters_enablement_lock;
|
||||
|
||||
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
|
||||
NvU32 utlb_per_gpc_count;
|
||||
@@ -1192,6 +1288,10 @@ struct uvm_parent_gpu_struct
|
||||
// 47-bit fabric memory physical offset that peer gpus need to access
|
||||
// to read a peer's memory
|
||||
NvU64 fabric_memory_window_start;
|
||||
|
||||
// 47-bit fabric memory physical offset that peer gpus need to access
|
||||
// to read remote EGM memory.
|
||||
NvU64 egm_fabric_memory_window_start;
|
||||
} nvswitch_info;
|
||||
|
||||
struct
|
||||
@@ -1220,6 +1320,24 @@ struct uvm_parent_gpu_struct
|
||||
unsigned long smmu_prod;
|
||||
unsigned long smmu_cons;
|
||||
} smmu_war;
|
||||
|
||||
struct
|
||||
{
|
||||
// Is EGM support enabled on this GPU.
|
||||
bool enabled;
|
||||
|
||||
// Local EGM peer ID. This ID is used to route EGM memory accesses to
|
||||
// the local CPU socket.
|
||||
NvU8 local_peer_id;
|
||||
|
||||
// EGM base address of the EGM carveout for remote EGM accesses.
|
||||
// The base address is used when computing PTE PA address values for
|
||||
// accesses to the local CPU socket's EGM memory from other peer
|
||||
// GPUs.
|
||||
NvU64 base_address;
|
||||
} egm;
|
||||
|
||||
uvm_test_parent_gpu_inject_error_t test;
|
||||
};
|
||||
|
||||
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
|
||||
@@ -1239,42 +1357,71 @@ static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
|
||||
return gpu->parent->rm_device;
|
||||
}
|
||||
|
||||
struct uvm_gpu_peer_struct
|
||||
typedef struct
|
||||
{
|
||||
// ref_count also controls state maintained in each GPU instance
|
||||
// (uvm_gpu_t). See init_peer_access().
|
||||
NvU64 ref_count;
|
||||
} uvm_gpu_peer_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// The fields in this global structure can only be inspected under one of
|
||||
// the following conditions:
|
||||
//
|
||||
// - The VA space lock is held for either read or write, both GPUs are
|
||||
// registered in the VA space, and the corresponding bit in the
|
||||
// - The VA space lock is held for either read or write, both parent GPUs
|
||||
// are registered in the VA space, and the corresponding bit in the
|
||||
// va_space.enabled_peers bitmap is set.
|
||||
//
|
||||
// - The global lock is held.
|
||||
//
|
||||
// - While the global lock was held in the past, the two GPUs were detected
|
||||
// to be SMC peers and were both retained.
|
||||
// - While the global lock was held in the past, the two parent GPUs were
|
||||
// both retained.
|
||||
//
|
||||
// - While the global lock was held in the past, the two GPUs were detected
|
||||
// to be NVLINK peers and were both retained.
|
||||
// - While the global lock was held in the past, the two parent GPUs were
|
||||
// detected to be NVLINK peers and were both retained.
|
||||
//
|
||||
// - While the global lock was held in the past, the two GPUs were detected
|
||||
// to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was called.
|
||||
// - While the global lock was held in the past, the two parent GPUs were
|
||||
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
|
||||
// called.
|
||||
//
|
||||
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
|
||||
// GPU must be read from the original GPU's peer_gpus table. The fields
|
||||
// will not change while the lock is held, but they may no longer be valid
|
||||
// because the other GPU might be in teardown.
|
||||
// - The peer_gpu_lock is held on one of the GPUs. In this case, the other
|
||||
// GPU must be referred from the original GPU's peer_gpu_mask reference.
|
||||
// The fields will not change while the lock is held, but they may no
|
||||
// longer be valid because the other GPU might be in teardown.
|
||||
|
||||
// Peer Id associated with this device w.r.t. to a peer GPU.
|
||||
// This field is used to determine when this struct has been initialized
|
||||
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
|
||||
// PCIe peers are initialized when retain_pcie_peers_from_uuids() is called.
|
||||
NvU64 ref_count;
|
||||
|
||||
// Saved values from UvmGpuP2PCapsParams to be used after GPU instance
|
||||
// creation. This should be per GPU instance since LCEs are associated with
|
||||
// GPU instances, not parent GPUs, but for now MIG is not supported for
|
||||
// NVLINK peers so RM associates this state with the parent GPUs. This will
|
||||
// need to be revisited if that NVLINK MIG peer support is added.
|
||||
NvU8 optimalNvlinkWriteCEs[2];
|
||||
|
||||
// Peer Id associated with this device with respect to a peer parent GPU.
|
||||
// Note: peerId (A -> B) != peerId (B -> A)
|
||||
// peer_id[0] from min(gpu_id_1, gpu_id_2) -> max(gpu_id_1, gpu_id_2)
|
||||
// peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
|
||||
NvU8 peer_ids[2];
|
||||
|
||||
// The link type between the peer GPUs, currently either PCIe or NVLINK.
|
||||
// This field is used to determine the when this peer struct has been
|
||||
// initialized (link_type != UVM_GPU_LINK_INVALID). NVLink peers are
|
||||
// initialized at GPU registration time. PCIe peers are initialized when
|
||||
// the refcount below goes from 0 to 1.
|
||||
// EGM peer Id associated with this device w.r.t. a peer GPU.
|
||||
// Note: egmPeerId (A -> B) != egmPeerId (B -> A)
|
||||
// egm_peer_id[0] from min(gpu_id_1, gpu_id_2) -> max(gpu_id_1, gpu_id_2)
|
||||
// egm_peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
|
||||
//
|
||||
// Unlike VIDMEM peers, EGM peers are not symmetric. This means that if
|
||||
// one of the GPUs is EGM-enabled, it does not automatically mean that
|
||||
// the other is also EGM-enabled. Therefore, EGM peer Ids are only
|
||||
// valid if the peer GPU is EGM-enabled, i.e. egm_peer_id[0] is valid
|
||||
// iff max(gpu_id_1, gpu_id_2) is EGM-enabled.
|
||||
NvU8 egm_peer_ids[2];
|
||||
|
||||
// The link type between the peer parent GPUs, currently either PCIe or
|
||||
// NVLINK.
|
||||
uvm_gpu_link_type_t link_type;
|
||||
|
||||
// Maximum unidirectional bandwidth between the peers in megabytes per
|
||||
@@ -1282,10 +1429,6 @@ struct uvm_gpu_peer_struct
|
||||
// See UvmGpuP2PCapsParams.
|
||||
NvU32 total_link_line_rate_mbyte_per_s;
|
||||
|
||||
// For PCIe, the number of times that this has been retained by a VA space.
|
||||
// For NVLINK this will always be 1.
|
||||
NvU64 ref_count;
|
||||
|
||||
// This handle gets populated when enable_peer_access successfully creates
|
||||
// an NV50_P2P object. disable_peer_access resets the same on the object
|
||||
// deletion.
|
||||
@@ -1299,9 +1442,13 @@ struct uvm_gpu_peer_struct
|
||||
// GPU-A <-> GPU-B link is bidirectional, pairs[x][0] is always the
|
||||
// local GPU, while pairs[x][1] is the remote GPU. The table shall be
|
||||
// filled like so: [[GPU-A, GPU-B], [GPU-B, GPU-A]].
|
||||
uvm_gpu_t *pairs[2][2];
|
||||
uvm_parent_gpu_t *pairs[2][2];
|
||||
} procfs;
|
||||
};
|
||||
|
||||
// Peer-to-peer state for MIG instance pairs between two different parent
|
||||
// GPUs.
|
||||
uvm_gpu_peer_t gpu_peers[UVM_MAX_UNIQUE_SUB_PROCESSOR_PAIRS];
|
||||
} uvm_parent_gpu_peer_t;
|
||||
|
||||
// Initialize global gpu state
|
||||
NV_STATUS uvm_gpu_init(void);
|
||||
@@ -1315,7 +1462,9 @@ void uvm_gpu_exit_va_space(uvm_va_space_t *va_space);

static unsigned int uvm_gpu_numa_node(uvm_gpu_t *gpu)
{
    UVM_ASSERT(gpu->mem_info.numa.enabled);
    if (!gpu->mem_info.numa.enabled)
        UVM_ASSERT(gpu->mem_info.numa.node_id == NUMA_NO_NODE);

    return gpu->mem_info.numa.node_id;
}
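A hypothetical usage sketch (the allocation policy below is an assumption for illustration, not driver code): place a sysmem page on the GPU's exposed NUMA node when coherent memory is enabled, otherwise let the kernel pick a node.

static struct page *example_alloc_page_near_gpu(uvm_gpu_t *gpu)
{
    // NUMA_NO_NODE lets the kernel choose when the GPU memory is not exposed
    // as a NUMA node.
    int nid = gpu->mem_info.numa.enabled ? (int)uvm_gpu_numa_node(gpu) : NUMA_NO_NODE;

    return alloc_pages_node(nid, GFP_KERNEL, 0);
}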
|
||||
@@ -1324,6 +1473,7 @@ static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struc
|
||||
unsigned long sys_addr = page_to_pfn(page) << PAGE_SHIFT;
|
||||
unsigned long gpu_offset = sys_addr - gpu->parent->system_bus.memory_window_start;
|
||||
|
||||
UVM_ASSERT(gpu->mem_info.numa.enabled);
|
||||
UVM_ASSERT(page_to_nid(page) == uvm_gpu_numa_node(gpu));
|
||||
UVM_ASSERT(sys_addr >= gpu->parent->system_bus.memory_window_start);
|
||||
UVM_ASSERT(sys_addr + PAGE_SIZE - 1 <= gpu->parent->system_bus.memory_window_end);
|
||||
@@ -1350,7 +1500,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
|
||||
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
|
||||
|
||||
// Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
|
||||
// that the caller is holding the global_lock. This is a narrower-purpose
|
||||
// that the caller is holding the global_lock. This is a narrower-purpose
|
||||
// function, and is only intended for use by the top-half ISR, or other very
|
||||
// limited cases.
|
||||
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
|
||||
@@ -1361,6 +1511,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
|
||||
// LOCKING: Takes and releases the global lock for the caller.
|
||||
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_rm_user_object_t *user_rm_device,
|
||||
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
|
||||
uvm_gpu_t **gpu_out);
|
||||
|
||||
// Retain a gpu which is known to already be retained. Does NOT require the
|
||||
@@ -1380,12 +1531,12 @@ static NvU64 uvm_gpu_retained_count(uvm_gpu_t *gpu)
|
||||
return atomic64_read(&gpu->retained_count);
|
||||
}
|
||||
|
||||
// Decrease the refcount on the parent GPU object, and actually delete the object
|
||||
// if the refcount hits zero.
|
||||
// Decrease the refcount on the parent GPU object, and actually delete the
|
||||
// object if the refcount hits zero.
|
||||
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *gpu);
|
||||
|
||||
// Calculates peer table index using GPU ids.
|
||||
NvU32 uvm_gpu_peer_table_index(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);
|
||||
// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
|
||||
NvU32 uvm_gpu_pair_index(const uvm_gpu_id_t id0, const uvm_gpu_id_t id1);
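A sketch of one common way to index unordered pairs, shown only to illustrate the idea behind a pair index in [0 .. N*(N-1)/2); this is an assumption, not the formula uvm_gpu_pair_index() actually uses:

// Map each pair of distinct indices to a unique triangular-number slot.
static NvU32 example_pair_index(NvU32 i0, NvU32 i1)
{
    NvU32 lo = i0 < i1 ? i0 : i1;
    NvU32 hi = i0 < i1 ? i1 : i0;

    return hi * (hi - 1) / 2 + lo;
}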
|
||||
|
||||
// Either retains an existing PCIe peer entry or creates a new one. In both
|
||||
// cases the two GPUs are also each retained.
|
||||
@@ -1396,35 +1547,41 @@ NV_STATUS uvm_gpu_retain_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
|
||||
// LOCKING: requires the global lock to be held
|
||||
void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
|
||||
|
||||
uvm_gpu_link_type_t uvm_parent_gpu_peer_link_type(uvm_parent_gpu_t *parent_gpu0, uvm_parent_gpu_t *parent_gpu1);
|
||||
|
||||
// Get the aperture for local_gpu to use to map memory resident on remote_gpu.
|
||||
// They must not be the same gpu.
|
||||
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);
|
||||
|
||||
// Get the processor id accessible by the given GPU for the given physical
|
||||
// address.
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
|
||||
// Returns the physical address for use by accessing_gpu of a vidmem allocation
|
||||
// on the peer owning_gpu. This address can be used for making PTEs on
|
||||
// accessing_gpu, but not for copying between the two GPUs. For that, use
|
||||
// uvm_gpu_peer_copy_address.
|
||||
uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu);
|
||||
|
||||
// Get the P2P capabilities between the gpus with the given indexes
|
||||
uvm_gpu_peer_t *uvm_gpu_index_peer_caps(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);
|
||||
// Returns the physical or virtual address for use by accessing_gpu to copy to/
|
||||
// from a vidmem allocation on the peer owning_gpu. This may be different from
|
||||
// uvm_gpu_peer_phys_address to handle CE limitations in addressing peer
|
||||
// physical memory directly.
|
||||
uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu);
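A hedged sketch contrasting the two helpers declared above (the wrapper function is hypothetical): the physical peer address is what a PTE on accessing_gpu would use, while the copy address is what a CE transfer would use, which on some GPUs is a virtual identity mapping instead.

static void example_peer_addresses(uvm_gpu_t *owning_gpu, uvm_gpu_t *accessing_gpu, NvU64 address)
{
    uvm_gpu_phys_address_t pte_addr  = uvm_gpu_peer_phys_address(owning_gpu, address, accessing_gpu);
    uvm_gpu_address_t      copy_addr = uvm_gpu_peer_copy_address(owning_gpu, address, accessing_gpu);

    // Sketch only: real callers would build PTEs from pte_addr and CE pushes
    // from copy_addr.
    (void)pte_addr;
    (void)copy_addr;
}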
|
||||
|
||||
// Get the P2P capabilities between the given gpus
|
||||
static uvm_gpu_peer_t *uvm_gpu_peer_caps(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
|
||||
// Return the reference count for the P2P state between the given GPUs.
|
||||
// The two GPUs must have different parents.
|
||||
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
|
||||
|
||||
// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
|
||||
// NUMA node that remote_gpu is attached to.
|
||||
// Note that local_gpu can be equal to remote_gpu when memory is resident in
|
||||
// CPU NUMA node local to local_gpu. In this case, the local EGM peer ID will
|
||||
// be used.
|
||||
uvm_aperture_t uvm_gpu_egm_peer_aperture(uvm_parent_gpu_t *local_gpu, uvm_parent_gpu_t *remote_gpu);
|
||||
|
||||
bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
|
||||
|
||||
static bool uvm_gpus_are_smc_peers(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
|
||||
{
|
||||
return uvm_gpu_index_peer_caps(gpu0->id, gpu1->id);
|
||||
}
|
||||
UVM_ASSERT(gpu0 != gpu1);
|
||||
|
||||
static bool uvm_gpus_are_nvswitch_connected(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
|
||||
{
|
||||
if (gpu0->parent->nvswitch_info.is_nvswitch_connected && gpu1->parent->nvswitch_info.is_nvswitch_connected) {
|
||||
UVM_ASSERT(uvm_gpu_peer_caps(gpu0, gpu1)->link_type >= UVM_GPU_LINK_NVLINK_2);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool uvm_gpus_are_smc_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
{
|
||||
return gpu0->parent == gpu1->parent;
|
||||
}
|
||||
|
||||
@@ -1460,8 +1617,8 @@ static uvm_gpu_address_t uvm_parent_gpu_address_virtual_from_sysmem_phys(uvm_par
|
||||
return uvm_gpu_address_virtual(parent_gpu->flat_sysmem_va_base + pa);
|
||||
}
|
||||
|
||||
// Given a GPU or CPU physical address (not peer), retrieve an address suitable
|
||||
// for CE access.
|
||||
// Given a GPU, CPU, or EGM PEER physical address (not VIDMEM peer), retrieve an
|
||||
// address suitable for CE access.
|
||||
static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phys_addr)
|
||||
{
|
||||
UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);
|
||||
@@ -1483,6 +1640,13 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
|
||||
return &gpu->peer_mappings[uvm_id_gpu_index(peer_id)];
|
||||
}
|
||||
|
||||
// Check whether the provided address points to peer memory:
|
||||
// * Physical address using one of the PEER apertures
|
||||
// * Physical address using SYS aperture that belongs to an exposed coherent
|
||||
// memory
|
||||
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
|
||||
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);
|
||||
|
||||
// Check for ECC errors
|
||||
//
|
||||
// Notably this check cannot be performed where it's not safe to call into RM.
|
||||
@@ -1495,6 +1659,23 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);
|
||||
// and it's required to call uvm_gpu_check_ecc_error() to be sure.
|
||||
NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
|
||||
|
||||
// Check for NVLINK errors
|
||||
//
|
||||
// Inject NVLINK error
|
||||
NV_STATUS uvm_gpu_inject_nvlink_error(uvm_gpu_t *gpu, UVM_TEST_NVLINK_ERROR_TYPE error_type);
|
||||
|
||||
NV_STATUS uvm_gpu_get_injected_nvlink_error(uvm_gpu_t *gpu);
|
||||
|
||||
// Notably this check cannot be performed where it's not safe to call into RM.
|
||||
NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
|
||||
|
||||
// Check for NVLINK errors without calling into RM
|
||||
//
|
||||
// Calling into RM is problematic in many places, this check is always safe to
|
||||
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
|
||||
// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
|
||||
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
|
||||
|
||||
// Map size bytes of contiguous sysmem on the GPU for physical access
|
||||
//
|
||||
// size has to be aligned to PAGE_SIZE.
|
||||
@@ -1595,9 +1776,6 @@ static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *pare
|
||||
|
||||
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Debug print of GPU properties
|
||||
void uvm_gpu_print(uvm_gpu_t *gpu);
|
||||
|
||||
// Add the given instance pointer -> user_channel mapping to this GPU. The
|
||||
// bottom half GPU page fault handler uses this to look up the VA space for GPU
|
||||
// faults.
|
||||
@@ -1637,4 +1815,7 @@ typedef enum
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
|
||||
} uvm_gpu_buffer_flush_mode_t;
|
||||
|
||||
// PCIe BAR containing static framebuffer memory mappings for PCIe P2P
|
||||
int uvm_device_p2p_static_bar(uvm_gpu_t *gpu);
|
||||
|
||||
#endif // __UVM_GPU_H__
|
||||
|
||||
File diff suppressed because it is too large
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
Copyright (c) 2017-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -27,11 +27,11 @@
|
||||
#include "uvm_forward_decl.h"
|
||||
#include "uvm_test_ioctl.h"
|
||||
|
||||
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
|
||||
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
|
||||
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters);
|
||||
|
||||
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
@@ -46,17 +46,23 @@ void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
|
||||
//
|
||||
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
//
// All of the parent_gpu's notification buffers are affected.
|
||||
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
|
||||
|
||||
// Return whether the VA space has access counter migrations enabled. The
|
||||
// caller must ensure that the VA space cannot go away.
|
||||
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);
|
||||
|
||||
// Global perf initialization/cleanup functions
|
||||
// Global access counters initialization/cleanup functions.
|
||||
NV_STATUS uvm_access_counters_init(void);
|
||||
void uvm_access_counters_exit(void);
|
||||
|
||||
// Global perf initialization/cleanup functions.
|
||||
NV_STATUS uvm_perf_access_counters_init(void);
|
||||
void uvm_perf_access_counters_exit(void);
|
||||
|
||||
// VA space Initialization/cleanup functions. See comments in
|
||||
// VA space initialization/cleanup functions. See comments in
|
||||
// uvm_perf_heuristics.h
|
||||
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
|
||||
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
|
||||
@@ -72,17 +78,18 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
|
||||
// counters are currently enabled. The hardware notifications and interrupts on
|
||||
// the GPU are enabled the first time any VA space invokes
|
||||
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
|
||||
// uvm_parent_gpu_access_counters_disable().
|
||||
// uvm_gpu_access_counters_disable().
|
||||
//
|
||||
// Locking: the VA space lock must not be held by the caller since these
|
||||
// functions may take the access counters ISR lock.
|
||||
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
|
||||
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
|
||||
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
|
||||
|
||||
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
|
||||
struct file *filp);
|
||||
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
|
||||
|
||||
#endif // __UVM_GPU_ACCESS_COUNTERS_H__
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
Copyright (c) 2016-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -154,62 +154,73 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
|
||||
return 1;
|
||||
}
|
||||
|
||||
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
|
||||
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
|
||||
{
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
|
||||
|
||||
// On Volta, accessCntrBufferCount is > 0, but we don't support access
|
||||
// counters in UVM (access_counters_supported is cleared during HAL
|
||||
// initialization.) This check prevents the top-half from accessing
|
||||
// unallocated memory.
|
||||
if (!parent_gpu->access_counters_supported)
|
||||
return 0;
|
||||
|
||||
if (parent_gpu->isr.is_suspended)
|
||||
return 0;
|
||||
|
||||
if (!parent_gpu->isr.access_counters.handling_ref_count)
|
||||
if (!parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count)
|
||||
return 0;
|
||||
|
||||
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
|
||||
if (down_trylock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem) != 0)
|
||||
return 0;
|
||||
|
||||
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
|
||||
up(&parent_gpu->isr.access_counters.service_lock.sem);
|
||||
if (!uvm_parent_gpu_access_counters_pending(parent_gpu, notif_buf_index)) {
|
||||
up(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem);
|
||||
return 0;
|
||||
}
|
||||
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
|
||||
// Interrupts need to be disabled to avoid an interrupt storm
|
||||
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
|
||||
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);
|
||||
|
||||
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
|
||||
&parent_gpu->isr.access_counters.bottom_half_q_item);
|
||||
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and UVM is given a
|
||||
// chance to handle the interrupt, before most of the RM processing. UVM communicates what it
|
||||
// did, back to RM, via the return code:
|
||||
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and
|
||||
// UVM is given a chance to handle the interrupt, before most of the RM
|
||||
// processing. UVM communicates what it did, back to RM, via the return code:
|
||||
//
|
||||
// NV_OK:
|
||||
// UVM handled an interrupt.
|
||||
//
|
||||
// NV_WARN_MORE_PROCESSING_REQUIRED:
|
||||
// UVM did not schedule a bottom half, because it was unable to get the locks it
|
||||
// needed, but there is still UVM work to be done. RM will return "not handled" to the
|
||||
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
|
||||
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
|
||||
// processing continues anyway: the GPU will soon raise another interrupt, because
|
||||
// that's what it does when there are replayable page faults remaining (GET != PUT in
|
||||
// the fault buffer).
|
||||
// UVM did not schedule a bottom half, because it was unable to get the
|
||||
// locks it needed, but there is still UVM work to be done. RM will
|
||||
// return "not handled" to the Linux kernel, *unless* RM handled other
|
||||
// faults in its top half. In that case, the fact that UVM did not
|
||||
// handle its interrupt is lost. However, life and interrupt processing
|
||||
// continues anyway: the GPU will soon raise another interrupt, because
|
||||
// that's what it does when there are replayable page faults remaining
|
||||
// (GET != PUT in the fault buffer).
|
||||
//
|
||||
// NV_ERR_NO_INTR_PENDING:
|
||||
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
|
||||
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
|
||||
// available for the future. RM's interrupt handling tends to evolve as new chips and
|
||||
// new interrupts get created.
|
||||
// UVM did not find any work to do. Currently this is handled in RM in
|
||||
// exactly the same way as NV_WARN_MORE_PROCESSING_REQUIRED is handled.
|
||||
// However, the extra precision is available for the future. RM's
|
||||
// interrupt handling tends to evolve as new chips and new interrupts
|
||||
// get created.
|
||||
|
||||
static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
unsigned num_handlers_scheduled = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 i;
|
||||
|
||||
if (!in_interrupt() && in_atomic()) {
|
||||
// Early-out if we're not in interrupt context, but memory allocations
|
||||
@@ -243,14 +254,16 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
|
||||
// Now that we got a GPU object, lock it so that it can't be removed without
|
||||
// us noticing.
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
++parent_gpu->isr.interrupt_count;
|
||||
|
||||
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
|
||||
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
|
||||
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
|
||||
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++)
|
||||
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu, i);
|
||||
|
||||
if (num_handlers_scheduled == 0) {
|
||||
if (parent_gpu->isr.is_suspended)
|
||||
@@ -288,6 +301,55 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
|
||||
return errno_to_nv_status(nv_kthread_q_init(queue, name));
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_context_t *block_context;
|
||||
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
|
||||
|
||||
uvm_sema_init(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, 1, UVM_LOCK_ORDER_ISR);
|
||||
|
||||
status = uvm_parent_gpu_init_access_counters(parent_gpu, notif_buf_index);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s, notif buf index: %u\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
notif_buf_index);
|
||||
return status;
|
||||
}
|
||||
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
block_context = uvm_va_block_context_alloc(NULL);
|
||||
if (!block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
|
||||
block_context;
|
||||
|
||||
nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
|
||||
access_counters_isr_bottom_half_entry,
|
||||
&parent_gpu->access_counter_buffer[notif_buf_index]);
|
||||
|
||||
// Access counters interrupts are initially disabled. They are
|
||||
// dynamically enabled when the GPU is registered on a VA space.
|
||||
parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count = 0;
|
||||
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc_stats_cpu)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count =
|
||||
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count) *
|
||||
num_possible_cpus());
|
||||
if (!parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@@ -316,7 +378,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
if (!block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;
|
||||
parent_gpu->fault_buffer.replayable.block_service_context.block_context = block_context;
|
||||
|
||||
parent_gpu->isr.replayable_faults.handling = true;
|
||||
|
||||
@@ -344,7 +406,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
if (!block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;
|
||||
parent_gpu->fault_buffer.non_replayable.block_service_context.block_context = block_context;
|
||||
|
||||
parent_gpu->isr.non_replayable_faults.handling = true;
|
||||
|
||||
@@ -361,32 +423,31 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
|
||||
if (parent_gpu->access_counters_supported) {
|
||||
status = uvm_parent_gpu_init_access_counters(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
NvU32 index_count = parent_gpu->rm_info.accessCntrBufferCount;
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
UVM_ASSERT(index_count > 0);
|
||||
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
|
||||
index_count);
|
||||
if (!parent_gpu->access_counter_buffer)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->isr.access_counters = uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters) * index_count);
|
||||
if (!parent_gpu->isr.access_counters)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
for (notif_buf_index = 0; notif_buf_index < index_count; notif_buf_index++) {
|
||||
status = uvm_isr_init_access_counters(parent_gpu, notif_buf_index);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
block_context = uvm_va_block_context_alloc(NULL);
|
||||
if (!block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
|
||||
block_context;
|
||||
|
||||
nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
|
||||
access_counters_isr_bottom_half_entry,
|
||||
parent_gpu);
|
||||
|
||||
// Access counters interrupts are initially disabled. They are
|
||||
// dynamically enabled when the GPU is registered on a VA space.
|
||||
parent_gpu->isr.access_counters.handling_ref_count = 0;
|
||||
parent_gpu->isr.access_counters.stats.cpu_exec_count =
|
||||
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
|
||||
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -401,7 +462,15 @@ void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
if (parent_gpu->isr.access_counters) {
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
|
||||
"notif buf index: %u\n",
|
||||
notif_buf_index);
|
||||
}
|
||||
}
|
||||
|
||||
// Now that the GPU is safely out of the global table, lock the GPU and mark
|
||||
// it as no longer handling interrupts so the top half knows not to schedule
|
||||
@@ -459,24 +528,38 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
|
||||
if (parent_gpu->access_counters_supported) {
|
||||
// It is safe to deinitialize access counters even if they have not been
|
||||
// successfully initialized.
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu);
|
||||
block_context =
|
||||
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
// It is safe to deinitialize access counters even if they have not
|
||||
// been successfully initialized.
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
|
||||
|
||||
if (parent_gpu->access_counter_buffer) {
|
||||
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
|
||||
block_context = access_counter->batch_service_context.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
}
|
||||
|
||||
if (parent_gpu->isr.access_counters)
|
||||
uvm_kvfree(parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count);
|
||||
}
|
||||
|
||||
uvm_kvfree(parent_gpu->isr.access_counters);
|
||||
uvm_kvfree(parent_gpu->access_counter_buffer);
|
||||
}
|
||||
|
||||
if (parent_gpu->non_replayable_faults_supported) {
|
||||
block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
|
||||
block_context = parent_gpu->fault_buffer.non_replayable.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
|
||||
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
|
||||
}
|
||||
|
||||
block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
|
||||
block_context = parent_gpu->fault_buffer.replayable.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
|
||||
uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
|
||||
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
|
||||
uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
|
||||
}
|
||||
|
||||
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
@@ -584,25 +667,29 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)
|
||||
|
||||
static void access_counters_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_access_counter_buffer_t *access_counters = (uvm_access_counter_buffer_t *)args;
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
unsigned int cpu;

UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);

uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
uvm_record_lock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, UVM_LOCK_FLAGS_MODE_SHARED);

// Multiple bottom halves for counter notifications can be running
// concurrently, but only one can be running this function for a given GPU
// since we enter with the access_counters_isr_lock held.
// concurrently, but only one per-notification-buffer (i.e.,
// notif_buf_index) can be running this function for a given GPU since we
// enter with the per-notification-buffer access_counters_isr_lock held.
cpu = get_cpu();
++parent_gpu->isr.access_counters.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
++parent_gpu->isr.access_counters[notif_buf_index].stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters[notif_buf_index].stats.cpus_used_mask);
++parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count[cpu];
put_cpu();

uvm_parent_gpu_service_access_counters(parent_gpu);
uvm_service_access_counters(access_counters);

uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
uvm_access_counters_isr_unlock(access_counters);

uvm_parent_gpu_kref_put(parent_gpu);
}
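
The bottom half above records, per notification buffer, how many times it has run and on which CPUs. Below is a minimal userspace-style sketch of that per-CPU bookkeeping idea; the struct and function names are invented for illustration and are not UVM APIs, and a real kernel implementation would use get_cpu()/put_cpu() rather than sched_getcpu().

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Hypothetical stand-in for the per-buffer stats kept by the driver. */
struct bh_stats {
    unsigned long long bottom_half_count;  /* total invocations */
    unsigned long long *cpu_exec_count;    /* one counter per possible CPU */
    long num_cpus;
};

static int bh_stats_init(struct bh_stats *stats)
{
    stats->num_cpus = sysconf(_SC_NPROCESSORS_CONF);
    if (stats->num_cpus < 1)
        return -1;
    stats->bottom_half_count = 0;
    stats->cpu_exec_count = calloc((size_t)stats->num_cpus, sizeof(*stats->cpu_exec_count));
    return stats->cpu_exec_count ? 0 : -1;
}

/* Record one invocation, attributing it to the CPU we are currently on. */
static void bh_stats_record(struct bh_stats *stats)
{
    int cpu = sched_getcpu();

    if (cpu < 0 || cpu >= stats->num_cpus)
        return;
    ++stats->bottom_half_count;
    ++stats->cpu_exec_count[cpu];
}

int main(void)
{
    struct bh_stats stats;

    if (bh_stats_init(&stats) != 0)
        return 1;
    bh_stats_record(&stats);
    printf("invocations: %llu\n", stats.bottom_half_count);
    free(stats.cpu_exec_count);
    return 0;
}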
|
||||
@@ -612,40 +699,35 @@ static void access_counters_isr_bottom_half_entry(void *args)
|
||||
UVM_ENTRY_VOID(access_counters_isr_bottom_half(args));
|
||||
}
|
||||
|
||||
// When Confidential Computing is enabled, UVM does not (indirectly) trigger
|
||||
// the replayable fault interrupt by updating GET. This is because, in this
|
||||
// configuration, GET is a dummy register used to inform GSP-RM (the owner
|
||||
// of the HW replayable fault buffer) of the latest entry consumed by the
|
||||
// UVM driver. The real GET register is owned by GSP-RM.
|
||||
//
|
||||
// The retriggering of a replayable faults bottom half then happens
// manually, by scheduling a bottom half for later if there is any pending
// work in the fault buffer accessible by UVM. The retriggering addresses
|
||||
// two problematic scenarios caused by GET updates not setting any
|
||||
// interrupt:
|
||||
//
|
||||
// (1) UVM didn't process all the entries up to cached PUT
|
||||
//
|
||||
// (2) UVM did process all the entries up to cached PUT, but GSP-RM
|
||||
// added new entries such that cached PUT is out-of-date
|
||||
//
|
||||
// In both cases, re-enablement of interrupts would have caused the
|
||||
// replayable fault to be triggered in a non-CC setup, because the updated
|
||||
// value of GET is different from PUT. But this is not the case in Confidential
|
||||
// Computing, so a bottom half needs to be manually scheduled in order to
|
||||
// ensure that all faults are serviced.
|
||||
//
|
||||
// While in the typical case the retriggering happens within a replayable
|
||||
// fault bottom half, it can also happen within a non-interrupt path such as
|
||||
// uvm_gpu_fault_buffer_flush.
|
||||
static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
bool retrigger = false;
|
||||
|
||||
// When Confidential Computing is enabled, UVM does not (indirectly) trigger
|
||||
// the replayable fault interrupt by updating GET. This is because, in this
|
||||
// configuration, GET is a dummy register used to inform GSP-RM (the owner
|
||||
// of the HW replayable fault buffer) of the latest entry consumed by the
|
||||
// UVM driver. The real GET register is owned by GSP-RM.
|
||||
//
|
||||
// The retriggering of a replayable faults bottom half then happens
// manually, by scheduling a bottom half for later if there is any pending
// work in the fault buffer accessible by UVM. The retriggering addresses
|
||||
// two problematic scenarios caused by GET updates not setting any
|
||||
// interrupt:
|
||||
//
|
||||
// (1) UVM didn't process all the entries up to cached PUT
|
||||
//
|
||||
// (2) UVM did process all the entries up to cached PUT, but GSP-RM
|
||||
// added new entries such that cached PUT is out-of-date
|
||||
//
|
||||
// In both cases, re-enablement of interrupts would have caused the
|
||||
// replayable fault to be triggered in a non-CC setup, because the updated
|
||||
// value of GET is different from PUT. But this is not the case in Confidential
|
||||
// Computing, so a bottom half needs to be manually scheduled in order to
|
||||
// ensure that all faults are serviced.
|
||||
//
|
||||
// While in the typical case the retriggering happens within a replayable
|
||||
// fault bottom half, it can also happen within a non-interrupt path such as
|
||||
// uvm_gpu_fault_buffer_flush.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
retrigger = true;
|
||||
|
||||
if (!retrigger)
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return;
|
||||
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
@@ -730,7 +812,7 @@ void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
// clear_replayable_faults is a no-op for architectures that don't
|
||||
// support pulse-based interrupts.
|
||||
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
|
||||
parent_gpu->fault_buffer_info.replayable.cached_get);
|
||||
parent_gpu->fault_buffer.replayable.cached_get);
|
||||
}
|
||||
|
||||
// This unlock call has to be out-of-order unlock due to interrupts_lock
|
||||
@@ -756,37 +838,41 @@ void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gp
|
||||
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
// See comments in uvm_parent_gpu_replayable_faults_isr_lock
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
|
||||
uvm_access_counters_intr_disable(access_counters);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_down(&parent_gpu->isr.access_counters.service_lock);
|
||||
uvm_down(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
uvm_access_counter_buffer_hal_t *ac_hal = parent_gpu->access_counter_buffer_hal;
|
||||
|
||||
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
|
||||
|
||||
// See comments in uvm_parent_gpu_replayable_faults_isr_unlock
|
||||
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
|
||||
uvm_access_counters_intr_enable(access_counters);
|
||||
|
||||
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
|
||||
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
|
||||
parent_gpu->access_counter_buffer_info.cached_get);
|
||||
}
|
||||
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
|
||||
ac_hal->clear_access_counter_notifications(access_counters, access_counters->cached_get);
|
||||
|
||||
// This unlock call has to be out-of-order unlock due to interrupts_lock
|
||||
// still being held. Otherwise, it would result in a lock order violation.
|
||||
uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
|
||||
uvm_up_out_of_order(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
}
|
||||
@@ -811,8 +897,11 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *paren
|
||||
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
// The read of handling_ref_count could race with a write from
|
||||
@@ -820,24 +909,27 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
// ISR lock. But those functions are invoked with the interrupt disabled
|
||||
// (disable_intr_ref_count > 0), so the check always returns false when the
|
||||
// race occurs
|
||||
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
|
||||
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
|
||||
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
|
||||
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
|
||||
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
|
||||
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters);
|
||||
}
|
||||
|
||||
++parent_gpu->isr.access_counters.disable_intr_ref_count;
|
||||
++parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count > 0);
|
||||
|
||||
--parent_gpu->isr.access_counters.disable_intr_ref_count;
|
||||
--parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
|
||||
|
||||
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
|
||||
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
|
||||
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
|
||||
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
|
||||
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
|
||||
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
Copyright (c) 2016-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -70,8 +70,8 @@ typedef struct
|
||||
|
||||
struct
|
||||
{
|
||||
// Number of the bottom-half invocations for this interrupt on a GPU over
|
||||
// its lifetime
|
||||
// Number of the bottom-half invocations for this interrupt on a GPU
|
||||
// over its lifetime.
|
||||
NvU64 bottom_half_count;
|
||||
|
||||
// A bitmask of the CPUs on which the bottom half has executed. The
|
||||
@@ -110,20 +110,20 @@ typedef struct
|
||||
// bottom-half per interrupt type.
|
||||
nv_kthread_q_t bottom_half_q;
|
||||
|
||||
// Protects the state of interrupts (enabled/disabled) and whether the GPU is
// currently handling them. Taken in both interrupt and process context.
// Protects the state of interrupts (enabled/disabled) and whether the GPU
// is currently handling them. Taken in both interrupt and process context.
uvm_spinlock_irqsave_t interrupts_lock;

uvm_intr_handler_t replayable_faults;
uvm_intr_handler_t non_replayable_faults;
uvm_intr_handler_t access_counters;
uvm_intr_handler_t *access_counters;

// Kernel thread used to kill channels on fatal non-replayable faults.
// This is needed because we cannot call into RM from the bottom-half to
// avoid deadlocks.
nv_kthread_q_t kill_channel_q;

// Number of top-half ISRs called for this GPU over its lifetime
// Number of top-half ISRs called for this GPU over its lifetime.
NvU64 interrupt_count;
} uvm_isr_info_t;
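
With access_counters now a pointer rather than an embedded struct, one uvm_intr_handler_t is allocated per access-counter notification buffer. The sketch below shows only the sizing and bounds-checked indexing shape; the types and helpers are invented for illustration and are not UVM definitions.

#include <stdlib.h>

/* Hypothetical, simplified stand-ins for uvm_intr_handler_t / uvm_isr_info_t. */
typedef struct {
    unsigned handling_ref_count;
    unsigned disable_intr_ref_count;
} intr_handler_sketch_t;

typedef struct {
    unsigned buffer_count;                   /* cf. rm_info.accessCntrBufferCount */
    intr_handler_sketch_t *access_counters;  /* one entry per notification buffer */
} isr_info_sketch_t;

static int isr_info_init(isr_info_sketch_t *isr, unsigned buffer_count)
{
    isr->buffer_count = buffer_count;
    isr->access_counters = calloc(buffer_count, sizeof(*isr->access_counters));
    return isr->access_counters ? 0 : -1;
}

static intr_handler_sketch_t *isr_handler(isr_info_sketch_t *isr, unsigned notif_buf_index)
{
    /* Mirrors the "notif_buf_index < accessCntrBufferCount" assertions elsewhere. */
    if (notif_buf_index >= isr->buffer_count)
        return NULL;
    return &isr->access_counters[notif_buf_index];
}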
|
||||
|
||||
@@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
|
||||
// Initialize ISR handling state
|
||||
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Flush any currently scheduled bottom halves. This is called during GPU
|
||||
// Flush any currently scheduled bottom halves. This is called during GPU
|
||||
// removal.
|
||||
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
@@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
|
||||
// half thread. This will also disable replayable page fault interrupts (if
|
||||
// half thread. This will also disable replayable page fault interrupts (if
|
||||
// supported by the GPU) because the top half attempts to take this lock, and we
|
||||
// would cause an interrupt storm if we didn't disable them first.
|
||||
//
|
||||
@@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
|
||||
// re-enable replayable page fault interrupts. Unlike
|
||||
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
|
||||
// re-enable replayable page fault interrupts. Unlike
|
||||
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
|
||||
// non-top/bottom half threads, this can be called by any thread.
|
||||
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Lock/unlock routines for non-replayable faults. These do not need to prevent
|
||||
// interrupt storms since the GPU fault buffers for non-replayable faults are
|
||||
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
|
||||
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
|
||||
// under the parent need to have been previously retained.
|
||||
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
|
||||
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
|
||||
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);
|
||||
|
||||
// Increments the reference count tracking whether access counter interrupts
|
||||
// should be disabled. The caller is guaranteed that access counter interrupts
|
||||
// are disabled upon return. Interrupts might already be disabled prior to
|
||||
// making this call. Each call is ref-counted, so this must be paired with a
|
||||
// call to uvm_parent_gpu_access_counters_intr_enable().
|
||||
// call to uvm_access_counters_intr_enable().
|
||||
//
|
||||
// parent_gpu->isr.interrupts_lock must be held to call this function.
|
||||
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);
|
||||
|
||||
// Decrements the reference count tracking whether access counter interrupts
|
||||
// should be disabled. Only once the count reaches 0 are the HW interrupts
|
||||
// actually enabled, so this call does not guarantee that the interrupts have
|
||||
// been re-enabled upon return.
|
||||
//
|
||||
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
|
||||
// calling this function.
|
||||
// uvm_access_counters_intr_disable() must have been called prior to calling
|
||||
// this function.
|
||||
//
|
||||
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
|
||||
// the interrupt.
|
||||
//
|
||||
// parent_gpu->isr.interrupts_lock must be held to call this function.
|
||||
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);
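
A minimal sketch of the ref-counted contract described in the comments above, matching the shape of the uvm_access_counters_intr_disable()/_enable() changes in the .c file: only the first outstanding disable touches the hardware, and interrupts are re-armed only once every disable has been balanced. The hw_* hooks and types here are placeholders, and the required interrupts_lock is elided.

/* Placeholder hardware hooks; the real driver calls into the HAL instead. */
static void hw_intr_disable(void) { }
static void hw_intr_enable(void) { }

typedef struct {
    unsigned handling_ref_count;      /* > 0 while this buffer is being handled */
    unsigned disable_intr_ref_count;  /* nested disable requests */
} intr_refcount_sketch_t;

static void sketch_intr_disable(intr_refcount_sketch_t *h)
{
    /* Only the transition from 0 to 1 outstanding disables touches HW. */
    if (h->handling_ref_count > 0 && h->disable_intr_ref_count == 0)
        hw_intr_disable();
    ++h->disable_intr_ref_count;
}

static void sketch_intr_enable(intr_refcount_sketch_t *h)
{
    /* Must pair with a prior sketch_intr_disable(); HW interrupts come back
     * only when the last outstanding disable is balanced. */
    --h->disable_intr_ref_count;
    if (h->handling_ref_count > 0 && h->disable_intr_ref_count == 0)
        hw_intr_enable();
}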
|
||||
|
||||
// Return the first valid GPU given the parent GPU or NULL if no MIG instances
|
||||
// are registered. This should only be called from bottom halves or if the
|
||||
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
|
||||
//
|
||||
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
#endif // __UVM_GPU_ISR_H__
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
Copyright (c) 2017-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -119,18 +119,18 @@
|
||||
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
|
||||
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
|
||||
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
|
||||
|
||||
non_replayable_faults->shadow_buffer_copy = NULL;
|
||||
non_replayable_faults->fault_cache = NULL;
|
||||
|
||||
non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
|
||||
non_replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize /
|
||||
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
|
||||
non_replayable_faults->shadow_buffer_copy =
|
||||
uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
|
||||
uvm_kvmalloc_zero(parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize);
|
||||
if (!non_replayable_faults->shadow_buffer_copy)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
@@ -147,7 +147,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_
|
||||
|
||||
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
|
||||
if (non_replayable_faults->fault_cache) {
|
||||
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
|
||||
@@ -170,7 +170,7 @@ bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);
|
||||
|
||||
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
|
||||
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
|
||||
&has_pending_faults);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
@@ -182,14 +182,14 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
|
||||
NV_STATUS status;
|
||||
NvU32 i;
|
||||
NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
|
||||
uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
|
||||
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
|
||||
|
||||
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
|
||||
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
|
||||
current_hw_entry,
|
||||
cached_faults);
|
||||
|
||||
@@ -267,7 +267,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
|
||||
uvm_gpu_t *gpu = user_channel->gpu;
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
|
||||
|
||||
UVM_ASSERT(!fault_entry->is_fatal);
|
||||
|
||||
@@ -355,7 +355,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
|
||||
uvm_processor_id_t new_residency;
|
||||
bool read_duplicate;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
|
||||
const uvm_va_policy_t *policy;
|
||||
|
||||
UVM_ASSERT(!fault_entry->is_fatal);
|
||||
@@ -450,7 +450,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
|
||||
NV_STATUS status, tracker_status;
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
uvm_gpu_t *gpu = fault_entry->gpu;
|
||||
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
|
||||
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.non_replayable.block_service_context;
|
||||
|
||||
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
|
||||
service_context->num_retries = 0;
|
||||
@@ -467,7 +467,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
|
||||
service_context,
|
||||
hmm_migratable));
|
||||
|
||||
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
|
||||
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer.non_replayable.fault_service_tracker,
|
||||
&va_block->tracker);
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
@@ -507,7 +507,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
|
||||
{
|
||||
uvm_va_space_t *va_space = fault_entry->va_space;
|
||||
uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
|
||||
(fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
|
||||
|
||||
@@ -551,7 +551,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
|
||||
NV_STATUS status = lookup_status;
|
||||
NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
|
||||
@@ -579,8 +579,10 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_access_type_t fault_access_type = fault_entry->fault_access_type;
|
||||
uvm_ats_fault_context_t *ats_context = &non_replayable_faults->ats_context;
|
||||
|
||||
uvm_page_mask_zero(&ats_context->read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->write_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.prefetch_only_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.accessed_mask);
|
||||
|
||||
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;
|
||||
|
||||
@@ -597,14 +599,17 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
}
|
||||
else {
|
||||
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(fault_address);
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->faults.accessed_mask;
|
||||
uvm_page_index_t page_index = (fault_address - base) / PAGE_SIZE;
|
||||
uvm_page_mask_t *fault_mask = (fault_access_type >= UVM_FAULT_ACCESS_TYPE_WRITE) ?
|
||||
&ats_context->write_fault_mask :
|
||||
&ats_context->read_fault_mask;
|
||||
&ats_context->faults.write_fault_mask :
|
||||
&ats_context->faults.read_fault_mask;
|
||||
|
||||
uvm_page_mask_set(fault_mask, page_index);
|
||||
|
||||
uvm_page_mask_set(accessed_mask, page_index);
|
||||
|
||||
status = uvm_ats_service_faults(gpu_va_space, vma, base, ats_context);
|
||||
if (status == NV_OK) {
|
||||
// Invalidate ATS TLB entries if needed
|
||||
@@ -644,7 +649,7 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
|
||||
struct mm_struct *mm;
|
||||
uvm_gpu_va_space_t *gpu_va_space;
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;
|
||||
|
||||
status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
|
||||
@@ -752,7 +757,7 @@ exit_no_channel:
|
||||
static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
|
||||
{
|
||||
uvm_service_block_context_t *service_context =
|
||||
&parent_gpu->fault_buffer_info.non_replayable.block_service_context;
|
||||
&parent_gpu->fault_buffer.non_replayable.block_service_context;
|
||||
NV_STATUS status;
|
||||
bool hmm_migratable = true;
|
||||
|
||||
@@ -789,7 +794,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent
|
||||
// non-replayable faults since getting multiple faults on the same
|
||||
// memory region is not very likely
|
||||
for (i = 0; i < cached_faults; ++i) {
|
||||
status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
|
||||
status = service_fault(parent_gpu, &parent_gpu->fault_buffer.non_replayable.fault_cache[i]);
|
||||
if (status != NV_OK)
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -119,7 +119,7 @@ module_param(uvm_perf_fault_coalesce, uint, S_IRUGO);
|
||||
// the power management resume path.
|
||||
static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
|
||||
// Read the current get/put pointers, as this might not be the first time
|
||||
// we take control of the fault buffer since the GPU was initialized,
|
||||
@@ -129,7 +129,7 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
|
||||
|
||||
// (Re-)enable fault prefetching
|
||||
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled)
|
||||
if (parent_gpu->fault_buffer.prefetch_faults_enabled)
|
||||
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
|
||||
else
|
||||
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
|
||||
@@ -140,28 +140,28 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
|
||||
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize %
|
||||
UVM_ASSERT(parent_gpu->fault_buffer.rm_info.replayable.bufferSize %
|
||||
parent_gpu->fault_buffer_hal->entry_size(parent_gpu) == 0);
|
||||
|
||||
replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize /
|
||||
replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.replayable.bufferSize /
|
||||
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
|
||||
// Check provided module parameter value
|
||||
parent_gpu->fault_buffer_info.max_batch_size = max(uvm_perf_fault_batch_count,
|
||||
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
|
||||
parent_gpu->fault_buffer_info.max_batch_size = min(parent_gpu->fault_buffer_info.max_batch_size,
|
||||
replayable_faults->max_faults);
|
||||
parent_gpu->fault_buffer.max_batch_size = max(uvm_perf_fault_batch_count,
|
||||
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
|
||||
parent_gpu->fault_buffer.max_batch_size = min(parent_gpu->fault_buffer.max_batch_size,
|
||||
replayable_faults->max_faults);
|
||||
|
||||
if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
|
||||
pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_batch_count,
|
||||
UVM_PERF_FAULT_BATCH_COUNT_MIN,
|
||||
replayable_faults->max_faults,
|
||||
parent_gpu->fault_buffer_info.max_batch_size);
|
||||
if (parent_gpu->fault_buffer.max_batch_size != uvm_perf_fault_batch_count) {
|
||||
UVM_INFO_PRINT("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_batch_count,
|
||||
UVM_PERF_FAULT_BATCH_COUNT_MIN,
|
||||
replayable_faults->max_faults,
|
||||
parent_gpu->fault_buffer.max_batch_size);
|
||||
}
|
||||
|
||||
batch_context->fault_cache = uvm_kvmalloc_zero(replayable_faults->max_faults * sizeof(*batch_context->fault_cache));
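
The max_batch_size setup a few lines above clamps the uvm_perf_fault_batch_count module parameter into [UVM_PERF_FAULT_BATCH_COUNT_MIN, max_faults] and reports when the value had to be adjusted. A minimal sketch of that clamp-and-report pattern, with illustrative names and plain stderr standing in for UVM_INFO_PRINT:

#include <stdio.h>

static unsigned clamp_batch_size(unsigned requested, unsigned min_allowed, unsigned max_allowed)
{
    unsigned effective = requested;

    if (effective < min_allowed)
        effective = min_allowed;
    if (effective > max_allowed)
        effective = max_allowed;

    /* Report only when the requested value was out of range. */
    if (effective != requested) {
        fprintf(stderr,
                "Invalid batch count %u. Valid range [%u:%u]. Using %u instead\n",
                requested, min_allowed, max_allowed, effective);
    }

    return effective;
}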
|
||||
@@ -198,22 +198,22 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
|
||||
UVM_PERF_FAULT_REPLAY_POLICY_DEFAULT;
|
||||
|
||||
if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
|
||||
pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_replay_policy,
|
||||
replayable_faults->replay_policy);
|
||||
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_replay_policy,
|
||||
replayable_faults->replay_policy);
|
||||
}
|
||||
|
||||
replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
|
||||
if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
|
||||
pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_replay_update_put_ratio,
|
||||
replayable_faults->replay_update_put_ratio);
|
||||
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_replay_update_put_ratio,
|
||||
replayable_faults->replay_update_put_ratio);
|
||||
}
|
||||
|
||||
// Re-enable fault prefetching just in case it was disabled in a previous run
|
||||
parent_gpu->fault_buffer_info.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
|
||||
parent_gpu->fault_buffer.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
|
||||
|
||||
fault_buffer_reinit_replayable_faults(parent_gpu);
|
||||
|
||||
@@ -222,7 +222,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
|
||||
|
||||
static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
|
||||
|
||||
if (batch_context->fault_cache) {
|
||||
@@ -230,9 +230,9 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_tracker_deinit(&replayable_faults->replay_tracker);
|
||||
}
|
||||
|
||||
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
|
||||
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
|
||||
// Re-enable prefetch faults in case we disabled them
|
||||
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer_info.prefetch_faults_enabled)
|
||||
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer.prefetch_faults_enabled)
|
||||
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
|
||||
}
|
||||
|
||||
@@ -252,7 +252,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(parent_gpu->replayable_faults_supported);
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceInitFaultInfo(parent_gpu->rm_device,
|
||||
&parent_gpu->fault_buffer_info.rm_info));
|
||||
&parent_gpu->fault_buffer.rm_info));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
@@ -262,7 +262,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
|
||||
// when it returns an error. Set the buffer handle to zero as it is
|
||||
// used by the deinitialization logic to determine if it was correctly
|
||||
// initialized.
|
||||
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
|
||||
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
@@ -304,24 +304,25 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
fault_buffer_deinit_replayable_faults(parent_gpu);
|
||||
|
||||
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
|
||||
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE));
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
|
||||
&parent_gpu->fault_buffer_info.rm_info));
|
||||
&parent_gpu->fault_buffer.rm_info));
|
||||
|
||||
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
|
||||
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
|
||||
UVM_ASSERT(parent_gpu->replayable_faults_supported);
|
||||
|
||||
// Fast path 1: we left some faults unserviced in the buffer in the last pass
|
||||
// Fast path 1: we left some faults unserviced in the buffer in the last
|
||||
// pass
|
||||
if (replayable_faults->cached_get != replayable_faults->cached_put)
|
||||
return true;
|
||||
|
||||
@@ -357,7 +358,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
|
||||
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer.replayable.replay_tracker;
|
||||
|
||||
UVM_ASSERT(tracker != NULL);
|
||||
|
||||
@@ -443,7 +444,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_gpu_t *gpu = fault_entry->gpu;
|
||||
uvm_gpu_phys_address_t pdb;
|
||||
uvm_push_t push;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
NvU64 offset;
|
||||
|
||||
UVM_ASSERT(gpu->parent->replayable_faults_supported);
|
||||
@@ -505,7 +506,7 @@ static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
uvm_tracker_t *tracker = NULL;
|
||||
|
||||
if (batch_context)
|
||||
@@ -556,7 +557,7 @@ static NV_STATUS push_replay_on_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
|
||||
@@ -589,7 +590,7 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_f
|
||||
return NV_OK;
|
||||
|
||||
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
|
||||
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
|
||||
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer.rm_info, is_flush_mode_move);
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
@@ -618,7 +619,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 get;
|
||||
NvU32 put;
|
||||
uvm_spin_loop_t spin;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
@@ -636,6 +637,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
status = hw_fault_buffer_flush_locked(parent_gpu, HW_FAULT_BUFFER_FLUSH_MODE_DISCARD);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
|
||||
}
|
||||
|
||||
@@ -644,7 +646,15 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
while (get != put) {
|
||||
// Wait until valid bit is set
|
||||
UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
|
||||
UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
|
||||
// Channels might be idle (e.g. in teardown) so check for errors
|
||||
// actively. In that case the gpu pointer is valid.
|
||||
status = gpu ? uvm_channel_manager_check_errors(gpu->channel_manager) : uvm_global_get_status();
|
||||
if (status != NV_OK) {
|
||||
write_get(parent_gpu, get);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
fault_buffer_skip_replayable_entry(parent_gpu, get);
|
||||
++get;
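
The flush loop above now re-checks for channel or global errors on every iteration of the valid-bit wait, so a GPU that never publishes the entry cannot hang the flush. A hedged userspace-style sketch of that wait-with-error-check structure; the globals and polling below are stand-ins for illustration, not driver code:

#define _POSIX_C_SOURCE 199309L
#include <stdbool.h>
#include <time.h>

/* Toy stand-ins: a valid bit per entry and an error status that another
 * context may set while we wait. Callers keep idx below 64. */
static volatile bool g_entry_valid[64];
static volatile int g_pending_error;

static int wait_for_entry(unsigned idx)
{
    struct timespec delay = { 0, 1000 };  /* 1us back-off between polls */

    while (!g_entry_valid[idx]) {
        if (g_pending_error != 0)
            return g_pending_error;  /* bail out instead of spinning forever */
        nanosleep(&delay, NULL);
    }

    return 0;
}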
|
||||
@@ -843,7 +853,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_fault_buffer_entry_t *fault_cache;
|
||||
uvm_spin_loop_t spin;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
const bool in_pascal_cancel_path = (!parent_gpu->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
|
||||
const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;
|
||||
|
||||
@@ -878,7 +888,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
// Parse until get != put and have enough space to cache.
|
||||
while ((get != put) &&
|
||||
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer_info.max_batch_size)) {
|
||||
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer.max_batch_size)) {
|
||||
bool is_same_instance_ptr = true;
|
||||
uvm_fault_buffer_entry_t *current_entry = &fault_cache[fault_index];
|
||||
uvm_fault_utlb_info_t *current_tlb;
|
||||
@@ -890,6 +900,10 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
// We have some entry to work on. Let's do the rest later.
|
||||
if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
|
||||
goto done;
|
||||
|
||||
status = uvm_global_get_status();
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Prevent later accesses being moved above the read of the valid bit
|
||||
@@ -1372,7 +1386,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_page_index_t last_page_index;
|
||||
NvU32 page_fault_count = 0;
|
||||
uvm_range_group_range_iter_t iter;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
|
||||
uvm_fault_buffer_entry_t *first_fault_entry = ordered_fault_cache[first_fault_index];
|
||||
uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
|
||||
@@ -1410,7 +1424,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
&end);
|
||||
}
|
||||
else {
|
||||
policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
policy = &va_block->managed_range->policy;
|
||||
end = va_block->end;
|
||||
}
|
||||
|
||||
@@ -1599,7 +1613,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
|
||||
NV_STATUS status;
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
NV_STATUS tracker_status;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
uvm_service_block_context_t *fault_block_context = &replayable_faults->block_service_context;
|
||||
|
||||
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
|
||||
@@ -1689,11 +1703,11 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
NvU32 i;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
const uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
const uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
|
||||
const uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
|
||||
const uvm_page_mask_t *reads_serviced_mask = &ats_context->faults.reads_serviced_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->faults.accessed_mask;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
|
||||
@@ -1703,7 +1717,7 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
|
||||
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
|
||||
|
||||
// Remove prefetched pages from the serviced mask since fault servicing
|
||||
// Remove SW prefetched pages from the serviced mask since fault servicing
|
||||
// failures belonging to prefetch pages need to be ignored.
|
||||
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);
|
||||
|
||||
@@ -1763,8 +1777,9 @@ static void start_new_sub_batch(NvU64 *sub_batch_base,
|
||||
NvU32 fault_index,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_page_mask_zero(&ats_context->read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->write_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.prefetch_only_fault_mask);
|
||||
|
||||
*sub_batch_fault_index = fault_index;
|
||||
*sub_batch_base = UVM_VA_BLOCK_ALIGN_DOWN(address);
|
||||
@@ -1784,11 +1799,12 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_buffer_entry_t *previous_entry = NULL;
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
|
||||
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
bool replay_per_va_block =
|
||||
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
|
||||
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
|
||||
@@ -1817,7 +1833,9 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
// End of sub-batch. Service faults gathered so far.
|
||||
if (fault_address >= (sub_batch_base + UVM_VA_BLOCK_SIZE)) {
|
||||
UVM_ASSERT(!uvm_page_mask_empty(read_fault_mask) || !uvm_page_mask_empty(write_fault_mask));
|
||||
UVM_ASSERT(!uvm_page_mask_empty(read_fault_mask) ||
|
||||
!uvm_page_mask_empty(write_fault_mask) ||
|
||||
!uvm_page_mask_empty(prefetch_only_fault_mask));
|
||||
|
||||
status = service_fault_batch_ats_sub_vma(gpu_va_space,
|
||||
vma,
|
||||
@@ -1834,8 +1852,14 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
page_index = (fault_address - sub_batch_base) / PAGE_SIZE;
|
||||
|
||||
if ((access_type <= UVM_FAULT_ACCESS_TYPE_READ) ||
|
||||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ))
|
||||
// Do not check for coalesced access type. If there are multiple
|
||||
// different accesses to an address, we can disregard the prefetch one.
|
||||
if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
|
||||
(uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
|
||||
uvm_page_mask_set(prefetch_only_fault_mask, page_index);
|
||||
|
||||
if ((access_type == UVM_FAULT_ACCESS_TYPE_READ) ||
|
||||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ))
|
||||
uvm_page_mask_set(read_fault_mask, page_index);
|
||||
|
||||
if (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE)
|
||||
@@ -1849,7 +1873,10 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
(previous_entry->va_space == current_entry->va_space));
|
||||
|
||||
// Service the last sub-batch.
|
||||
if ((status == NV_OK) && (!uvm_page_mask_empty(read_fault_mask) || !uvm_page_mask_empty(write_fault_mask))) {
|
||||
if ((status == NV_OK) &&
|
||||
(!uvm_page_mask_empty(read_fault_mask) ||
|
||||
!uvm_page_mask_empty(write_fault_mask) ||
|
||||
!uvm_page_mask_empty(prefetch_only_fault_mask))) {
|
||||
status = service_fault_batch_ats_sub_vma(gpu_va_space,
|
||||
vma,
|
||||
sub_batch_base,
|
||||
@@ -1930,7 +1957,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
uvm_va_block_t *va_block;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_block_context_t *va_block_context =
gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
gpu->parent->fault_buffer.replayable.block_service_context.block_context;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
struct mm_struct *mm = va_block_context->mm;
NvU64 fault_address = current_entry->fault_address;
@@ -1959,7 +1986,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
NvU64 outer = ~0ULL;

UVM_ASSERT(replay_per_va_block ==
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));

// Limit outer to the minimum of next va_range.start and first
// fault_address' next UVM_GMMU_ATS_GRANULARITY alignment so that it's
@@ -2020,8 +2047,8 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
uvm_gpu_t *gpu = batch_context->fatal_gpu;
uvm_gpu_va_space_t *gpu_va_space = NULL;
struct mm_struct *mm;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;

UVM_ASSERT(va_space);
@@ -2129,7 +2156,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
++i;
}
else {
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer.replayable.ats_invalidate;
NvU32 block_faults;
const bool hmm_migratable = true;

@@ -2210,12 +2237,12 @@ static NV_STATUS service_fault_batch(uvm_parent_gpu_t *parent_gpu,
NvU32 i;
uvm_va_space_t *va_space = NULL;
uvm_gpu_va_space_t *prev_gpu_va_space = NULL;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer.replayable.ats_invalidate;
struct mm_struct *mm = NULL;
const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
parent_gpu->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
parent_gpu->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
uvm_service_block_context_t *service_context =
&parent_gpu->fault_buffer_info.replayable.block_service_context;
&parent_gpu->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
bool hmm_migratable = true;

@@ -2685,8 +2712,9 @@ static void cancel_fault_batch(uvm_parent_gpu_t *parent_gpu,
// 5- Fetch all faults from buffer
// 6- Check what uTLBs are in lockdown mode and can be cancelled
// 7- Preprocess faults (order per va_space, fault address, access type)
// 8- Service all non-fatal faults and mark all non-serviceable faults as fatal
// 6.1- If fatal faults are not found, we are done
// 8- Service all non-fatal faults and mark all non-serviceable faults as
//    fatal.
// 8.1- If fatal faults are not found, we are done
// 9- Search for a uTLB which can be targeted for cancel, as described in
// try_to_cancel_utlbs. If found, cancel it.
// END LOOP
@@ -2700,14 +2728,14 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
{
NV_STATUS status;
NV_STATUS tracker_status;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
bool first = true;

UVM_ASSERT(gpu->parent->replayable_faults_supported);

// 1) Disable prefetching to avoid new requests keep coming and flooding
//    the buffer
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->disable_prefetch_faults(gpu->parent);

while (1) {
@@ -2821,7 +2849,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
}

// 10) Re-enable prefetching
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->enable_prefetch_faults(gpu->parent);

if (status == NV_OK)
@@ -2858,16 +2886,16 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu,
// comment in mark_fault_invalid_prefetch(..).
// Some tests rely on this logic (and ratio) to correctly disable prefetch
// fault reporting. If the logic changes, the tests will have to be changed.
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled &&
if (parent_gpu->fault_buffer.prefetch_faults_enabled &&
uvm_perf_reenable_prefetch_faults_lapse_msec > 0 &&
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer_info.max_batch_size * 2) ||
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer.max_batch_size * 2) ||
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
else if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer.disable_prefetch_faults_timestamp;

// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
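The enable_disable_prefetch_faults hunk above keeps the existing hysteresis and only renames fault_buffer_info to fault_buffer: prefetch fault reporting is turned off once invalid prefetch faults in a batch exceed two thirds of max_batch_size (written as num_invalid * 3 > max_batch_size * 2 to avoid integer-division rounding), or after more than five of them on simulated GPUs with built-in tests enabled, and is turned back on once uvm_perf_reenable_prefetch_faults_lapse_msec has elapsed since the disable timestamp. A minimal sketch of just that threshold arithmetic follows, with invented names (prefetch_state_t, should_disable, should_reenable) and the test-only branch omitted.

/*
 * Minimal sketch of the disable/re-enable thresholds in the hunk above.
 * The struct and helpers are invented for illustration; only the
 * arithmetic mirrors the diff (the simulated-GPU test branch and the
 * lapse_msec > 0 guard are omitted here).
 */
#include <stdbool.h>
#include <stdint.h>

typedef struct {
    bool     prefetch_faults_enabled;
    uint64_t disable_timestamp_ns;   /* recorded when reporting is disabled */
    uint32_t max_batch_size;
} prefetch_state_t;

/* Disable once invalid prefetch faults exceed 2/3 of the batch size. */
static bool should_disable(const prefetch_state_t *s, uint32_t num_invalid_prefetch)
{
    return s->prefetch_faults_enabled &&
           num_invalid_prefetch * 3 > s->max_batch_size * 2;
}

/* Re-enable after the configured lapse; the msec knob is compared against
 * a nanosecond clock, hence the * 1000 * 1000 scaling. */
static bool should_reenable(const prefetch_state_t *s,
                            uint64_t now_ns,
                            uint64_t reenable_lapse_msec)
{
    return !s->prefetch_faults_enabled &&
           (now_ns - s->disable_timestamp_ns) > reenable_lapse_msec * 1000 * 1000;
}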
@@ -2881,7 +2909,7 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu)
NvU32 num_batches = 0;
NvU32 num_throttled = 0;
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

UVM_ASSERT(parent_gpu->replayable_faults_supported);
@@ -3004,9 +3032,9 @@ void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);

if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = true;
parent_gpu->fault_buffer.prefetch_faults_enabled = true;
}
}

@@ -3015,10 +3043,10 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);

if (parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = false;
parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp = NV_GETTIME();
parent_gpu->fault_buffer.prefetch_faults_enabled = false;
parent_gpu->fault_buffer.disable_prefetch_faults_timestamp = NV_GETTIME();
}
}

Some files were not shown because too many files have changed in this diff.