Compare commits

...

5 Commits

Author              SHA1        Message    Date
Bernhard Stoeckner  81fe4fb417  570.86.16  2025-01-30 17:37:03 +01:00
Bernhard Stoeckner  54d69484da  570.86.15  2025-01-27 19:36:56 +01:00
Bernhard Stoeckner  9d0b0414a5  565.77     2024-12-05 16:37:35 +01:00
Bernhard Stoeckner  d5a0858f90  565.57.01  2024-10-22 17:38:58 +02:00
Gaurav Juvekar      ed4be64962  560.35.03  2024-08-19 10:46:21 -07:00
1483 changed files with 398360 additions and 219849 deletions

View File

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 560.31.02.
version 570.86.16.
## How to Build
@@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
560.31.02 driver release. This can be achieved by installing
570.86.16 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@@ -185,7 +185,7 @@ table below).
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/560.31.02/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/570.86.16/README/kernel_open.html
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
@@ -199,6 +199,7 @@ Subsystem Device ID.
| NVIDIA TITAN RTX | 1E02 |
| NVIDIA GeForce RTX 2080 Ti | 1E04 |
| NVIDIA GeForce RTX 2080 Ti | 1E07 |
| NVIDIA CMP 50HX | 1E09 |
| Quadro RTX 6000 | 1E30 |
| Quadro RTX 8000 | 1E30 1028 129E |
| Quadro RTX 8000 | 1E30 103C 129E |
@@ -391,6 +392,7 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 2070 | 1F07 |
| NVIDIA GeForce RTX 2060 | 1F08 |
| NVIDIA GeForce GTX 1650 | 1F0A |
| NVIDIA CMP 40HX | 1F0B |
| NVIDIA GeForce RTX 2070 | 1F10 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1025 132D |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1025 1342 |
@@ -691,6 +693,7 @@ Subsystem Device ID.
| NVIDIA GeForce GTX 1660 | 2184 |
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
| NVIDIA GeForce GTX 1650 | 2188 |
| NVIDIA CMP 30HX | 2189 |
| NVIDIA GeForce GTX 1660 Ti | 2191 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1028 0949 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FB |
@@ -751,6 +754,7 @@ Subsystem Device ID.
| NVIDIA H800 | 2324 10DE 17A8 |
| NVIDIA H20 | 2329 10DE 198B |
| NVIDIA H20 | 2329 10DE 198C |
| NVIDIA H20-3e | 232C 10DE 2063 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
@@ -758,9 +762,11 @@ Subsystem Device ID.
| NVIDIA H200 | 2335 10DE 18BF |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| NVIDIA H200 NVL | 233B 10DE 1996 |
| NVIDIA GH200 120GB | 2342 10DE 16EB |
| NVIDIA GH200 120GB | 2342 10DE 1805 |
| NVIDIA GH200 480GB | 2342 10DE 1809 |
| NVIDIA GH200 144G HBM3e | 2348 10DE 18D2 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
| NVIDIA RTX A5500 Laptop GPU | 2438 |
@@ -918,6 +924,7 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 4060 | 2882 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28A3 |
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
| NVIDIA RTX 2000E Ada Generation | 28B0 103C 1871 |
@@ -932,3 +939,9 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
| NVIDIA B200 | 2901 10DE 1999 |
| NVIDIA B200 | 2901 10DE 199B |
| NVIDIA B200 | 2901 10DE 20DA |
| NVIDIA GeForce RTX 5090 | 2B85 |
| NVIDIA GeForce RTX 5090 D | 2B87 |
| NVIDIA GeForce RTX 5080 | 2C02 |

View File

@@ -57,6 +57,20 @@ ifeq ($(NV_UNDEF_BEHAVIOR_SANITIZER),1)
UBSAN_SANITIZE := y
endif
#
# Command to create a symbolic link, explicitly resolving the symlink target
# to an absolute path to abstract away the difference between Linux < 6.13,
# where the CWD is the Linux kernel source tree for Kbuild extmod builds, and
# Linux >= 6.13, where the CWD is the external module source tree.
#
# This is used to create the nv*-kernel.o -> nv*-kernel.o_binary symlinks for
# kernel modules which use precompiled binary object files.
#
quiet_cmd_symlink = SYMLINK $@
cmd_symlink = ln -sf $(abspath $<) $@
$(foreach _module, $(NV_KERNEL_MODULES), \
$(eval include $(src)/$(_module)/$(_module).Kbuild))
@@ -72,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"560.31.02\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.86.16\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@@ -52,6 +52,22 @@ else
endif
endif
# If CC hasn't been set explicitly, check the value of CONFIG_CC_VERSION_TEXT.
# Look for the compiler specified there, and use it by default, if found.
ifeq ($(origin CC),default)
cc_version_text=$(firstword $(shell . $(KERNEL_OUTPUT)/.config; \
echo "$$CONFIG_CC_VERSION_TEXT"))
ifneq ($(cc_version_text),)
ifeq ($(shell command -v $(cc_version_text)),)
$(warning WARNING: Unable to locate the compiler $(cc_version_text) \
from CONFIG_CC_VERSION_TEXT in the kernel configuration.)
else
CC=$(cc_version_text)
endif
endif
endif
CC ?= cc
LD ?= ld
OBJDUMP ?= objdump
@@ -65,6 +81,16 @@ else
)
endif
KERNEL_ARCH = $(ARCH)
ifneq ($(filter $(ARCH),i386 x86_64),)
KERNEL_ARCH = x86
else
ifeq ($(filter $(ARCH),arm64 powerpc),)
$(error Unsupported architecture $(ARCH))
endif
endif
NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem)
NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \
$(NV_KERNEL_MODULES))
@@ -106,8 +132,9 @@ else
# module symbols on which the Linux kernel's module resolution is dependent
# and hence must be used whenever present.
LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
$(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \
LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
$(KERNEL_SOURCES)/arch/$(KERNEL_ARCH)/kernel/module.lds \
$(KERNEL_OUTPUT)/arch/$(KERNEL_ARCH)/module.lds \
$(KERNEL_OUTPUT)/scripts/module.lds
NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s))

View File

@@ -32,7 +32,10 @@
typedef enum
{
NV_FIRMWARE_TYPE_GSP,
NV_FIRMWARE_TYPE_GSP_LOG
NV_FIRMWARE_TYPE_GSP_LOG,
#if defined(NV_VMWARE)
NV_FIRMWARE_TYPE_BINDATA
#endif
} nv_firmware_type_t;
typedef enum
@@ -45,6 +48,7 @@ typedef enum
NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
NV_FIRMWARE_CHIP_FAMILY_GB10X = 8,
NV_FIRMWARE_CHIP_FAMILY_GB20X = 9,
NV_FIRMWARE_CHIP_FAMILY_END,
} nv_firmware_chip_family_t;
@@ -54,6 +58,7 @@ static inline const char *nv_firmware_chip_family_to_string(
{
switch (fw_chip_family) {
case NV_FIRMWARE_CHIP_FAMILY_GB10X: return "gb10x";
case NV_FIRMWARE_CHIP_FAMILY_GB20X: return "gb20x";
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
@@ -84,6 +89,7 @@ static inline const char *nv_firmware_for_chip_family(
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB20X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
@@ -104,6 +110,7 @@ static inline const char *nv_firmware_for_chip_family(
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB20X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
@@ -119,7 +126,12 @@ static inline const char *nv_firmware_for_chip_family(
return "";
}
}
#if defined(NV_VMWARE)
else if (fw_type == NV_FIRMWARE_TYPE_BINDATA)
{
return NV_FIRMWARE_FOR_NAME("bindata_image");
}
#endif
return "";
}
#endif // defined(NV_FIRMWARE_FOR_NAME)
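
The hunks above add NV_FIRMWARE_CHIP_FAMILY_GB20X to both the name lookup and the firmware path selection. A minimal caller-side sketch, assuming the helper takes the firmware type followed by the chip family (as its body suggests) and that the includer has already defined NV_FIRMWARE_FOR_NAME; the error handling is illustrative only:

    static const char *pick_gsp_firmware(nv_firmware_chip_family_t family)
    {
        const char *path = nv_firmware_for_chip_family(NV_FIRMWARE_TYPE_GSP, family);

        if (path[0] == '\0')
        {
            /* Unknown or unsupported chip family for GSP firmware,
             * e.g. NV_FIRMWARE_CHIP_FAMILY_END. */
            return NULL;
        }
        /* For NV_FIRMWARE_CHIP_FAMILY_GB20X, nv_firmware_chip_family_to_string()
         * returns "gb20x" and the path is built via NV_FIRMWARE_FOR_NAME(). */
        return path;
    }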

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -128,6 +128,9 @@ typedef struct nv_ioctl_register_fd
#define NV_DMABUF_EXPORT_MAX_HANDLES 128
#define NV_DMABUF_EXPORT_MAPPING_TYPE_DEFAULT 0
#define NV_DMABUF_EXPORT_MAPPING_TYPE_FORCE_PCIE 1
typedef struct nv_ioctl_export_to_dma_buf_fd
{
int fd;
@@ -136,6 +139,7 @@ typedef struct nv_ioctl_export_to_dma_buf_fd
NvU32 numObjects;
NvU32 index;
NvU64 totalSize NV_ALIGN_BYTES(8);
NvU8 mappingType;
NvHandle handles[NV_DMABUF_EXPORT_MAX_HANDLES];
NvU64 offsets[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);
NvU64 sizes[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);
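
A minimal sketch of how a caller might populate the extended request with the new mappingType field; the handle and size values are placeholders, fields not shown in this hunk are left zero-initialized, and the surrounding ioctl plumbing is omitted:

    NvHandle hMemory    = 0;           /* placeholder RM memory handle */
    NvU64    exportSize = 0x200000;    /* placeholder: 2 MiB object */
    struct nv_ioctl_export_to_dma_buf_fd params = { 0 };

    params.numObjects  = 1;
    params.index       = 0;
    params.totalSize   = exportSize;
    params.mappingType = NV_DMABUF_EXPORT_MAPPING_TYPE_FORCE_PCIE;  /* new field */
    params.handles[0]  = hMemory;
    params.offsets[0]  = 0;
    params.sizes[0]    = exportSize;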

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -43,6 +43,8 @@ struct nv_kthread_q
atomic_t main_loop_should_exit;
struct task_struct *q_kthread;
bool is_unload_flush_ongoing;
};
struct nv_kthread_q_item

View File

@@ -231,12 +231,6 @@ NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
const char *, va_list);
#endif
#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
#include <asm/eeh.h>
#define NV_PCI_ERROR_RECOVERY_ENABLED() eeh_enabled()
#define NV_PCI_ERROR_RECOVERY
#endif
#if defined(NV_ASM_SET_MEMORY_H_PRESENT)
#include <asm/set_memory.h>
#endif
@@ -609,7 +603,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
#define NV_ALLOC_PAGES_NODE(ptr, nid, order, gfp_mask) \
{ \
(ptr) = (unsigned long)page_address(alloc_pages_node(nid, gfp_mask, order)); \
(ptr) = (unsigned long) alloc_pages_node(nid, gfp_mask, order); \
}
#define NV_GET_FREE_PAGES(ptr, order, gfp_mask) \
@@ -724,6 +718,7 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
#endif
}
#define NV_GET_OFFSET_IN_PAGE(phys_page) offset_in_page(phys_page)
#define NV_GET_PAGE_STRUCT(phys_page) virt_to_page(__va(phys_page))
#define NV_VMA_PGOFF(vma) ((vma)->vm_pgoff)
#define NV_VMA_SIZE(vma) ((vma)->vm_end - (vma)->vm_start)
@@ -880,16 +875,6 @@ typedef void irqreturn_t;
#define PCI_CAP_ID_EXP 0x10
#endif
/*
* On Linux on PPC64LE enable basic support for Linux PCI error recovery (see
* Documentation/PCI/pci-error-recovery.txt). Currently RM only supports error
* notification and data collection, not actual recovery of the device.
*/
#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
#include <asm/eeh.h>
#define NV_PCI_ERROR_RECOVERY
#endif
/*
* If the host OS has page sizes larger than 4KB, we may have a security
* problem. Registers are typically grouped in 4KB pages, but if there are
@@ -951,14 +936,14 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
}
static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
NvU64 phys_addr, NvU64 size, NvU32 extra_prot, NvU64 start)
{
int ret = -1;
#if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
ret = nv_remap_page_range(vma, vma->vm_start, phys_addr, size,
ret = nv_remap_page_range(vma, start, phys_addr, size,
nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
#else
ret = io_remap_pfn_range(vma, vma->vm_start, (phys_addr >> PAGE_SHIFT),
ret = io_remap_pfn_range(vma, start, (phys_addr >> PAGE_SHIFT),
size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
#endif
return ret;
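
For context, a hedged sketch of how callers adapt to the reworked helper: the added start parameter replaces the implicit vma->vm_start, so passing vma->vm_start reproduces the previous behavior, while an offset start allows partial mappings within the same VMA (the variable names below are placeholders mirroring the helper's parameters):

    /* Previous behavior: map at the beginning of the VMA. */
    int rc = nv_io_remap_page_range(vma, phys_addr, size, extra_prot, vma->vm_start);

    if (rc == 0)
    {
        /* New capability (illustrative): map a further region one page into the VMA. */
        rc = nv_io_remap_page_range(vma, phys_addr + PAGE_SIZE, PAGE_SIZE,
                                    extra_prot, vma->vm_start + PAGE_SIZE);
    }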
@@ -1207,6 +1192,7 @@ typedef struct nv_alloc_s {
NvBool physical : 1;
NvBool unencrypted : 1;
NvBool coherent : 1;
NvBool carveout : 1;
} flags;
unsigned int cache_type;
unsigned int num_pages;
@@ -1417,8 +1403,6 @@ typedef struct nv_dma_map_s {
0 ? NV_OK : NV_ERR_OPERATING_SYSTEM)
#endif
typedef struct nv_ibmnpu_info nv_ibmnpu_info_t;
typedef struct nv_work_s {
struct work_struct task;
void *data;
@@ -1466,7 +1450,6 @@ struct nv_dma_device {
} addressable_range;
struct device *dev;
NvBool nvlink;
};
/* Properties of the coherent link */
@@ -1515,9 +1498,6 @@ typedef struct nv_linux_state_s {
struct device *dev;
struct pci_dev *pci_dev;
/* IBM-NPU info associated with this GPU */
nv_ibmnpu_info_t *npu;
/* coherent link information */
coherent_link_info_t coherent_link_info;
@@ -1833,27 +1813,13 @@ static inline int nv_is_control_device(struct inode *inode)
return (minor((inode)->i_rdev) == NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE);
}
#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD)
#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD) || defined(NV_DEVICE_VM_BUILD)
#define NV_VGX_HYPER
#if defined(NV_XEN_IOEMU_INJECT_MSI)
#include <xen/ioemu.h>
#endif
#endif
static inline NvU64 nv_pci_bus_address(struct pci_dev *dev, NvU8 bar_index)
{
NvU64 bus_addr = 0;
#if defined(NV_PCI_BUS_ADDRESS_PRESENT)
bus_addr = pci_bus_address(dev, bar_index);
#elif defined(CONFIG_PCI)
struct pci_bus_region region;
pcibios_resource_to_bus(dev, &region, &dev->resource[bar_index]);
bus_addr = region.start;
#endif
return bus_addr;
}
/*
* Decrements the usage count of the allocation, and moves the allocation to
* the given nvlfp's free list if the usage count drops to zero.
@@ -1884,59 +1850,6 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
#endif
/*
* Starting on Power9 systems, DMA addresses for NVLink are no longer
* the same as used over PCIe.
*
* Power9 supports a 56-bit Real Address. This address range is compressed
* when accessed over NVLink to allow the GPU to access all of memory using
* its 47-bit Physical address.
*
* If there is an NPU device present on the system, it implies that NVLink
* sysmem links are present and we need to apply the required address
* conversion for NVLink within the driver.
*
* See Bug 1920398 for further background and details.
*
* Note, a deviation from the documented compression scheme is that the
* upper address bits (i.e. bit 56-63) instead of being set to zero are
* preserved during NVLink address compression so the orignal PCIe DMA
* address can be reconstructed on expansion. These bits can be safely
* ignored on NVLink since they are truncated by the GPU.
*
* Bug 1968345: As a performance enhancement it is the responsibility of
* the caller on PowerPC platforms to check for presence of an NPU device
* before the address transformation is applied.
*/
static inline NvU64 nv_compress_nvlink_addr(NvU64 addr)
{
NvU64 addr47 = addr;
#if defined(NVCPU_PPC64LE)
addr47 = addr & ((1ULL << 43) - 1);
addr47 |= (addr & (0x3ULL << 45)) >> 2;
WARN_ON(addr47 & (1ULL << 44));
addr47 |= (addr & (0x3ULL << 49)) >> 4;
addr47 |= addr & ~((1ULL << 56) - 1);
#endif
return addr47;
}
static inline NvU64 nv_expand_nvlink_addr(NvU64 addr47)
{
NvU64 addr = addr47;
#if defined(NVCPU_PPC64LE)
addr = addr47 & ((1ULL << 43) - 1);
addr |= (addr47 & (3ULL << 43)) << 2;
addr |= (addr47 & (3ULL << 45)) << 4;
addr |= addr47 & ~((1ULL << 56) - 1);
#endif
return addr;
}
// Default flags for ISRs
static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
{

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -36,5 +36,6 @@ int nv_pci_count_devices(void);
NvU8 nv_find_pci_capability(struct pci_dev *, NvU8);
int nvidia_dev_get_pci_info(const NvU8 *, struct pci_dev **, NvU64 *, NvU64 *);
nv_linux_state_t * find_pci(NvU32, NvU8, NvU8, NvU8);
NvBool nv_pci_is_valid_topology_for_direct_pci(nv_state_t *, struct device *);
#endif

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -59,6 +59,8 @@ NV_STATUS nv_uvm_resume (void);
void nv_uvm_notify_start_device (const NvU8 *uuid);
void nv_uvm_notify_stop_device (const NvU8 *uuid);
NV_STATUS nv_uvm_event_interrupt (const NvU8 *uuid);
NV_STATUS nv_uvm_drain_P2P (const NvU8 *uuid);
NV_STATUS nv_uvm_resume_P2P (const NvU8 *uuid);
/* Move these to nv.h once implemented by other UNIX platforms */
NvBool nvidia_get_gpuid_list (NvU32 *gpu_ids, NvU32 *gpu_count);

View File

@@ -44,6 +44,7 @@
#include <nv-ioctl.h>
#include <nv-ioctl-numa.h>
#include <nvmisc.h>
#include <os/nv_memory_area.h>
extern nv_cap_t *nvidia_caps_root;
@@ -279,8 +280,7 @@ typedef struct nv_usermap_access_params_s
NvU64 offset;
NvU64 *page_array;
NvU64 num_pages;
NvU64 mmap_start;
NvU64 mmap_size;
MemoryArea memArea;
NvU64 access_start;
NvU64 access_size;
NvU64 remap_prot_extra;
@@ -296,8 +296,7 @@ typedef struct nv_alloc_mapping_context_s {
NvU64 page_index;
NvU64 *page_array;
NvU64 num_pages;
NvU64 mmap_start;
NvU64 mmap_size;
MemoryArea memArea;
NvU64 access_start;
NvU64 access_size;
NvU64 remap_prot_extra;
@@ -330,7 +329,7 @@ typedef struct nv_soc_irq_info_s {
NvS32 ref_count;
} nv_soc_irq_info_t;
#define NV_MAX_SOC_IRQS 6
#define NV_MAX_SOC_IRQS 10
#define NV_MAX_DPAUX_NUM_DEVICES 4
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2
@@ -369,6 +368,8 @@ typedef struct nv_state_t
{
NvBool valid;
NvU8 uuid[GPU_UUID_LEN];
NvBool pci_uuid_read_attempted;
NV_STATUS pci_uuid_status;
} nv_uuid_cache;
void *handle;
@@ -480,6 +481,8 @@ typedef struct nv_state_t
/* Bool to check if the GPU has a coherent sysmem link */
NvBool coherent;
/* OS detected GPU has ATS capability */
NvBool ats_support;
/*
* NUMA node ID of the CPU to which the GPU is attached.
* Holds NUMA_NO_NODE on platforms that don't support NUMA configuration.
@@ -535,6 +538,7 @@ typedef struct UvmGpuAddressSpaceInfo_tag *nvgpuAddressSpaceInfo_t;
typedef struct UvmGpuAllocInfo_tag *nvgpuAllocInfo_t;
typedef struct UvmGpuP2PCapsParams_tag *nvgpuP2PCapsParams_t;
typedef struct UvmGpuFbInfo_tag *nvgpuFbInfo_t;
typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
@@ -545,6 +549,7 @@ typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t
typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;
typedef struct UvmGpuExternalMappingInfo_tag *nvgpuExternalMappingInfo_t;
typedef struct UvmGpuExternalPhysAddrInfo_tag *nvgpuExternalPhysAddrInfo_t;
typedef struct UvmGpuChannelResourceInfo_tag *nvgpuChannelResourceInfo_t;
typedef struct UvmGpuChannelInstanceInfo_tag *nvgpuChannelInstanceInfo_t;
typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindParams_t;
@@ -569,7 +574,8 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemor
#define NV_FLAG_PASSTHRU 0x0080
#define NV_FLAG_SUSPENDED 0x0100
#define NV_FLAG_SOC_IGPU 0x0200
// Unused 0x0400
/* To be set when an FLR needs to be triggered after device shut down. */
#define NV_FLAG_TRIGGER_FLR 0x0400
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
#define NV_FLAG_IN_RECOVERY 0x1000
// Unused 0x2000
@@ -612,6 +618,7 @@ typedef struct
const char *gc6_support;
const char *gcoff_support;
const char *s0ix_status;
const char *db_support;
} nv_power_info_t;
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
@@ -757,6 +764,7 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
#define NV_ALIGN_DOWN(v,g) ((v) & ~((g) - 1))
#endif
/*
* driver internal interfaces
*/
@@ -783,7 +791,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, NvBool, void **);
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
@@ -812,7 +820,6 @@ NV_STATUS NV_API_CALL nv_dma_map_mmio (nv_dma_device_t *, NvU64, NvU6
void NV_API_CALL nv_dma_unmap_mmio (nv_dma_device_t *, NvU64, NvU64);
void NV_API_CALL nv_dma_cache_invalidate (nv_dma_device_t *, void *);
void NV_API_CALL nv_dma_enable_nvlink (nv_dma_device_t *);
NvS32 NV_API_CALL nv_start_rc_timer (nv_state_t *);
NvS32 NV_API_CALL nv_stop_rc_timer (nv_state_t *);
@@ -839,9 +846,7 @@ NV_STATUS NV_API_CALL nv_acpi_mux_method (nv_state_t *, NvU32 *, NvU32,
NV_STATUS NV_API_CALL nv_log_error (nv_state_t *, NvU32, const char *, va_list);
NvU64 NV_API_CALL nv_get_dma_start_address (nv_state_t *);
NV_STATUS NV_API_CALL nv_set_primary_vga_status(nv_state_t *);
NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);
NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
@@ -854,19 +859,8 @@ void NV_API_CALL nv_put_file_private(void *);
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
void NV_API_CALL nv_wait_for_ibmnpu_rsync(nv_state_t *nv);
void NV_API_CALL nv_ibmnpu_cache_flush_range(nv_state_t *nv, NvU64, NvU64);
void NV_API_CALL nv_p2p_free_platform_data(void *data);
#if defined(NVCPU_PPC64LE)
NV_STATUS NV_API_CALL nv_get_nvlink_line_rate (nv_state_t *, NvU32 *);
#endif
NV_STATUS NV_API_CALL nv_revoke_gpu_mappings (nv_state_t *);
void NV_API_CALL nv_acquire_mmap_lock (nv_state_t *);
void NV_API_CALL nv_release_mmap_lock (nv_state_t *);
@@ -904,6 +898,9 @@ void NV_API_CALL nv_dma_release_dma_buf (nv_dma_buf_t *);
void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *);
NV_STATUS NV_API_CALL nv_schedule_uvm_drain_p2p (NvU8 *);
void NV_API_CALL nv_schedule_uvm_resume_p2p (NvU8 *);
NvBool NV_API_CALL nv_platform_supports_s0ix (void);
NvBool NV_API_CALL nv_s2idle_pm_configured (void);
@@ -994,18 +991,24 @@ NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU6
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);
NV_STATUS NV_API_CALL rm_p2p_get_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
NV_STATUS NV_API_CALL rm_p2p_get_gpu_info (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *, void **);
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, NvBool, void *, void *, void **);
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, void *, nv_phys_addr_range_t **, NvU32 *);
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, nv_phys_addr_range_t **, NvU32);
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *,
NvHandle, NvHandle, MemoryRange,
NvU8, void *, NvBool, MemoryArea *);
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *,
NvHandle, NvHandle, NvU8, void *,
NvBool, MemoryArea);
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *,
nv_state_t *, NvHandle, NvHandle,
NvU8, NvHandle *, NvHandle *,
NvHandle *, void **, NvBool *);
void NV_API_CALL rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
@@ -1022,7 +1025,6 @@ NvBool NV_API_CALL rm_is_device_sequestered(nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_check_for_gpu_surprise_removal(nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_set_external_kernel_client_count(nvidia_stack_t *, nv_state_t *, NvBool);
NV_STATUS NV_API_CALL rm_schedule_gpu_wakeup(nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_is_iommu_needed_for_sriov(nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_disable_iomap_wc(void);
void NV_API_CALL rm_init_dynamic_power_management(nvidia_stack_t *, nv_state_t *, NvBool);
@@ -1039,12 +1041,14 @@ void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
NvBool NV_API_CALL rm_is_altstack_in_use(void);
/* vGPU VFIO specific functions */
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *,
NvU32 *, NvU32 *, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
NV_STATUS NV_API_CALL nv_vgpu_update_sysfs_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);

View File

@@ -592,6 +592,14 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
Error codes:
NV_ERR_GENERIC
NV_ERR_NO_MEMORY
NV_ERR_INVALID_STATE
NV_ERR_NOT_SUPPORTED
NV_ERR_NOT_READY
NV_ERR_INVALID_LOCK_STATE
NV_ERR_INVALID_STATE
NV_ERR_NVLINK_FABRIC_NOT_READY
NV_ERR_NVLINK_FABRIC_FAILURE
NV_ERR_GPU_MEMORY_ONLINING_FAILURE
*/
NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
UvmGpuCaps *caps);
@@ -1085,6 +1093,22 @@ NV_STATUS nvUvmInterfaceRegisterUvmCallbacks(struct UvmOpsUvmEvents *importedUvm
//
void nvUvmInterfaceDeRegisterUvmOps(void);
/*******************************************************************************
nvUvmInterfaceGetNvlinkInfo
Gets NVLINK information from RM.
Arguments:
device[IN] - GPU device handle
nvlinkInfo [OUT] - Pointer to NvlinkInfo structure
Error codes:
NV_ERROR
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceGetNvlinkInfo(uvmGpuDeviceHandle device,
UvmGpuNvlinkInfo *nvlinkInfo);
/*******************************************************************************
nvUvmInterfaceP2pObjectCreate
@@ -1161,6 +1185,48 @@ NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace,
NvU64 size,
UvmGpuExternalMappingInfo *gpuExternalMappingInfo);
/*******************************************************************************
nvUvmInterfaceGetExternalAllocPhysAddrs
The interface builds the RM physical addrs using the provided input parameters.
Arguments:
vaSpace[IN] - vaSpace handle.
hMemory[IN] - Memory handle.
offset [IN] - Offset from the beginning of the allocation
where PTE mappings should begin.
Should be aligned with mappingPagesize
in gpuExternalMappingInfo associated
with the allocation.
size [IN] - Length of the allocation for which PhysAddrs
should be built.
Should be aligned with mappingPagesize
in gpuExternalMappingInfo associated
with the allocation.
size = 0 will be interpreted as the total size
of the allocation.
gpuExternalMappingInfo[IN/OUT] - See nv_uvm_types.h for more information.
Error codes:
NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed.
NV_ERR_INVALID_OBJECT_HANDLE - Invalid memory handle is passed.
NV_ERR_NOT_SUPPORTED - Functionality is not supported (see comments in nv_gpu_ops.c)
NV_ERR_INVALID_BASE - offset is beyond the allocation size
NV_ERR_INVALID_LIMIT - (offset + size) is beyond the allocation size.
NV_ERR_BUFFER_TOO_SMALL - gpuExternalMappingInfo.physAddrBufferSize is insufficient to
store single physAddr.
NV_ERR_NOT_READY - Returned when querying the physAddrs requires a deferred setup
which has not yet completed. It is expected that the caller
will reattempt the call until a different code is returned.
As an example, multi-node systems which require querying
physAddrs from the Fabric Manager may return this code.
*/
NV_STATUS nvUvmInterfaceGetExternalAllocPhysAddrs(uvmGpuAddressSpaceHandle vaSpace,
NvHandle hMemory,
NvU64 offset,
NvU64 size,
UvmGpuExternalPhysAddrInfo *gpuExternalPhysAddrsInfo);
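
A minimal caller-side sketch of the retry pattern the comment describes for NV_ERR_NOT_READY; the vaSpace and hMemory handles, the buffer size, and the unbounded retry loop are illustrative assumptions (a real caller would bound the retries), and the info fields follow the UvmGpuExternalPhysAddrInfo structure added in nv_uvm_types.h later in this diff:

    NvU64 physAddrs[64];                       /* placeholder output buffer */
    UvmGpuExternalPhysAddrInfo info = { 0 };
    NV_STATUS status;

    info.physAddrBuffer     = physAddrs;
    info.physAddrBufferSize = sizeof(physAddrs);
    info.mappingPageSize    = 0;               /* 0: use the allocation's page size */

    do
    {
        /* offset = 0, size = 0: build PhysAddrs for the whole allocation. */
        status = nvUvmInterfaceGetExternalAllocPhysAddrs(vaSpace, hMemory, 0, 0, &info);
    } while (status == NV_ERR_NOT_READY);      /* e.g. Fabric Manager query still pending */

    if (status == NV_OK)
    {
        /* info.numWrittenPhysAddrs entries are valid; info.numRemainingPhysAddrs
         * tells the caller whether to enlarge the buffer and query again. */
    }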
/*******************************************************************************
nvUvmInterfaceRetainChannel
@@ -1462,6 +1528,16 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
char *methodStream,
NvU32 methodStreamSize);
/*******************************************************************************
nvUvmInterfaceReportFatalError
Reports a global fatal error so RM can inform the clients that a node reboot
is necessary to recover from this error. This function can be called from
any lock environment, bottom half or non-interrupt context.
*/
void nvUvmInterfaceReportFatalError(NV_STATUS error);
/*******************************************************************************
Cryptography Services Library (CSL) Interface
*/

View File

@@ -543,6 +543,36 @@ typedef struct UvmGpuExternalMappingInfo_tag
NvU32 pteSize;
} UvmGpuExternalMappingInfo;
typedef struct UvmGpuExternalPhysAddrInfo_tag
{
// In: Virtual permissions. Returns
// NV_ERR_INVALID_ACCESS_TYPE if input is
// inaccurate
UvmRmGpuMappingType mappingType;
// In: Size of the buffer to store PhysAddrs (in bytes).
NvU64 physAddrBufferSize;
// In: Page size for mapping
// If this field is passed as 0, the page size
// of the allocation is used for mapping.
// nvUvmInterfaceGetExternalAllocPtes must pass
// this field as zero.
NvU64 mappingPageSize;
// In: Pointer to a buffer to store PhysAddrs.
// Out: The interface will fill the buffer with PhysAddrs
NvU64 *physAddrBuffer;
// Out: Number of PhysAddrs filled in to the buffer.
NvU64 numWrittenPhysAddrs;
// Out: Number of PhysAddrs remaining to be filled
// if the buffer is not sufficient to accommodate
// requested PhysAddrs.
NvU64 numRemainingPhysAddrs;
} UvmGpuExternalPhysAddrInfo;
typedef struct UvmGpuP2PCapsParams_tag
{
// Out: peerId[i] contains gpu[i]'s peer id of gpu[1 - i]. Only defined if
@@ -590,19 +620,12 @@ typedef struct UvmGpuClientInfo_tag
NvHandle hSmcPartRef;
} UvmGpuClientInfo;
typedef enum
{
UVM_GPU_CONF_COMPUTE_MODE_NONE,
UVM_GPU_CONF_COMPUTE_MODE_APM,
UVM_GPU_CONF_COMPUTE_MODE_HCC,
UVM_GPU_CONF_COMPUTE_MODE_COUNT
} UvmGpuConfComputeMode;
typedef struct UvmGpuConfComputeCaps_tag
{
// Out: GPU's confidential compute mode
UvmGpuConfComputeMode mode;
// Is key rotation enabled for UVM keys
// Out: true if Confidential Computing is enabled on the GPU
NvBool bConfComputingEnabled;
// Out: true if key rotation is enabled (for UVM keys) on the GPU
NvBool bKeyRotationEnabled;
} UvmGpuConfComputeCaps;
@@ -660,6 +683,9 @@ typedef struct UvmGpuInfo_tag
// Maximum number of TPCs per GPC
NvU32 maxTpcPerGpcCount;
// Number of access counter buffers.
NvU32 accessCntrBufferCount;
// NV_TRUE if SMC is enabled on this GPU.
NvBool smcEnabled;
@@ -713,6 +739,8 @@ typedef struct UvmGpuInfo_tag
// to NVSwitch peers.
NvU64 nvswitchEgmMemoryWindowStart;
// GPU supports ATS capability
NvBool atsSupport;
} UvmGpuInfo;
typedef struct UvmGpuFbInfo_tag
@@ -721,10 +749,13 @@ typedef struct UvmGpuFbInfo_tag
// RM regions that are not registered with PMA either.
NvU64 maxAllocatableAddress;
NvU32 heapSize; // RAM in KB available for user allocations
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
NvBool bZeroFb; // Zero FB mode enabled.
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
NvU32 heapSize; // RAM in KB available for user allocations
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
NvBool bZeroFb; // Zero FB mode enabled.
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
NvBool bStaticBar1Enabled; // Static BAR1 mode is enabled
NvU64 staticBar1StartOffset; // The start offset of the the static mapping
NvU64 staticBar1Size; // The size of the static mapping
} UvmGpuFbInfo;
typedef struct UvmGpuEccInfo_tag
@@ -736,6 +767,15 @@ typedef struct UvmGpuEccInfo_tag
NvBool bEccEnabled;
} UvmGpuEccInfo;
typedef struct UvmGpuNvlinkInfo_tag
{
unsigned nvlinkMask;
unsigned nvlinkOffset;
void *nvlinkReadLocation;
NvBool *nvlinkErrorNotifier;
NvBool bNvlinkRecoveryEnabled;
} UvmGpuNvlinkInfo;
typedef struct UvmPmaAllocationOptions_tag
{
NvU32 flags;
@@ -852,6 +892,41 @@ typedef NV_STATUS (*uvmEventIsrTopHalf_t) (const NvProcessorUuid *pGpuUuidStruct
typedef void (*uvmEventIsrTopHalf_t) (void);
#endif
/*******************************************************************************
uvmEventDrainP2P
This function will be called by the GPU driver to signal to UVM that the
GPU has encountered an uncontained error, and all peer work must be drained
to recover. When it is called, the following assumptions/guarantees are
valid/made:
* Impacted user channels have been preempted and disabled
* UVM channels are still running normally and will continue to do
so unless an unrecoverable error is hit on said channels
* UVM must not return from this function until all enqueued work on
* peer channels has drained
* In the context of this function call, RM will still service faults
* UVM must prevent new peer work from being enqueued until the
uvmEventResumeP2P callback is issued
Returns:
NV_OK if UVM has idled peer work and will prevent new peer workloads.
NV_ERR_TIMEOUT if peer work was unable to be drained within a timeout
XXX NV_ERR_* for any other failure (TBD)
*/
typedef NV_STATUS (*uvmEventDrainP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
/*******************************************************************************
uvmEventResumeP2P
This function will be called by the GPU driver to signal to UVM that the
GPU has recovered from the previously reported uncontained NVLINK error.
When it is called, the following assumptions/guarantees are valid/made:
* UVM is again allowed to enqueue peer work
* UVM channels are still running normally
*/
typedef NV_STATUS (*uvmEventResumeP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
struct UvmOpsUvmEvents
{
uvmEventSuspend_t suspend;
@@ -864,6 +939,8 @@ struct UvmOpsUvmEvents
uvmEventWddmRestartAfterTimeout_t wddmRestartAfterTimeout;
uvmEventServiceInterrupt_t serviceInterrupt;
#endif
uvmEventDrainP2P_t drainP2P;
uvmEventResumeP2P_t resumeP2P;
};
#define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
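
A sketch of how the new P2P callbacks might be wired into the ops table that is passed to nvUvmInterfaceRegisterUvmCallbacks(); the callback bodies are stubs and the other members are omitted for brevity:

    static NV_STATUS uvm_drain_p2p_cb(const NvProcessorUuid *pGpuUuid)
    {
        /* Stub: quiesce enqueued peer work for this GPU, then report the outcome
         * (NV_OK, or NV_ERR_TIMEOUT if draining did not finish in time). */
        return NV_OK;
    }

    static NV_STATUS uvm_resume_p2p_cb(const NvProcessorUuid *pGpuUuid)
    {
        /* Stub: allow new peer work to be enqueued again after recovery. */
        return NV_OK;
    }

    static struct UvmOpsUvmEvents g_uvm_events =
    {
        /* ...suspend/resume/startDevice/etc. omitted... */
        .drainP2P  = uvm_drain_p2p_cb,
        .resumeP2P = uvm_resume_p2p_cb,
    };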
@@ -1071,11 +1148,13 @@ typedef UvmGpuAccessCntrConfig gpuAccessCntrConfig;
typedef UvmGpuFaultInfo gpuFaultInfo;
typedef UvmGpuMemoryInfo gpuMemoryInfo;
typedef UvmGpuExternalMappingInfo gpuExternalMappingInfo;
typedef UvmGpuExternalPhysAddrInfo gpuExternalPhysAddrInfo;
typedef UvmGpuChannelResourceInfo gpuChannelResourceInfo;
typedef UvmGpuChannelInstanceInfo gpuChannelInstanceInfo;
typedef UvmGpuChannelResourceBindParams gpuChannelResourceBindParams;
typedef UvmGpuFbInfo gpuFbInfo;
typedef UvmGpuEccInfo gpuEccInfo;
typedef UvmGpuNvlinkInfo gpuNvlinkInfo;
typedef UvmGpuPagingChannel *gpuPagingChannelHandle;
typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;

View File

@@ -50,6 +50,8 @@
#define NVKMS_LOG2_LUT_ARRAY_SIZE 10
#define NVKMS_LUT_ARRAY_SIZE (1 << NVKMS_LOG2_LUT_ARRAY_SIZE)
#define NVKMS_OLUT_FP_NORM_SCALE_DEFAULT 0xffffffff
typedef NvU32 NvKmsDeviceHandle;
typedef NvU32 NvKmsDispHandle;
typedef NvU32 NvKmsConnectorHandle;
@@ -245,6 +247,80 @@ struct NvKmsLutRamps {
NvU16 blue[NVKMS_LUT_ARRAY_SIZE]; /*! in */
};
/* Datatypes for LUT capabilities */
enum NvKmsLUTFormat {
/*
* Normalized fixed-point format mapping [0, 1] to [0x0, 0xFFFF].
*/
NVKMS_LUT_FORMAT_UNORM16,
/*
* Half-precision floating point.
*/
NVKMS_LUT_FORMAT_FP16,
/*
* 14-bit fixed-point format required to work around hardware bug 813188.
*
* To convert from UNORM16 to UNORM14_WAR_813188:
* unorm14_war_813188 = ((unorm16 >> 2) & ~7) + 0x6000
*/
NVKMS_LUT_FORMAT_UNORM14_WAR_813188
};
enum NvKmsLUTVssSupport {
NVKMS_LUT_VSS_NOT_SUPPORTED,
NVKMS_LUT_VSS_SUPPORTED,
NVKMS_LUT_VSS_REQUIRED,
};
enum NvKmsLUTVssType {
NVKMS_LUT_VSS_TYPE_NONE,
NVKMS_LUT_VSS_TYPE_LINEAR,
NVKMS_LUT_VSS_TYPE_LOGARITHMIC,
};
struct NvKmsLUTCaps {
/*! Whether this layer or head on this device supports this LUT stage. */
NvBool supported;
/*! Whether this LUT supports VSS. */
enum NvKmsLUTVssSupport vssSupport;
/*!
* The type of VSS segmenting this LUT uses.
*/
enum NvKmsLUTVssType vssType;
/*!
* Expected number of VSS segments.
*/
NvU32 vssSegments;
/*!
* Expected number of LUT entries.
*/
NvU32 lutEntries;
/*!
* Format for each of the LUT entries.
*/
enum NvKmsLUTFormat entryFormat;
};
/* each LUT entry uses this many bytes */
#define NVKMS_LUT_CAPS_LUT_ENTRY_SIZE (4 * sizeof(NvU16))
/* if the LUT surface uses VSS, size of the VSS header */
#define NVKMS_LUT_VSS_HEADER_SIZE (4 * NVKMS_LUT_CAPS_LUT_ENTRY_SIZE)
struct NvKmsLUTSurfaceParams {
NvKmsSurfaceHandle surfaceHandle;
NvU64 offset NV_ALIGN_BYTES(8);
NvU32 vssSegments;
NvU32 lutEntries;
};
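
Two small illustrative helpers tying the pieces above together: the first transcribes the UNORM16 to UNORM14_WAR_813188 conversion formula from the NvKmsLUTFormat comment, and the second shows one plausible way the entry-size and VSS-header constants could combine into a LUT surface size (the sizing rule is an assumption, not an NVKMS API):

    /* Conversion formula quoted in the NVKMS_LUT_FORMAT_UNORM14_WAR_813188 comment. */
    static inline NvU16 Unorm16ToUnorm14War813188(NvU16 unorm16)
    {
        return (NvU16)(((unorm16 >> 2) & ~7) + 0x6000);
    }

    /* Assumed sizing: lutEntries fixed-size entries, preceded by a VSS header
     * only when VSS segments are in use. */
    static inline NvU64 LutSurfaceSizeBytes(NvU32 vssSegments, NvU32 lutEntries)
    {
        NvU64 size = (NvU64)lutEntries * NVKMS_LUT_CAPS_LUT_ENTRY_SIZE;

        if (vssSegments != 0)
        {
            size += NVKMS_LUT_VSS_HEADER_SIZE;
        }
        return size;
    }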
/*
* A 3x4 row-major colorspace conversion matrix.
*
@@ -463,6 +539,10 @@ struct NvKmsLayerCapabilities {
* still expected to honor the NvKmsUsageBounds for each head.
*/
NvU64 supportedSurfaceMemoryFormats NV_ALIGN_BYTES(8);
/* Capabilities for each LUT stage in the EVO3 precomp pipeline. */
struct NvKmsLUTCaps ilut;
struct NvKmsLUTCaps tmo;
};
/*!
@@ -683,4 +763,20 @@ struct NvKmsSuperframeInfo {
} view[NVKMS_MAX_SUPERFRAME_VIEWS];
};
/* Fields within NvKmsVblankSemControlDataOneHead::flags */
#define NVKMS_VBLANK_SEM_CONTROL_SWAP_INTERVAL 15:0
struct NvKmsVblankSemControlDataOneHead {
NvU32 requestCounterAccel;
NvU32 requestCounter;
NvU32 flags;
NvU32 semaphore;
NvU64 vblankCount NV_ALIGN_BYTES(8);
};
struct NvKmsVblankSemControlData {
struct NvKmsVblankSemControlDataOneHead head[NV_MAX_HEADS];
};
#endif /* NVKMS_API_TYPES_H */

View File

@@ -124,6 +124,14 @@ struct NvKmsKapiDisplayMode {
#define NVKMS_KAPI_LAYER_INVALID_IDX 0xff
#define NVKMS_KAPI_LAYER_PRIMARY_IDX 0
struct NvKmsKapiLutCaps {
struct {
struct NvKmsLUTCaps ilut;
struct NvKmsLUTCaps tmo;
} layer[NVKMS_KAPI_LAYER_MAX];
struct NvKmsLUTCaps olut;
};
struct NvKmsKapiDeviceResourcesInfo {
NvU32 numHeads;
@@ -169,6 +177,8 @@ struct NvKmsKapiDeviceResourcesInfo {
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
NvBool supportsICtCp[NVKMS_KAPI_LAYER_MAX];
struct NvKmsKapiLutCaps lutCaps;
};
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
@@ -262,21 +272,54 @@ struct NvKmsKapiLayerConfig {
NvU16 dstWidth, dstHeight;
enum NvKmsInputColorSpace inputColorSpace;
struct {
NvBool enabled;
struct NvKmsKapiSurface *lutSurface;
NvU64 offset;
NvU32 vssSegments;
NvU32 lutEntries;
} ilut;
struct {
NvBool enabled;
struct NvKmsKapiSurface *lutSurface;
NvU64 offset;
NvU32 vssSegments;
NvU32 lutEntries;
} tmo;
struct NvKmsCscMatrix csc;
NvBool cscUseMain;
struct {
struct NvKmsCscMatrix lmsCtm;
struct NvKmsCscMatrix lmsToItpCtm;
struct NvKmsCscMatrix itpToLmsCtm;
struct NvKmsCscMatrix blendCtm;
struct {
NvBool lmsCtm : 1;
NvBool lmsToItpCtm : 1;
NvBool itpToLmsCtm : 1;
NvBool blendCtm : 1;
} enabled;
} matrixOverrides;
};
struct NvKmsKapiLayerRequestedConfig {
struct NvKmsKapiLayerConfig config;
struct {
NvBool surfaceChanged : 1;
NvBool srcXYChanged : 1;
NvBool srcWHChanged : 1;
NvBool dstXYChanged : 1;
NvBool dstWHChanged : 1;
NvBool cscChanged : 1;
NvBool tfChanged : 1;
NvBool hdrMetadataChanged : 1;
NvBool surfaceChanged : 1;
NvBool srcXYChanged : 1;
NvBool srcWHChanged : 1;
NvBool dstXYChanged : 1;
NvBool dstWHChanged : 1;
NvBool cscChanged : 1;
NvBool tfChanged : 1;
NvBool hdrMetadataChanged : 1;
NvBool matrixOverridesChanged : 1;
NvBool ilutChanged : 1;
NvBool tmoChanged : 1;
} flags;
};
@@ -342,18 +385,30 @@ struct NvKmsKapiHeadModeSetConfig {
struct NvKmsLutRamps *pRamps;
} output;
} lut;
struct {
NvBool enabled;
struct NvKmsKapiSurface *lutSurface;
NvU64 offset;
NvU32 vssSegments;
NvU32 lutEntries;
} olut;
NvU32 olutFpNormScale;
};
struct NvKmsKapiHeadRequestedConfig {
struct NvKmsKapiHeadModeSetConfig modeSetConfig;
struct {
NvBool activeChanged : 1;
NvBool displaysChanged : 1;
NvBool modeChanged : 1;
NvBool hdrInfoFrameChanged : 1;
NvBool colorimetryChanged : 1;
NvBool ilutChanged : 1;
NvBool olutChanged : 1;
NvBool activeChanged : 1;
NvBool displaysChanged : 1;
NvBool modeChanged : 1;
NvBool hdrInfoFrameChanged : 1;
NvBool colorimetryChanged : 1;
NvBool legacyIlutChanged : 1;
NvBool legacyOlutChanged : 1;
NvBool olutChanged : 1;
NvBool olutFpNormScaleChanged : 1;
} flags;
struct NvKmsKapiCursorRequestedConfig cursorRequestedConfig;
@@ -489,6 +544,9 @@ struct NvKmsKapiCreateSurfaceParams {
* explicit_layout is NV_TRUE and layout is
* NvKmsSurfaceMemoryLayoutBlockLinear */
NvU8 log2GobsPerBlockY;
/* [IN] Whether a surface can be updated directly on the screen */
NvBool noDisplayCaching;
};
enum NvKmsKapiAllocationType {
@@ -956,6 +1014,17 @@ struct NvKmsKapiFunctionsTable {
const void *pLinearAddress
);
/*!
* Check if memory object allocated is video memory.
*
* \param [in] memory Memory allocated using allocateMemory()
*
* \return NV_TRUE if memory is vidmem, NV_FALSE otherwise.
*/
NvBool (*isVidmem)(
const struct NvKmsKapiMemory *memory
);
/*!
* Create a formatted surface from an NvKmsKapiMemory object.
*
@@ -1172,21 +1241,6 @@ struct NvKmsKapiFunctionsTable {
NvU64 *pPages
);
/*!
* Check if this memory object can be scanned out for display.
*
* \param [in] device A device allocated using allocateDevice().
*
* \param [in] memory The memory object to check for display support.
*
* \return NV_TRUE if this memory can be displayed, NV_FALSE if not.
*/
NvBool (*isMemoryValidForDisplay)
(
const struct NvKmsKapiDevice *device,
const struct NvKmsKapiMemory *memory
);
/*
* Import SGT as a memory handle.
*
@@ -1425,7 +1479,7 @@ struct NvKmsKapiFunctionsTable {
);
/*!
* Immediately reset the specified display semaphore to the pending state.
* Immediately initialize the specified display semaphore to the pending state.
*
* Must be called prior to applying a mode set that utilizes the specified
* display semaphore for synchronization.
@@ -1438,7 +1492,7 @@ struct NvKmsKapiFunctionsTable {
* for the specified device.
*/
NvBool
(*resetDisplaySemaphore)
(*tryInitDisplaySemaphore)
(
struct NvKmsKapiDevice *device,
NvU32 semaphoreIndex
@@ -1447,7 +1501,7 @@ struct NvKmsKapiFunctionsTable {
/*!
* Immediately set the specified display semaphore to the displayable state.
*
* Must be called after \ref resetDisplaySemaphore to indicate a mode
* Must be called after \ref tryInitDisplaySemaphore to indicate a mode
* configuration change that utilizes the specified display semaphore for
* synchronization may proceed.
*
@@ -1471,7 +1525,7 @@ struct NvKmsKapiFunctionsTable {
*
* This can be used by clients to restore a semaphore to a consistent state
* when they have prepared it for use by previously calling
* \ref resetDisplaySemaphore() on it, but are then prevented from
* \ref tryInitDisplaySemaphore() on it, but are then prevented from
* submitting the associated hardware operations to consume it due to the
* subsequent failure of some software or hardware operation.
*
@@ -1504,6 +1558,16 @@ struct NvKmsKapiFunctionsTable {
struct NvKmsKapiDevice *device,
NvS32 index
);
/*
* Notify NVKMS that the system's framebuffer console has been disabled and
* the reserved allocation for the old framebuffer console can be unmapped.
*/
void
(*framebufferConsoleDisabled)
(
struct NvKmsKapiDevice *device
);
};
/** @} */
@@ -1518,6 +1582,20 @@ NvBool nvKmsKapiGetFunctionsTable
struct NvKmsKapiFunctionsTable *funcsTable
);
NvU32 nvKmsKapiF16ToF32(NvU16 a);
NvU16 nvKmsKapiF32ToF16(NvU32 a);
NvU32 nvKmsKapiF32Mul(NvU32 a, NvU32 b);
NvU32 nvKmsKapiF32Div(NvU32 a, NvU32 b);
NvU32 nvKmsKapiF32Add(NvU32 a, NvU32 b);
NvU32 nvKmsKapiF32ToUI32RMinMag(NvU32 a, NvBool exact);
NvU32 nvKmsKapiUI32ToF32(NvU32 a);
/** @} */
#endif /* defined(__NVKMS_KAPI_H__) */

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -34,19 +34,25 @@
/*
* This is the maximum number of GPUs supported in a single system.
*/
#define NV_MAX_DEVICES 32
#define NV_MAX_DEVICES 32
/*
* This is the maximum number of subdevices within a single device.
*/
#define NV_MAX_SUBDEVICES 8
#define NV_MAX_SUBDEVICES 8
/*
* This is the maximum length of the process name string.
*/
#define NV_PROC_NAME_MAX_LENGTH 100U
#define NV_PROC_NAME_MAX_LENGTH 100U
/*
* This is the maximum number of heads per GPU.
*/
#define NV_MAX_HEADS 4
#define NV_MAX_HEADS 4
/*
* Maximum length of a MIG device UUID. It is a 36-byte UUID string plus a
* 4-byte prefix and NUL terminator: 'M' 'I' 'G' '-' UUID '\0x0'
*/
#define NV_MIG_DEVICE_UUID_STR_LENGTH 41U
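
The 41 is the 4-character "MIG-" prefix plus the 36-character UUID plus the NUL terminator; an illustrative compile-time check (not part of the header) would be:

    /* 4 ("MIG-") + 36 (UUID) + 1 (NUL) == 41 */
    typedef char NvMigUuidStrLengthCheck[
        ((4 + 36 + 1) == NV_MIG_DEVICE_UUID_STR_LENGTH) ? 1 : -1];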

View File

@@ -33,38 +33,18 @@ extern "C" {
#include "nvtypes.h"
#if !defined(NVIDIA_UNDEF_LEGACY_BIT_MACROS)
//
// Miscellaneous macros useful for bit field manipulations
//
// STUPID HACK FOR CL 19434692. Will revert when fix CL is delivered bfm -> chips_a.
#ifndef BIT
#define BIT(b) (1U<<(b))
#endif
#ifndef BIT32
#define BIT32(b) ((NvU32)1U<<(b))
#endif
#ifndef BIT64
#define BIT64(b) ((NvU64)1U<<(b))
#endif
#endif
//
// It is recommended to use the following bit macros to avoid macro name
// collisions with other src code bases.
//
// Miscellaneous macros useful for bit field manipulations.
#ifndef NVBIT
#define NVBIT(b) (1U<<(b))
#define NVBIT(b) (1U<<(b))
#endif
#ifndef NVBIT_TYPE
#define NVBIT_TYPE(b, t) (((t)1U)<<(b))
#define NVBIT_TYPE(b, t) (((t)1U)<<(b))
#endif
#ifndef NVBIT32
#define NVBIT32(b) NVBIT_TYPE(b, NvU32)
#define NVBIT32(b) NVBIT_TYPE(b, NvU32)
#endif
#ifndef NVBIT64
#define NVBIT64(b) NVBIT_TYPE(b, NvU64)
#define NVBIT64(b) NVBIT_TYPE(b, NvU64)
#endif
//Concatenate 2 32bit values to a 64bit value
@@ -72,7 +52,7 @@ extern "C" {
// Helper macro's for 32 bit bitmasks
#define NV_BITMASK32_ELEMENT_SIZE (sizeof(NvU32) << 3)
#define NV_BITMASK32_IDX(chId) (((chId) & ~(0x1F)) >> 5)
#define NV_BITMASK32_IDX(chId) (((chId) & ~(0x1F)) >> 5)
#define NV_BITMASK32_OFFSET(chId) ((chId) & (0x1F))
#define NV_BITMASK32_SET(pChannelMask, chId) \
(pChannelMask)[NV_BITMASK32_IDX(chId)] |= NVBIT(NV_BITMASK32_OFFSET(chId))
@@ -721,6 +701,42 @@ nvPrevPow2_U64(const NvU64 x )
} \
}
//
// Bug 4851259: Newly added functions must be hidden from certain HS-signed
// ucode compilers to avoid signature mismatch.
//
#ifndef NVDEC_1_0
/*!
* Returns the position of nth set bit in the given mask.
*
* Returns -1 if mask has fewer than n bits set.
*
* n is 0 indexed and has valid values 0..31 inclusive, so "zeroth" set bit is
* the first set LSB.
*
* Example, if mask = 0x000000F0u and n = 1, the return value will be 5.
* Example, if mask = 0x000000F0u and n = 4, the return value will be -1.
*/
static NV_FORCEINLINE NvS32
nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
{
NvU32 seenSetBitsCount = 0;
NvS32 index;
FOR_EACH_INDEX_IN_MASK(32, index, mask)
{
if (seenSetBitsCount == n)
{
return index;
}
++seenSetBitsCount;
}
FOR_EACH_INDEX_IN_MASK_END;
return -1;
}
#endif // NVDEC_1_0
//
// Size to use when declaring variable-sized arrays
//
@@ -954,6 +970,22 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
// Get the number of elements the specified fixed-size array
#define NV_ARRAY_ELEMENTS(x) ((sizeof(x)/sizeof((x)[0])))
#if !defined(NVIDIA_UNDEF_LEGACY_BIT_MACROS)
//
// Deprecated macros whose definition can be removed once the code base no longer references them.
// Use the NVBIT* macros instead of these macros.
//
#ifndef BIT
#define BIT(b) (1U<<(b))
#endif
#ifndef BIT32
#define BIT32(b) ((NvU32)1U<<(b))
#endif
#ifndef BIT64
#define BIT64(b) ((NvU64)1U<<(b))
#endif
#endif
#ifdef __cplusplus
}
#endif //__cplusplus

View File

@@ -155,6 +155,10 @@ NV_STATUS_CODE(NV_ERR_KEY_ROTATION_IN_PROGRESS, 0x0000007D, "Operation no
NV_STATUS_CODE(NV_ERR_TEST_ONLY_CODE_NOT_ENABLED, 0x0000007E, "Test-only code path not enabled")
NV_STATUS_CODE(NV_ERR_SECURE_BOOT_FAILED, 0x0000007F, "GFW secure boot failed")
NV_STATUS_CODE(NV_ERR_INSUFFICIENT_ZBC_ENTRY, 0x00000080, "No more ZBC entry for the client")
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabric Status or Fabric Probe is not yet complete, caller needs to retry")
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE, 0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE, 0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")

View File

@@ -40,8 +40,11 @@
#include "nv_stdarg.h"
#include <nv-kernel-interface-api.h>
#include <os/nv_memory_type.h>
#include <os/nv_memory_area.h>
#include <nv-caps.h>
#include "rs_access.h"
typedef struct
@@ -102,8 +105,10 @@ NvBool NV_API_CALL os_pci_remove_supported (void);
void NV_API_CALL os_pci_remove (void *);
void* NV_API_CALL os_map_kernel_space (NvU64, NvU64, NvU32);
void NV_API_CALL os_unmap_kernel_space (void *, NvU64);
void* NV_API_CALL os_map_user_space (NvU64, NvU64, NvU32, NvU32, void **);
#if defined(NV_VMWARE)
void* NV_API_CALL os_map_user_space (MemoryArea *, NvU32, NvU32, void **);
void NV_API_CALL os_unmap_user_space (void *, NvU64, void *);
#endif
NV_STATUS NV_API_CALL os_flush_cpu_cache_all (void);
NV_STATUS NV_API_CALL os_flush_user_cache (void);
void NV_API_CALL os_flush_cpu_write_combine_buffer(void);
@@ -114,7 +119,7 @@ void NV_API_CALL os_io_write_byte (NvU32, NvU8);
void NV_API_CALL os_io_write_word (NvU32, NvU16);
void NV_API_CALL os_io_write_dword (NvU32, NvU32);
NvBool NV_API_CALL os_is_administrator (void);
NvBool NV_API_CALL os_allow_priority_override (void);
NvBool NV_API_CALL os_check_access (RsAccessRight accessRight);
void NV_API_CALL os_dbg_init (void);
void NV_API_CALL os_dbg_breakpoint (void);
void NV_API_CALL os_dbg_set_level (NvU32);
@@ -130,7 +135,8 @@ void NV_API_CALL os_free_spinlock (void *);
NvU64 NV_API_CALL os_acquire_spinlock (void *);
void NV_API_CALL os_release_spinlock (void *, NvU64);
NV_STATUS NV_API_CALL os_queue_work_item (struct os_work_queue *, void *);
NV_STATUS NV_API_CALL os_flush_work_queue (struct os_work_queue *);
NV_STATUS NV_API_CALL os_flush_work_queue (struct os_work_queue *, NvBool);
NvBool NV_API_CALL os_is_queue_flush_ongoing (struct os_work_queue *);
NV_STATUS NV_API_CALL os_alloc_mutex (void **);
void NV_API_CALL os_free_mutex (void *);
NV_STATUS NV_API_CALL os_acquire_mutex (void *);
@@ -164,7 +170,7 @@ NvU32 NV_API_CALL os_get_grid_csp_support (void);
void NV_API_CALL os_bug_check (NvU32, const char *);
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *, NvU32);
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
NV_STATUS NV_API_CALL os_get_smbios_header (NvU64 *pSmbsAddr);
@@ -172,6 +178,7 @@ NV_STATUS NV_API_CALL os_get_acpi_rsdp_from_uefi (NvU32 *);
void NV_API_CALL os_add_record_for_crashLog (void *, NvU32);
void NV_API_CALL os_delete_record_for_crashLog (void *);
NV_STATUS NV_API_CALL os_call_vgpu_vfio (void *, NvU32);
NV_STATUS NV_API_CALL os_device_vm_present (void);
NV_STATUS NV_API_CALL os_numa_memblock_size (NvU64 *);
NV_STATUS NV_API_CALL os_alloc_pages_node (NvS32, NvU32, NvU32, NvU64 *);
NV_STATUS NV_API_CALL os_get_page (NvU64 address);
@@ -207,6 +214,7 @@ enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_128BIT
};
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
void NV_API_CALL os_pci_trigger_flr(void *handle);
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
@@ -214,11 +222,14 @@ NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
void* NV_API_CALL os_get_pid_info(void);
void NV_API_CALL os_put_pid_info(void *pid_info);
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
NvBool NV_API_CALL os_is_init_ns(void);
extern NvU32 os_page_size;
extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_sev_snp_enabled;
extern NvBool os_cc_snp_vtom_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
extern NvBool os_imex_channel_is_supported;

View File

@@ -36,4 +36,69 @@ typedef struct MemoryArea
NvU64 numRanges;
} MemoryArea;
static inline NvU64 memareaSize(MemoryArea memArea)
{
NvU64 size = 0;
NvU64 idx = 0;
for (idx = 0; idx < memArea.numRanges; idx++)
{
size += memArea.pRanges[idx].size;
}
return size;
}
static inline MemoryRange
mrangeMake
(
NvU64 start,
NvU64 size
)
{
MemoryRange range;
range.start = start;
range.size = size;
return range;
}
static inline NvU64
mrangeLimit
(
MemoryRange a
)
{
return a.start + a.size;
}
static inline NvBool
mrangeIntersects
(
MemoryRange a,
MemoryRange b
)
{
return ((a.start >= b.start) && (a.start < mrangeLimit(b))) ||
((b.start >= a.start) && (b.start < mrangeLimit(a)));
}
static inline NvBool
mrangeContains
(
MemoryRange outer,
MemoryRange inner
)
{
return (inner.start >= outer.start) && (mrangeLimit(inner) <= mrangeLimit(outer));
}
static inline MemoryRange
mrangeOffset
(
MemoryRange range,
NvU64 amt
)
{
range.start += amt;
return range;
}
#endif /* NV_MEMORY_AREA_H */
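
A short usage sketch for the new range helpers; the addresses and sizes are arbitrary placeholders:

    MemoryRange ranges[2];
    MemoryArea  area;

    ranges[0] = mrangeMake(0x100000ULL, 0x10000ULL);   /*  64 KiB at 1 MiB */
    ranges[1] = mrangeMake(0x400000ULL, 0x20000ULL);   /* 128 KiB at 4 MiB */

    area.pRanges   = ranges;
    area.numRanges = 2;

    /* memareaSize(area)                                        == 0x30000  */
    /* mrangeContains(ranges[0], mrangeMake(0x108000, 0x1000))  == NV_TRUE  */
    /* mrangeIntersects(ranges[0], ranges[1])                   == NV_FALSE */
    /* mrangeOffset(ranges[0], 0x8000).start                    == 0x108000 */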

View File

@@ -85,9 +85,11 @@ NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDevi
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_p2p_object_create(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuDeviceHandle_t, NvHandle *);
void NV_API_CALL rm_gpu_ops_p2p_object_destroy(nvidia_stack_t *, nvgpuSessionHandle_t, NvHandle);
NV_STATUS NV_API_CALL rm_gpu_ops_get_external_alloc_ptes(nvidia_stack_t*, nvgpuAddressSpaceHandle_t, NvHandle, NvU64, NvU64, nvgpuExternalMappingInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_external_alloc_phys_addrs(nvidia_stack_t*, nvgpuAddressSpaceHandle_t, NvHandle, NvU64, NvU64, nvgpuExternalPhysAddrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_retain_channel(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvHandle, NvHandle, void **, nvgpuChannelInstanceInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_bind_channel_resources(nvidia_stack_t *, void *, nvgpuChannelResourceBindParams_t);
void NV_API_CALL rm_gpu_ops_release_channel(nvidia_stack_t *, void *);
@@ -100,6 +102,7 @@ void NV_API_CALL rm_gpu_ops_paging_channel_destroy(nvidia_stack_t *, nvgpu
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t, NvU64 *);
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
void NV_API_CALL rm_gpu_ops_report_fatal_error(nvidia_stack_t *, NV_STATUS error);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);

View File

@@ -0,0 +1,276 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <nvtypes.h>
#if defined(_MSC_VER)
#pragma warning(disable:4324)
#endif
//
// This file was generated with FINN, an NVIDIA coding tool.
// Source file: rs_access.finn
//
#include "nvtypes.h"
#include "nvmisc.h"
/****************************************************************************/
/* Access right definitions */
/****************************************************************************/
//
// The meaning of each access right is documented in
// resman/docs/rmapi/resource_server/rm_capabilities.adoc
//
// RS_ACCESS_COUNT is the number of access rights that have been defined
// and are in use. All integers in the range [0, RS_ACCESS_COUNT) should
// represent valid access rights.
//
// When adding a new access right, don't forget to update
// 1) The descriptions in the resman/docs/rmapi/resource_server/rm_capabilities.adoc
// 2) RS_ACCESS_COUNT, defined below
// 3) The declaration of g_rsAccessMetadata in rs_access_rights.c
// 4) The list of access rights in drivers/common/chip-config/Chipcontrols.pm
// 5) Any relevant access right callbacks
//
#define RS_ACCESS_DUP_OBJECT 0U
#define RS_ACCESS_NICE 1U
#define RS_ACCESS_DEBUG 2U
#define RS_ACCESS_PERFMON 3U
#define RS_ACCESS_COUNT 4U
/****************************************************************************/
/* Access right data structures */
/****************************************************************************/
/*!
* @brief A type that can be used to represent any access right.
*/
typedef NvU16 RsAccessRight;
/*!
* @brief An internal type used to represent one limb in an access right mask.
*/
typedef NvU32 RsAccessLimb;
#define SDK_RS_ACCESS_LIMB_BITS 32
/*!
* @brief The number of limbs in the RS_ACCESS_MASK struct.
*/
#define SDK_RS_ACCESS_MAX_LIMBS 1
/*!
* @brief The maximum number of possible access rights supported by the
* current data structure definition.
*
* You probably want RS_ACCESS_COUNT instead, which is the number of actual
* access rights defined.
*/
#define SDK_RS_ACCESS_MAX_COUNT (0x20) /* finn: Evaluated from "(SDK_RS_ACCESS_LIMB_BITS * SDK_RS_ACCESS_MAX_LIMBS)" */
/**
* @brief A struct representing a set of access rights.
*
* Note that the values of bit positions larger than RS_ACCESS_COUNT are
* undefined, and should not be assumed to be 0 (see RS_ACCESS_MASK_FILL).
*/
typedef struct RS_ACCESS_MASK {
RsAccessLimb limbs[SDK_RS_ACCESS_MAX_LIMBS];
} RS_ACCESS_MASK;
/**
* @brief A struct representing auxiliary information about each access right.
*/
typedef struct RS_ACCESS_INFO {
NvU32 flags;
} RS_ACCESS_INFO;
/****************************************************************************/
/* Access right macros */
/****************************************************************************/
#define SDK_RS_ACCESS_LIMB_INDEX(index) ((index) / SDK_RS_ACCESS_LIMB_BITS)
#define SDK_RS_ACCESS_LIMB_POS(index) ((index) % SDK_RS_ACCESS_LIMB_BITS)
#define SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) \
((pAccessMask)->limbs[SDK_RS_ACCESS_LIMB_INDEX(index)])
#define SDK_RS_ACCESS_OFFSET_MASK(index) \
NVBIT_TYPE(SDK_RS_ACCESS_LIMB_POS(index), RsAccessLimb)
/*!
* @brief Checks that accessRight represents a valid access right.
*
* The valid range of access rights is [0, RS_ACCESS_COUNT).
*
* @param[in] accessRight The access right value to check
*
* @return true if accessRight is valid
* @return false otherwise
*/
#define RS_ACCESS_BOUNDS_CHECK(accessRight) \
(accessRight < RS_ACCESS_COUNT)
/*!
* @brief Test whether an access right is present in a set
*
* @param[in] pAccessMask The set of access rights to read
* @param[in] index The access right to examine
*
* @return NV_TRUE if the access right specified by index was present in the set,
* and NV_FALSE otherwise
*/
#define RS_ACCESS_MASK_TEST(pAccessMask, index) \
(RS_ACCESS_BOUNDS_CHECK(index) && \
(SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) & SDK_RS_ACCESS_OFFSET_MASK(index)) != 0)
/*!
* @brief Add an access right to a mask
*
* @param[in] pAccessMask The set of access rights to modify
* @param[in] index The access right to set
*/
#define RS_ACCESS_MASK_ADD(pAccessMask, index) \
do \
{ \
if (RS_ACCESS_BOUNDS_CHECK(index)) { \
SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) |= SDK_RS_ACCESS_OFFSET_MASK(index); \
} \
} while (NV_FALSE)
/*!
* @brief Remove an access right from a mask
*
* @param[in] pAccessMask The set of access rights to modify
* @param[in] index The access right to unset
*/
#define RS_ACCESS_MASK_REMOVE(pAccessMask, index) \
do \
{ \
if (RS_ACCESS_BOUNDS_CHECK(index)) { \
SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) &= ~SDK_RS_ACCESS_OFFSET_MASK(index); \
} \
} while (NV_FALSE)
/*!
* @brief Performs an in-place union between two access right masks
*
* @param[in,out] pMaskOut The access rights mask to be updated
* @param[in] pMaskIn The set of access rights to be added to pMaskOut
*/
#define RS_ACCESS_MASK_UNION(pMaskOut, pMaskIn) \
do \
{ \
NvLength limb; \
for (limb = 0; limb < SDK_RS_ACCESS_MAX_LIMBS; limb++) \
{ \
SDK_RS_ACCESS_LIMB_ELT(pMaskOut, limb) |= SDK_RS_ACCESS_LIMB_ELT(pMaskIn, limb); \
} \
} while (NV_FALSE)
/*!
* @brief Performs an in-place subtract of one mask's rights from another
*
* @param[in,out] pMaskOut The access rights mask to be updated
* @param[in] pMaskIn The set of access rights to be removed from pMaskOut
*/
#define RS_ACCESS_MASK_SUBTRACT(pMaskOut, pMaskIn) \
do \
{ \
NvLength limb; \
for (limb = 0; limb < SDK_RS_ACCESS_MAX_LIMBS; limb++) \
{ \
SDK_RS_ACCESS_LIMB_ELT(pMaskOut, limb) &= ~SDK_RS_ACCESS_LIMB_ELT(pMaskIn, limb); \
} \
} while (NV_FALSE)
/*!
* @brief Removes all rights from an access rights mask
*
* @param[in,out] pAccessMask The access rights mask to be updated
*/
#define RS_ACCESS_MASK_CLEAR(pAccessMask) \
do \
{ \
portMemSet(pAccessMask, 0, sizeof(*pAccessMask)); \
} while (NV_FALSE)
/*!
* @brief Adds all rights to an access rights mask
*
* @param[in,out] pAccessMask The access rights mask to be updated
*/
#define RS_ACCESS_MASK_FILL(pAccessMask) \
do \
{ \
portMemSet(pAccessMask, 0xff, sizeof(*pAccessMask)); \
} while (NV_FALSE)
/****************************************************************************/
/* Share definitions */
/****************************************************************************/
//
// The usage of Share Policy and the meaning of each share type is documented in
// resman/docs/rmapi/resource_server/rm_capabilities.adoc
//
#define RS_SHARE_TYPE_NONE (0U)
#define RS_SHARE_TYPE_ALL (1U)
#define RS_SHARE_TYPE_OS_SECURITY_TOKEN (2U)
#define RS_SHARE_TYPE_CLIENT (3U)
#define RS_SHARE_TYPE_PID (4U)
#define RS_SHARE_TYPE_SMC_PARTITION (5U)
#define RS_SHARE_TYPE_GPU (6U)
#define RS_SHARE_TYPE_FM_CLIENT (7U)
// Must be last. Update when a new SHARE_TYPE is added
#define RS_SHARE_TYPE_MAX (8U)
//
// Use Revoke to remove an existing policy from the list.
// Allow is based on OR logic, Require is based on AND logic.
// To share a right, at least one Allow (non-Require) must match, and all Require must pass.
// If Compose is specified, policies will be added to the list. Otherwise, they will replace the list.
//
#define RS_SHARE_ACTION_FLAG_REVOKE NVBIT(0)
#define RS_SHARE_ACTION_FLAG_REQUIRE NVBIT(1)
#define RS_SHARE_ACTION_FLAG_COMPOSE NVBIT(2)
/****************************************************************************/
/* Share flag data structures */
/****************************************************************************/
typedef struct RS_SHARE_POLICY {
NvU32 target;
RS_ACCESS_MASK accessMask;
NvU16 type; ///< RS_SHARE_TYPE_
NvU8 action; ///< RS_SHARE_ACTION_
} RS_SHARE_POLICY;
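A brief sketch may help tie the mask macros and the share policy structure together. Everything below is illustrative: the function name is hypothetical, the target value is a placeholder, and portMemSet() is assumed to be available exactly as RS_ACCESS_MASK_CLEAR already requires.

static RS_SHARE_POLICY example_share_policy(void)
{
    RS_SHARE_POLICY policy;

    /* Start from an empty rights mask, then grant DEBUG and PERFMON. */
    RS_ACCESS_MASK_CLEAR(&policy.accessMask);
    RS_ACCESS_MASK_ADD(&policy.accessMask, RS_ACCESS_DEBUG);
    RS_ACCESS_MASK_ADD(&policy.accessMask, RS_ACCESS_PERFMON);

    /* RS_ACCESS_MASK_TEST() reports whether a given right is present. */
    if (RS_ACCESS_MASK_TEST(&policy.accessMask, RS_ACCESS_DEBUG))
    {
        /* DEBUG was just added and is within bounds, so this branch runs. */
    }

    /*
     * Require callers to be in the same SMC partition, and compose this
     * policy onto the existing list rather than replacing it, per the flag
     * descriptions above.
     */
    policy.target = 0; /* Placeholder; not every share type uses it. */
    policy.type   = RS_SHARE_TYPE_SMC_PARTITION;
    policy.action = RS_SHARE_ACTION_FLAG_REQUIRE | RS_SHARE_ACTION_FLAG_COMPOSE;

    return policy;
}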

View File

@@ -25,6 +25,7 @@ fi
# VGX_KVM_BUILD parameter defined only for vGPU builds on KVM hypervisor
# GRID_BUILD parameter defined only for GRID builds (GRID Guest driver)
# GRID_BUILD_CSP parameter defined only for GRID CSP builds (GRID Guest driver for CSPs)
# VGX_DEVICE_VM_BUILD parameter defined only for Device VM VGX build (vGPU Host driver)
test_xen() {
#
@@ -661,23 +662,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT" "" "functions"
;;
pci_bus_address)
#
# Determine if the pci_bus_address() function is
# present.
#
# Added by commit 06cf56e497c8 ("PCI: Add pci_bus_address() to
# get bus address of a BAR") in v3.14
#
CODE="
#include <linux/pci.h>
void conftest_pci_bus_address(void) {
pci_bus_address();
}"
compile_check_conftest "$CODE" "NV_PCI_BUS_ADDRESS_PRESENT" "" "functions"
;;
hash__remap_4k_pfn)
#
# Determine if the hash__remap_4k_pfn() function is
@@ -823,6 +807,16 @@ compile_test() {
return
;;
device_vm_build)
# Add config parameter if running on Device VM.
if [ -n "$VGX_DEVICE_VM_BUILD" ]; then
echo "#define NV_DEVICE_VM_BUILD" | append_conftest "generic"
else
echo "#undef NV_DEVICE_VM_BUILD" | append_conftest "generic"
fi
return
;;
vfio_register_notifier)
#
# Check number of arguments required.
@@ -1290,6 +1284,77 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PFN_ADDRESS_SPACE_STRUCT_PRESENT" "" "types"
;;
egm_module_helper_api_present)
#
# Determine if the egm management APIs are present or not.
#
CODE="
#include <linux/pci.h>
#include <linux/nvgrace-egm.h>
void conftest_egm_module_helper_api_present() {
struct pci_dev *pdev;
register_egm_node(pdev);
unregister_egm_node(0);
}
"
compile_check_conftest "$CODE" "NV_EGM_MODULE_HELPER_API_PRESENT" "" "types"
;;
egm_bad_pages_handling_support)
#
# Determine if egm_bad_pages_list is present or not.
#
CODE="
#include <linux/types.h>
#include <linux/egm.h>
void conftest_egm_bad_pages_handle() {
int ioctl = EGM_BAD_PAGES_LIST;
struct egm_bad_pages_list list;
}
"
compile_check_conftest "$CODE" "NV_EGM_BAD_PAGES_HANDLING_SUPPORT" "" "types"
;;
class_create_has_no_owner_arg)
#
# Determine if the class_create API with the new signature
# is present or not.
#
# Added by commit 1aaba11da9aa ("driver core: class: remove
# module * from class_create()") in v6.4 (2023-03-13)
#
CODE="
#include <linux/device/class.h>
void conftest_class_create() {
struct class *class;
class = class_create(\"test\");
}"
compile_check_conftest "$CODE" "NV_CLASS_CREATE_HAS_NO_OWNER_ARG" "" "types"
;;
class_devnode_has_const_arg)
#
# Determine if the class.devnode is present with the new signature.
#
# Added by commit ff62b8e6588f ("driver core: make struct
# class.devnode() take a const *") in v6.2 (2022-11-23)
#
CODE="
#include <linux/device.h>
static char *conftest_devnode(const struct device *device, umode_t *mode) {
return NULL;
}
void conftest_class_devnode() {
struct class class;
class.devnode = conftest_devnode;
}"
compile_check_conftest "$CODE" "NV_CLASS_DEVNODE_HAS_CONST_ARG" "" "types"
;;
pci_irq_vector_helpers)
#
# Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors()
@@ -1538,23 +1603,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GET_NUM_PHYSPAGES_PRESENT" "" "functions"
;;
backing_dev_info)
#
# Determine if the 'address_space' structure has
# a 'backing_dev_info' field.
#
# Removed by commit b83ae6d42143 ("fs: remove
# mapping->backing_dev_info") in v4.0
#
CODE="
#include <linux/fs.h>
int conftest_backing_dev_info(void) {
return offsetof(struct address_space, backing_dev_info);
}"
compile_check_conftest "$CODE" "NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO" "" "types"
;;
xen_ioemu_inject_msi)
# Determine if the xen_ioemu_inject_msi() function is present.
CODE="
@@ -1804,22 +1852,6 @@ compile_test() {
fi
;;
pnv_pci_get_npu_dev)
#
# Determine if the pnv_pci_get_npu_dev function is present.
#
# Added by commit 5d2aa710e697 ("powerpc/powernv: Add support
# for Nvlink NPUs") in v4.5
#
CODE="
#include <linux/pci.h>
void conftest_pnv_pci_get_npu_dev() {
pnv_pci_get_npu_dev();
}"
compile_check_conftest "$CODE" "NV_PNV_PCI_GET_NPU_DEV_PRESENT" "" "functions"
;;
kernel_write_has_pointer_pos_arg)
#
# Determine the pos argument type, which was changed by commit
@@ -2409,45 +2441,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_ATOMIC_HELPER_LEGACY_GAMMA_SET_PRESENT" "" "functions"
;;
wait_on_bit_lock_argument_count)
#
# Determine how many arguments wait_on_bit_lock takes.
#
# Changed by commit 743162013d40 ("sched: Remove proliferation
# of wait_on_bit() action functions") in v3.17 (2014-07-07)
#
echo "$CONFTEST_PREAMBLE
#include <linux/wait.h>
void conftest_wait_on_bit_lock(void) {
wait_on_bit_lock(NULL, 0, 0);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT 3" | append_conftest "functions"
return
fi
echo "$CONFTEST_PREAMBLE
#include <linux/wait.h>
void conftest_wait_on_bit_lock(void) {
wait_on_bit_lock(NULL, 0, NULL, 0);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT 4" | append_conftest "functions"
return
fi
echo "#error wait_on_bit_lock() conftest failed!" | append_conftest "functions"
;;
pci_stop_and_remove_bus_device)
#
# Determine if the pci_stop_and_remove_bus_device() function is present.
@@ -2523,29 +2516,20 @@ compile_test() {
fi
;;
mm_context_t)
file_operations_fop_unsigned_offset_present)
#
# Determine if the 'mm_context_t' data type is present
# and if it has an 'id' member.
# It does not exist on all architectures.
# Determine if the FOP_UNSIGNED_OFFSET define is present.
#
echo "$CONFTEST_PREAMBLE
#include <linux/mm.h>
int conftest_mm_context_t(void) {
return offsetof(mm_context_t, id);
}" > conftest$$.c
# Added by commit 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to
# fop_flags") in v6.12.
#
CODE="
#include <linux/fs.h>
int conftest_file_operations_fop_unsigned_offset_present(void) {
return FOP_UNSIGNED_OFFSET;
}"
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
echo "#define NV_MM_CONTEXT_T_HAS_ID" | append_conftest "types"
rm -f conftest$$.o
return
else
echo "#undef NV_MM_CONTEXT_T_HAS_ID" | append_conftest "types"
return
fi
compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT" "" "types"
;;
pci_dev_has_ats_enabled)
@@ -5102,6 +5086,42 @@ compile_test() {
compile_check_conftest "$CODE" "NV_CC_PLATFORM_PRESENT" "" "functions"
;;
cc_attr_guest_sev_snp)
#
# Determine if 'CC_ATTR_GUEST_SEV_SNP' is present.
#
# Added by commit aa5a461171f9 ("x86/mm: Extend cc_attr to
# include AMD SEV-SNP") in v5.19.
#
CODE="
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
#include <linux/cc_platform.h>
#endif
enum cc_attr cc_attributes = CC_ATTR_GUEST_SEV_SNP;
"
compile_check_conftest "$CODE" "NV_CC_ATTR_SEV_SNP" "" "types"
;;
hv_get_isolation_type)
#
# Determine if 'hv_get_isolation_type()' is present.
# Added by commit faff44069ff5 ("x86/hyperv: Add Write/Read MSR
# registers via ghcb page") in v5.16.
#
CODE="
#if defined(NV_ASM_MSHYPERV_H_PRESENT)
#include <asm/mshyperv.h>
#endif
void conftest_hv_get_isolation_type(void) {
int i;
hv_get_isolation_type(i);
}"
compile_check_conftest "$CODE" "NV_HV_GET_ISOLATION_TYPE" "" "functions"
;;
drm_prime_pages_to_sg_has_drm_device_arg)
#
# Determine if drm_prime_pages_to_sg() has 'dev' argument.
@@ -5650,6 +5670,26 @@ compile_test() {
compile_check_conftest "$CODE" "NV_ICC_GET_PRESENT" "" "functions"
;;
devm_of_icc_get)
#
# Determine if devm_of_icc_get() function is present
#
# Added by commit e145d9a ("interconnect: Add devm_of_icc_get() as
# exported API for user interconnect API")
#
CODE="
#if defined(NV_LINUX_INTERCONNECT_H_PRESENT)
#include <linux/interconnect.h>
#endif
void conftest_devm_of_icc_get(void)
{
devm_of_icc_get();
}
"
compile_check_conftest "$CODE" "NV_DEVM_ICC_GET_PRESENT" "" "functions"
;;
icc_set_bw)
#
# Determine if icc_set_bw() function is present
@@ -6096,6 +6136,20 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PLATFORM_IRQ_COUNT_PRESENT" "" "functions"
;;
pcie_reset_flr)
#
# Determine if the pcie_reset_flr() function is present
#
# Added by commit 56f107d ("PCI: Add pcie_reset_flr() with
# 'probe' argument") in v5.15.
CODE="
#include <linux/pci.h>
int conftest_pcie_reset_flr(void) {
return pcie_reset_flr();
}"
compile_check_conftest "$CODE" "NV_PCIE_RESET_FLR_PRESENT" "" "functions"
;;
devm_clk_bulk_get_all)
#
# Determine if devm_clk_bulk_get_all() function is present
@@ -6596,7 +6650,9 @@ compile_test() {
# Determine whether drm_fbdev_generic_setup is present.
#
# Added by commit 9060d7f49376 ("drm/fb-helper: Finish the
# generic fbdev emulation") in v4.19.
# generic fbdev emulation") in v4.19. Removed by commit
# aae4682e5d66 ("drm/fbdev-generic: Convert to fbdev-ttm")
# in v6.11.
#
CODE="
#include <drm/drm_fb_helper.h>
@@ -6608,6 +6664,100 @@ compile_test() {
}"
compile_check_conftest "$CODE" "NV_DRM_FBDEV_GENERIC_SETUP_PRESENT" "" "functions"
;;
drm_fbdev_ttm_setup)
#
# Determine whether drm_fbdev_ttm_setup is present.
#
# Added by commit aae4682e5d66 ("drm/fbdev-generic:
# Convert to fbdev-ttm") in v6.11. Removed by commit
# 1000634477d8 ("drm/fbdev-ttm: Convert to client-setup") in v6.13.
#
CODE="
#include <drm/drm_fb_helper.h>
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
#include <drm/drm_fbdev_ttm.h>
#endif
void conftest_drm_fbdev_ttm_setup(void) {
drm_fbdev_ttm_setup();
}"
compile_check_conftest "$CODE" "NV_DRM_FBDEV_TTM_SETUP_PRESENT" "" "functions"
;;
drm_client_setup)
#
# Determine whether drm_client_setup is present.
#
# Added by commit d07fdf922592 ("drm/fbdev-ttm:
# Convert to client-setup") in v6.13.
#
CODE="
#include <drm/drm_fb_helper.h>
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/drm_client_setup.h>
#endif
void conftest_drm_client_setup(void) {
drm_client_setup();
}"
compile_check_conftest "$CODE" "NV_DRM_CLIENT_SETUP_PRESENT" "" "functions"
;;
drm_output_poll_changed)
#
# Determine whether drm_mode_config_funcs.output_poll_changed
# callback is present
#
# Removed by commit 446d0f4849b1 ("drm: Remove struct
# drm_mode_config_funcs.output_poll_changed") in v6.12. Hotplug
# event support is handled through the fbdev emulation interface
# going forward.
#
CODE="
#if defined(NV_DRM_DRM_MODE_CONFIG_H_PRESENT)
#include <drm/drm_mode_config.h>
#else
#include <drm/drm_crtc.h>
#endif
int conftest_drm_output_poll_changed_available(void) {
return offsetof(struct drm_mode_config_funcs, output_poll_changed);
}"
compile_check_conftest "$CODE" "NV_DRM_OUTPUT_POLL_CHANGED_PRESENT" "" "types"
;;
aperture_remove_conflicting_devices)
#
# Determine whether aperture_remove_conflicting_devices is present.
#
# Added by commit 7283f862bd991 ("drm: Implement DRM aperture
# helpers under video/") in v6.0
CODE="
#if defined(NV_LINUX_APERTURE_H_PRESENT)
#include <linux/aperture.h>
#endif
void conftest_aperture_remove_conflicting_devices(void) {
aperture_remove_conflicting_devices();
}"
compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT" "" "functions"
;;
aperture_remove_conflicting_pci_devices)
#
# Determine whether aperture_remove_conflicting_pci_devices is present.
#
# Added by commit 7283f862bd991 ("drm: Implement DRM aperture
# helpers under video/") in v6.0
CODE="
#if defined(NV_LINUX_APERTURE_H_PRESENT)
#include <linux/aperture.h>
#endif
void conftest_aperture_remove_conflicting_pci_devices(void) {
aperture_remove_conflicting_pci_devices();
}"
compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT" "" "functions"
;;
drm_aperture_remove_conflicting_pci_framebuffers)
@@ -6703,17 +6853,17 @@ compile_test() {
# This test is not complete and may return a false positive.
#
CODE="
#include <crypto/akcipher.h>
#include <crypto/algapi.h>
#include <crypto/ecc_curve.h>
#include <crypto/ecdh.h>
#include <crypto/hash.h>
#include <crypto/internal/ecc.h>
#include <crypto/kpp.h>
#include <crypto/public_key.h>
#include <crypto/sm3.h>
#include <keys/asymmetric-type.h>
#include <linux/crypto.h>
#include <crypto/akcipher.h>
#include <crypto/algapi.h>
#include <crypto/ecc_curve.h>
#include <crypto/ecdh.h>
#include <crypto/hash.h>
#include <crypto/internal/ecc.h>
#include <crypto/kpp.h>
#include <crypto/public_key.h>
#include <crypto/sm3.h>
#include <keys/asymmetric-type.h>
#include <linux/crypto.h>
void conftest_crypto(void) {
struct shash_desc sd;
struct crypto_shash cs;
@@ -6723,6 +6873,47 @@ compile_test() {
compile_check_conftest "$CODE" "NV_CRYPTO_PRESENT" "" "symbols"
;;
crypto_akcipher_verify)
#
# Determine whether the crypto_akcipher_verify API is still present.
# It was removed by commit 6b34562 ('crypto: akcipher - Drop sign/verify operations')
# in v6.13-rc1 (2024-10-04).
#
# This test is dependent on the crypto conftest to determine whether crypto should be
# enabled at all.
#
# The test merely checks for the presence of the API, as it assumes that if the API
# is no longer present, the new API to replace it (crypto_sig_verify) must be present.
# If the kernel version is too old to have crypto_akcipher_verify, it will fail the crypto
# conftest above and all crypto code will be compiled out.
#
CODE="
#include <crypto/akcipher.h>
#include <linux/crypto.h>
void conftest_crypto_akcipher_verify(void) {
(void)crypto_akcipher_verify;
}"
compile_check_conftest "$CODE" "NV_CRYPTO_AKCIPHER_VERIFY_PRESENT" "" "symbols"
;;
ecc_digits_from_bytes)
#
# Determine whether ecc_digits_from_bytes is present.
# It was added in commit c6ab5c915da4 ('crypto: ecc - Prevent ecc_digits_from_bytes from
# reading too many bytes') in v6.10.
#
# This functionality is needed when crypto_akcipher_verify is not present.
#
CODE="
#include <crypto/internal/ecc.h>
void conftest_ecc_digits_from_bytes(void) {
(void)ecc_digits_from_bytes;
}"
compile_check_conftest "$CODE" "NV_ECC_DIGITS_FROM_BYTES_PRESENT" "" "symbols"
;;
mempolicy_has_unified_nodes)
#
# Determine if the 'mempolicy' structure has
@@ -6990,6 +7181,334 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_PROPERTY_BLOB_PUT_PRESENT" "" "functions"
;;
drm_driver_has_gem_prime_mmap)
#
# Determine if the 'drm_driver' structure has a 'gem_prime_mmap'
# function pointer.
#
# Removed by commit 0adec22702d4 ("drm: Remove struct
# drm_driver.gem_prime_mmap") in v6.6.
#
CODE="
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
int conftest_drm_driver_has_gem_prime_mmap(void) {
return offsetof(struct drm_driver, gem_prime_mmap);
}"
compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_GEM_PRIME_MMAP" "" "types"
;;
drm_gem_prime_mmap)
#
# Determine if the function drm_gem_prime_mmap() is present.
#
# Added by commit 7698799f95 ("drm/prime: Add drm_gem_prime_mmap()")
# in v5.0.
#
CODE="
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
#include <drm/drm_prime.h>
#endif
void conftest_drm_gem_prime_mmap(void) {
drm_gem_prime_mmap();
}"
compile_check_conftest "$CODE" "NV_DRM_GEM_PRIME_MMAP_PRESENT" "" "functions"
;;
vmf_insert_mixed)
#
# Determine if the function vmf_insert_mixed() is present.
#
# Added by commit 1c8f422059ae ("mm: change return type to
# vm_fault_t") in v4.17.
#
CODE="
#include <linux/mm.h>
void conftest_vmf_insert_mixed() {
vmf_insert_mixed();
}"
compile_check_conftest "$CODE" "NV_VMF_INSERT_MIXED_PRESENT" "" "functions"
;;
pfn_to_pfn_t)
#
# Determine if the function pfn_to_pfn_t() is present.
#
# Added by commit 34c0fd540e79 ("mm, dax, pmem: introduce pfn_t") in
# v4.5.
#
CODE="
#if defined(NV_LINUX_PFN_T_H_PRESENT)
#include <linux/pfn_t.h>
#endif
void conftest_pfn_to_pfn_t() {
pfn_to_pfn_t();
}"
compile_check_conftest "$CODE" "NV_PFN_TO_PFN_T_PRESENT" "" "functions"
;;
drm_gem_dmabuf_mmap)
#
# Determine if the drm_gem_dmabuf_mmap() function is present.
#
# drm_gem_dmabuf_mmap() was exported by commit c308279f8798 ("drm:
# export gem dmabuf_ops for drivers to reuse") in v4.17.
#
CODE="
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
#include <drm/drm_prime.h>
#endif
void conftest_drm_gem_dmabuf_mmap(void) {
drm_gem_dmabuf_mmap();
}"
compile_check_conftest "$CODE" "NV_DRM_GEM_DMABUF_MMAP_PRESENT" "" "functions"
;;
drm_gem_prime_export_has_dev_arg)
#
# Determine if drm_gem_prime_export() function has a 'dev' argument.
#
# This argument was removed by commit e4fa8457b219 ("drm/prime:
# Align gem_prime_export with obj_funcs.export") in v5.4.
#
CODE="
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
#include <drm/drm_prime.h>
#endif
void conftest_drm_gem_prime_export_has_dev_arg(
struct drm_device *dev,
struct drm_gem_object *obj) {
(void) drm_gem_prime_export(dev, obj, 0);
}"
compile_check_conftest "$CODE" "NV_DRM_GEM_PRIME_EXPORT_HAS_DEV_ARG" "" "types"
;;
dma_buf_ops_has_cache_sgt_mapping)
#
# Determine if dma_buf_ops structure has a 'cache_sgt_mapping'
# member.
#
# dma_buf_ops::cache_sgt_mapping was added by commit f13e143e7444
# ("dma-buf: start caching of sg_table objects v2") in v5.3.
#
CODE="
#include <linux/dma-buf.h>
int conftest_dma_ops_has_cache_sgt_mapping(void) {
return offsetof(struct dma_buf_ops, cache_sgt_mapping);
}"
compile_check_conftest "$CODE" "NV_DMA_BUF_OPS_HAS_CACHE_SGT_MAPPING" "" "types"
;;
drm_gem_object_funcs)
#
# Determine if the 'struct drm_gem_object_funcs' type is present.
#
# Added by commit b39b5394fabc ("drm/gem: Add drm_gem_object_funcs")
# in v5.0.
#
CODE="
#if defined(NV_DRM_DRM_GEM_H_PRESENT)
#include <drm/drm_gem.h>
#endif
struct drm_gem_object_funcs funcs;"
compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_FUNCS_PRESENT" "" "types"
;;
sg_dma_page_iter)
#
# Determine if the struct sg_dma_page_iter is present.
# This also serves to know if the argument type of the macro
# sg_page_iter_dma_address() changed:
# - before: struct sg_page_iter *piter
# - after: struct sg_dma_page_iter *dma_iter
#
# Added by commit d901b2760dc6c ("lib/scatterlist: Provide a DMA
# page iterator") v5.0.
#
CODE="
#include <linux/scatterlist.h>
struct sg_dma_page_iter conftest_dma_page_iter;"
compile_check_conftest "$CODE" "NV_SG_DMA_PAGE_ITER_PRESENT" "" "types"
;;
# FIXME: See if we can remove this test
for_each_sgtable_dma_page)
#
# Determine if macro for_each_sgtable_dma_page is present.
#
# Added by commit 709d6d73c756 ("scatterlist: add generic wrappers
# for iterating over sgtable objects") v5.7.
#
CODE="
#include <linux/scatterlist.h>
void conftest_for_each_sgtable_dma_page(void) {
for_each_sgtable_dma_page();
}"
compile_check_conftest "$CODE" "NV_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT" "" "functions"
;;
drm_aperture_remove_conflicting_framebuffers)
#
# Determine whether drm_aperture_remove_conflicting_framebuffers is present.
#
# drm_aperture_remove_conflicting_framebuffers was added in commit 2916059147ea
# ("drm/aperture: Add infrastructure for aperture ownership) in
# v5.14-rc1 (2021-04-12)
#
CODE="
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
#include <drm/drm_aperture.h>
#endif
void conftest_drm_aperture_remove_conflicting_framebuffers(void) {
drm_aperture_remove_conflicting_framebuffers();
}"
compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_PRESENT" "" "functions"
;;
drm_aperture_remove_conflicting_framebuffers_has_driver_arg)
#
# Determine whether drm_aperture_remove_conflicting_framebuffers
# takes a struct drm_driver * as its fourth argument.
#
# Prior to commit 97c9bfe3f6605d41eb8f1206e6e0f62b31ba15d6, the
# second argument was a char * pointer to the driver's name.
#
# To test if drm_aperture_remove_conflicting_framebuffers() has
# a req_driver argument, define a function with the expected
# signature and then define the corresponding function
# implementation with the expected signature. Successful compilation
# indicates that this function has the expected signature.
#
# This change occurred in commit 97c9bfe3f660 ("drm/aperture: Pass
# DRM driver structure instead of driver name") in v5.15
# (2021-06-29).
#
CODE="
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
#include <drm/drm_aperture.h>
#endif
typeof(drm_aperture_remove_conflicting_framebuffers) conftest_drm_aperture_remove_conflicting_framebuffers;
int conftest_drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size,
bool primary, const struct drm_driver *req_driver)
{
return 0;
}"
compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_DRIVER_ARG" "" "types"
;;
drm_aperture_remove_conflicting_framebuffers_has_no_primary_arg)
#
# Determine whether drm_aperture_remove_conflicting_framebuffers
# has its third argument as a bool.
#
# Prior to commit 62aeaeaa1b267c5149abee6b45967a5df3feed58, the
# third argument was a bool for figuring out whether the legacy vga
# stuff should be nuked, but it's only for pci devices and not
# really needed in this function.
#
# To test if drm_aperture_remove_conflicting_framebuffers() has
# a bool primary argument, define a function with the expected
# signature and then define the corresponding function
# implementation with the expected signature. Successful compilation
# indicates that this function has the expected signature.
#
# This change occurred in commit 62aeaeaa1b26 ("drm/aperture: Remove
# primary argument") in v6.5 (2023-04-16).
#
CODE="
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
#include <drm/drm_aperture.h>
#endif
typeof(drm_aperture_remove_conflicting_framebuffers) conftest_drm_aperture_remove_conflicting_framebuffers;
int conftest_drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size,
const struct drm_driver *req_driver)
{
return 0;
}"
compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_NO_PRIMARY_ARG" "" "types"
;;
struct_page_has_zone_device_data)
#
# Determine if struct page has a 'zone_device_data' field.
#
# Added by commit 8a164fef9c4c ("mm: simplify ZONE_DEVICE page
# private data") in v5.3.
#
CODE="
#include <linux/mm_types.h>
int conftest_struct_page_has_zone_device_data(void) {
return offsetof(struct page, zone_device_data);
}"
compile_check_conftest "$CODE" "NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA" "" "types"
;;
folio_test_swapcache)
#
# Determine if the folio_test_swapcache() function is present.
#
# folio_test_swapcache() was exported by commit d389a4a811551 ("mm:
# Add folio flag manipulation functions") in v5.16.
#
CODE="
#include <linux/page-flags.h>
void conftest_folio_test_swapcache(void) {
folio_test_swapcache();
}"
compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
;;
module_import_ns_takes_constant)
#
# Determine if the MODULE_IMPORT_NS macro takes a string literal
# or constant.
#
# Commit cdd30ebb1b9f ("module: Convert symbol namespace to
# string literal") changed MODULE_IMPORT_NS to take a string
# literal in Linux kernel v6.13.
#
CODE="
#include <linux/module.h>
MODULE_IMPORT_NS(DMA_BUF);"
compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit. Please
# avoid specifying -rc kernels, and only use SHAs that actually exist

View File

@@ -15,6 +15,8 @@ NV_HEADER_PRESENCE_TESTS = \
drm/drm_atomic_uapi.h \
drm/drm_drv.h \
drm/drm_fbdev_generic.h \
drm/drm_fbdev_ttm.h \
drm/drm_client_setup.h \
drm/drm_framebuffer.h \
drm/drm_connector.h \
drm/drm_probe_helper.h \
@@ -33,6 +35,8 @@ NV_HEADER_PRESENCE_TESTS = \
generated/autoconf.h \
generated/compile.h \
generated/utsrelease.h \
linux/aperture.h \
linux/dma-direct.h \
linux/efi.h \
linux/kconfig.h \
linux/platform/tegra/mc_utils.h \
@@ -99,5 +103,8 @@ NV_HEADER_PRESENCE_TESTS = \
linux/sync_file.h \
linux/cc_platform.h \
asm/cpufeature.h \
linux/mpi.h
linux/mpi.h \
asm/mshyperv.h \
crypto/sig.h \
linux/pfn_t.h

View File

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;
// Get the NUMA node where the first page of the stack is resident. If

View File

@@ -0,0 +1,120 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __NV_COMMON_UTILS_H__
#define __NV_COMMON_UTILS_H__
#include "nvtypes.h"
#include "nvmisc.h"
#if !defined(TRUE)
#define TRUE NV_TRUE
#endif
#if !defined(FALSE)
#define FALSE NV_FALSE
#endif
#define NV_IS_UNSIGNED(x) ((__typeof__(x))-1 > 0)
/* Get the length of a statically-sized array. */
#define ARRAY_LEN(_arr) (sizeof(_arr) / sizeof(_arr[0]))
#define NV_INVALID_HEAD 0xFFFFFFFF
#define NV_INVALID_CONNECTOR_PHYSICAL_INFORMATION (~0)
#if !defined(NV_MIN)
# define NV_MIN(a,b) (((a)<(b))?(a):(b))
#endif
#define NV_MIN3(a,b,c) NV_MIN(NV_MIN(a, b), c)
#define NV_MIN4(a,b,c,d) NV_MIN3(NV_MIN(a,b),c,d)
#if !defined(NV_MAX)
# define NV_MAX(a,b) (((a)>(b))?(a):(b))
#endif
#define NV_MAX3(a,b,c) NV_MAX(NV_MAX(a, b), c)
#define NV_MAX4(a,b,c,d) NV_MAX3(NV_MAX(a,b),c,d)
static inline int NV_LIMIT_VAL_TO_MIN_MAX(int val, int min, int max)
{
if (val < min) {
return min;
}
if (val > max) {
return max;
}
return val;
}
#define NV_ROUNDUP_DIV(x,y) ((x) / (y) + (((x) % (y)) ? 1 : 0))
/*
* Macros used for computing palette entries:
*
* NV_UNDER_REPLICATE(val, source_size, result_size) expands a value
* of source_size bits into a value of target_size bits by shifting
* the source value into the high bits and replicating the high bits
* of the value into the low bits of the result.
*
* PALETTE_DEPTH_SHIFT(val, w) maps a colormap entry for a component
* that has w bits to an appropriate entry in a LUT of 256 entries.
*/
static inline unsigned int NV_UNDER_REPLICATE(unsigned short val,
int source_size,
int result_size)
{
return (val << (result_size - source_size)) |
(val >> ((source_size << 1) - result_size));
}
static inline unsigned short PALETTE_DEPTH_SHIFT(unsigned short val, int depth)
{
return NV_UNDER_REPLICATE(val, depth, 8);
}
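/*
 * Worked example (values are illustrative): expanding a 5-bit component to
 * the 8-bit LUT range with NV_UNDER_REPLICATE(val, 5, 8), which evaluates to
 * (val << 3) | (val >> 2):
 *
 *   val = 0x1F (full scale): (0x1F << 3) | (0x1F >> 2) = 0xF8 | 0x07 = 0xFF
 *   val = 0x10:              (0x10 << 3) | (0x10 >> 2) = 0x80 | 0x04 = 0x84
 *
 * Replicating the high bits into the low bits keeps full scale at full scale,
 * which a plain left shift would not (it stops at 0xF8).
 * PALETTE_DEPTH_SHIFT(val, 5) is simply NV_UNDER_REPLICATE(val, 5, 8).
 */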
/*
* Use __builtin_ffs where it is supported, or provide an equivalent
* implementation for platforms like riscv where it is not.
*/
#if defined(__GNUC__) && !NVCPU_IS_RISCV64
static inline int nv_ffs(int x)
{
return __builtin_ffs(x);
}
#else
static inline int nv_ffs(int x)
{
if (x == 0)
return 0;
LOWESTBITIDX_32(x);
return 1 + x;
}
#endif
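/*
 * Both paths follow the ffs() convention (illustrative values):
 *   nv_ffs(0x00) == 0   (no bits set)
 *   nv_ffs(0x01) == 1   (bit 0 is the lowest set bit)
 *   nv_ffs(0x18) == 4   (bit 3 is the lowest set bit of 0b11000)
 */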
#endif /* __NV_COMMON_UTILS_H__ */

View File

@@ -62,6 +62,13 @@
#undef NV_DRM_FENCE_AVAILABLE
#endif
#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
(defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_CLIENT_AVAILABLE
#endif
/*
* We can support color management if either drm_helper_crtc_enable_color_mgmt()
* or drm_crtc_enable_color_mgmt() exist.
@@ -85,7 +92,11 @@
/* For nv_drm_gem_prime_force_fence_signal */
#ifndef spin_is_locked
#if ((__FreeBSD_version >= 1500000) && (__FreeBSD_version < 1500018)) || (__FreeBSD_version < 1401501)
#define spin_is_locked(lock) mtx_owned(lock.m)
#else
#define spin_is_locked(lock) mtx_owned(lock)
#endif
#endif
#ifndef rwsem_is_locked

File diff suppressed because it is too large

View File

@@ -38,6 +38,13 @@
#include "nvtypes.h"
#include "nvkms-kapi.h"
enum nv_drm_transfer_function {
NV_DRM_TRANSFER_FUNCTION_DEFAULT,
NV_DRM_TRANSFER_FUNCTION_LINEAR,
NV_DRM_TRANSFER_FUNCTION_PQ,
NV_DRM_TRANSFER_FUNCTION_MAX,
};
struct nv_drm_crtc {
NvU32 head;
@@ -63,6 +70,8 @@ struct nv_drm_crtc {
*/
struct drm_file *modeset_permission_filep;
struct NvKmsLUTCaps olut_caps;
struct drm_crtc base;
};
@@ -142,6 +151,12 @@ struct nv_drm_crtc_state {
* nv_drm_atomic_crtc_destroy_state().
*/
struct nv_drm_flip *nv_flip;
enum nv_drm_transfer_function regamma_tf;
struct drm_property_blob *regamma_lut;
uint64_t regamma_divisor;
struct nv_drm_lut_surface *regamma_drm_lut_surface;
NvBool regamma_changed;
};
static inline struct nv_drm_crtc_state *to_nv_crtc_state(struct drm_crtc_state *state)
@@ -149,6 +164,11 @@ static inline struct nv_drm_crtc_state *to_nv_crtc_state(struct drm_crtc_state *
return container_of(state, struct nv_drm_crtc_state, base);
}
static inline const struct nv_drm_crtc_state *to_nv_crtc_state_const(const struct drm_crtc_state *state)
{
return container_of(state, struct nv_drm_crtc_state, base);
}
struct nv_drm_plane {
/**
* @base:
@@ -170,6 +190,9 @@ struct nv_drm_plane {
* Index of this plane in the per head array of layers.
*/
uint32_t layer_idx;
struct NvKmsLUTCaps ilut_caps;
struct NvKmsLUTCaps tmo_caps;
};
static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
@@ -180,6 +203,22 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
return container_of(plane, struct nv_drm_plane, base);
}
struct nv_drm_lut_surface {
struct NvKmsKapiDevice *pDevice;
struct NvKmsKapiMemory *nvkms_memory;
struct NvKmsKapiSurface *nvkms_surface;
struct {
NvU32 vssSegments;
enum NvKmsLUTVssType vssType;
NvU32 lutEntries;
enum NvKmsLUTFormat entryFormat;
} properties;
void *buffer;
struct kref refcount;
};
struct nv_drm_plane_state {
struct drm_plane_state base;
s32 __user *fd_user_ptr;
@@ -187,6 +226,20 @@ struct nv_drm_plane_state {
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct drm_property_blob *hdr_output_metadata;
#endif
struct drm_property_blob *lms_ctm;
struct drm_property_blob *lms_to_itp_ctm;
struct drm_property_blob *itp_to_lms_ctm;
struct drm_property_blob *blend_ctm;
enum nv_drm_transfer_function degamma_tf;
struct drm_property_blob *degamma_lut;
uint64_t degamma_multiplier; /* S31.32 Sign-Magnitude Format */
struct nv_drm_lut_surface *degamma_drm_lut_surface;
NvBool degamma_changed;
struct drm_property_blob *tmo_lut;
struct nv_drm_lut_surface *tmo_drm_lut_surface;
NvBool tmo_changed;
};
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)

View File

@@ -64,12 +64,25 @@
#include <drm/drm_ioctl.h>
#endif
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_LINUX_APERTURE_H_PRESENT)
#include <linux/aperture.h>
#endif
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
#include <drm/drm_aperture.h>
#endif
#if defined(NV_DRM_FBDEV_AVAILABLE)
#include <drm/drm_fb_helper.h>
#endif
#if defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/drm_client_setup.h>
#endif
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
#include <drm/drm_fbdev_ttm.h>
#elif defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
#include <drm/drm_fbdev_generic.h>
#endif
@@ -105,16 +118,16 @@ static int nv_drm_revoke_sub_ownership(struct drm_device *dev);
static struct nv_drm_device *dev_list = NULL;
static const char* nv_get_input_colorspace_name(
static char* nv_get_input_colorspace_name(
enum NvKmsInputColorSpace colorSpace)
{
switch (colorSpace) {
case NVKMS_INPUT_COLORSPACE_NONE:
return "None";
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
return "IEC 61966-2-2 linear FP";
return "scRGB Linear FP16";
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
return "ITU-R BT.2100-PQ YCbCr";
return "BT.2100 PQ";
default:
/* We shoudn't hit this */
WARN_ON("Unsupported input colorspace");
@@ -122,8 +135,30 @@ static const char* nv_get_input_colorspace_name(
}
};
static char* nv_get_transfer_function_name(
enum nv_drm_transfer_function tf)
{
switch (tf) {
case NV_DRM_TRANSFER_FUNCTION_LINEAR:
return "Linear";
case NV_DRM_TRANSFER_FUNCTION_PQ:
return "PQ (Perceptual Quantizer)";
default:
/* We shouldn't hit this */
WARN_ON("Unsupported transfer function");
#if defined(fallthrough)
fallthrough;
#else
/* Fallthrough */
#endif
case NV_DRM_TRANSFER_FUNCTION_DEFAULT:
return "Default";
}
};
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
static void nv_drm_output_poll_changed(struct drm_device *dev)
{
struct drm_connector *connector = NULL;
@@ -167,6 +202,7 @@ static void nv_drm_output_poll_changed(struct drm_device *dev)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
}
#endif /* NV_DRM_OUTPUT_POLL_CHANGED_PRESENT */
static struct drm_framebuffer *nv_drm_framebuffer_create(
struct drm_device *dev,
@@ -204,7 +240,9 @@ static const struct drm_mode_config_funcs nv_mode_config_funcs = {
.atomic_check = nv_drm_atomic_check,
.atomic_commit = nv_drm_atomic_commit,
#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
.output_poll_changed = nv_drm_output_poll_changed,
#endif
};
static void nv_drm_event_callback(const struct NvKmsKapiEvent *event)
@@ -364,15 +402,21 @@ static void nv_drm_enumerate_encoders_and_connectors
*/
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
{
struct drm_prop_enum_list enum_list[3] = { };
struct drm_prop_enum_list colorspace_enum_list[3] = { };
struct drm_prop_enum_list tf_enum_list[NV_DRM_TRANSFER_FUNCTION_MAX] = { };
int i, len = 0;
for (i = 0; i < 3; i++) {
enum_list[len].type = i;
enum_list[len].name = nv_get_input_colorspace_name(i);
colorspace_enum_list[len].type = i;
colorspace_enum_list[len].name = nv_get_input_colorspace_name(i);
len++;
}
for (i = 0; i < NV_DRM_TRANSFER_FUNCTION_MAX; i++) {
tf_enum_list[i].type = i;
tf_enum_list[i].name = nv_get_transfer_function_name(i);
}
if (nv_dev->supportsSyncpts) {
nv_dev->nv_out_fence_property =
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
@@ -384,7 +428,7 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
nv_dev->nv_input_colorspace_property =
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
enum_list, len);
colorspace_enum_list, len);
if (nv_dev->nv_input_colorspace_property == NULL) {
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
return -ENOMEM;
@@ -399,6 +443,109 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
}
#endif
nv_dev->nv_plane_lms_ctm_property =
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
"NV_PLANE_LMS_CTM", 0);
if (nv_dev->nv_plane_lms_ctm_property == NULL) {
return -ENOMEM;
}
nv_dev->nv_plane_lms_to_itp_ctm_property =
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
"NV_PLANE_LMS_TO_ITP_CTM", 0);
if (nv_dev->nv_plane_lms_to_itp_ctm_property == NULL) {
return -ENOMEM;
}
nv_dev->nv_plane_itp_to_lms_ctm_property =
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
"NV_PLANE_ITP_TO_LMS_CTM", 0);
if (nv_dev->nv_plane_itp_to_lms_ctm_property == NULL) {
return -ENOMEM;
}
nv_dev->nv_plane_blend_ctm_property =
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
"NV_PLANE_BLEND_CTM", 0);
if (nv_dev->nv_plane_blend_ctm_property == NULL) {
return -ENOMEM;
}
// Degamma TF + LUT + LUT Size + Multiplier
nv_dev->nv_plane_degamma_tf_property =
drm_property_create_enum(nv_dev->dev, 0,
"NV_PLANE_DEGAMMA_TF", tf_enum_list,
NV_DRM_TRANSFER_FUNCTION_MAX);
if (nv_dev->nv_plane_degamma_tf_property == NULL) {
return -ENOMEM;
}
nv_dev->nv_plane_degamma_lut_property =
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
"NV_PLANE_DEGAMMA_LUT", 0);
if (nv_dev->nv_plane_degamma_lut_property == NULL) {
return -ENOMEM;
}
nv_dev->nv_plane_degamma_lut_size_property =
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_IMMUTABLE,
"NV_PLANE_DEGAMMA_LUT_SIZE", 0, UINT_MAX);
if (nv_dev->nv_plane_degamma_lut_size_property == NULL) {
return -ENOMEM;
}
nv_dev->nv_plane_degamma_multiplier_property =
drm_property_create_range(nv_dev->dev, 0,
"NV_PLANE_DEGAMMA_MULTIPLIER", 0,
U64_MAX & ~(((NvU64) 1) << 63)); // No negative values
if (nv_dev->nv_plane_degamma_multiplier_property == NULL) {
return -ENOMEM;
}
// TMO LUT + LUT Size
nv_dev->nv_plane_tmo_lut_property =
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
"NV_PLANE_TMO_LUT", 0);
if (nv_dev->nv_plane_tmo_lut_property == NULL) {
return -ENOMEM;
}
nv_dev->nv_plane_tmo_lut_size_property =
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_IMMUTABLE,
"NV_PLANE_TMO_LUT_SIZE", 0, UINT_MAX);
if (nv_dev->nv_plane_tmo_lut_size_property == NULL) {
return -ENOMEM;
}
// REGAMMA TF + LUT + LUT Size + Divisor
nv_dev->nv_crtc_regamma_tf_property =
drm_property_create_enum(nv_dev->dev, 0,
"NV_CRTC_REGAMMA_TF", tf_enum_list,
NV_DRM_TRANSFER_FUNCTION_MAX);
if (nv_dev->nv_crtc_regamma_tf_property == NULL) {
return -ENOMEM;
}
nv_dev->nv_crtc_regamma_lut_property =
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
"NV_CRTC_REGAMMA_LUT", 0);
if (nv_dev->nv_crtc_regamma_lut_property == NULL) {
return -ENOMEM;
}
nv_dev->nv_crtc_regamma_lut_size_property =
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_IMMUTABLE,
"NV_CRTC_REGAMMA_LUT_SIZE", 0, UINT_MAX);
if (nv_dev->nv_crtc_regamma_lut_size_property == NULL) {
return -ENOMEM;
}
// S31.32
nv_dev->nv_crtc_regamma_divisor_property =
drm_property_create_range(nv_dev->dev, 0,
"NV_CRTC_REGAMMA_DIVISOR",
(((NvU64) 1) << 32), // No values between 0 and 1
U64_MAX & ~(((NvU64) 1) << 63)); // No negative values
if (nv_dev->nv_crtc_regamma_divisor_property == NULL) {
return -ENOMEM;
}
return 0;
}
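The NV_PLANE_DEGAMMA_MULTIPLIER and NV_CRTC_REGAMMA_DIVISOR properties created above take S31.32 sign-magnitude fixed-point values, with the divisor range starting at 1 << 32 so that values below 1.0 are rejected. A minimal packing sketch for the client side, assuming that layout (sign in bit 63, 32 fraction bits); the helper name is hypothetical and not part of the driver:

#include <stdint.h>
#include <math.h>

/* Pack a double into S31.32 sign-magnitude: 1.0 -> 1ULL << 32, 1.5 -> 0x180000000. */
static uint64_t pack_s31_32(double value)
{
    uint64_t sign = 0;

    if (value < 0.0) {
        sign  = 1ULL << 63; /* sign bit; the property ranges above reject negatives anyway */
        value = -value;
    }

    /* 32 fractional bits; the magnitude must fit in 63 bits (not range-checked here). */
    return sign | (uint64_t)llround(value * 4294967296.0);
}

A client would pass the packed value when setting the corresponding plane or CRTC property through the usual DRM atomic property interface.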
@@ -476,7 +623,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
return -ENODEV;
}
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
/*
* If fbdev is enabled, take modeset ownership now before other DRM clients
* can take master (and thus NVKMS ownership).
@@ -548,6 +695,13 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
ret = nv_drm_create_properties(nv_dev);
if (ret < 0) {
drm_mode_config_cleanup(dev);
#if defined(NV_DRM_FBDEV_AVAILABLE)
if (nv_dev->hasFramebufferConsole) {
nvKms->releaseOwnership(nv_dev->pDevice);
}
#endif
nvKms->freeDevice(nv_dev->pDevice);
return -ENODEV;
}
@@ -610,7 +764,7 @@ static void __nv_drm_unload(struct drm_device *dev)
/* Release modeset ownership if fbdev is enabled */
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
if (nv_dev->hasFramebufferConsole) {
drm_atomic_helper_shutdown(dev);
nvKms->releaseOwnership(nv_dev->pDevice);
@@ -710,36 +864,37 @@ void nv_drm_master_drop(struct drm_device *dev, struct drm_file *file_priv)
#endif
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
int err;
nv_drm_revoke_modeset_permission(dev, file_priv, 0);
nv_drm_revoke_sub_ownership(dev);
/*
* After dropping nvkms modeset ownership, it is not guaranteed that
* drm and nvkms modeset state will remain in sync. Therefore, disable
* all outputs and crtcs before dropping nvkms modeset ownership.
*
* First disable all active outputs atomically and then disable each crtc one
* by one; there is no helper function available to disable all crtcs
* atomically.
*/
drm_modeset_lock_all(dev);
if ((err = nv_drm_atomic_helper_disable_all(
dev,
dev->mode_config.acquire_ctx)) != 0) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"nv_drm_atomic_helper_disable_all failed with error code %d !",
err);
}
drm_modeset_unlock_all(dev);
if (!nv_dev->hasFramebufferConsole) {
int err;
/*
* After dropping nvkms modeset ownership, it is not guaranteed that drm
* and nvkms modeset state will remain in sync. Therefore, disable all
* outputs and crtcs before dropping nvkms modeset ownership.
*
* First disable all active outputs atomically and then disable each
* crtc one by one; there is no helper function available to disable
* all crtcs atomically.
*/
drm_modeset_lock_all(dev);
if ((err = nv_drm_atomic_helper_disable_all(
dev,
dev->mode_config.acquire_ctx)) != 0) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"nv_drm_atomic_helper_disable_all failed with error code %d !",
err);
}
drm_modeset_unlock_all(dev);
nvKms->releaseOwnership(nv_dev->pDevice);
}
}
@@ -1567,6 +1722,10 @@ static const struct file_operations nv_drm_fops = {
.read = drm_read,
.llseek = noop_llseek,
#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
.fop_flags = FOP_UNSIGNED_OFFSET,
#endif
};
static const struct drm_ioctl_desc nv_drm_ioctls[] = {
@@ -1684,14 +1843,19 @@ static struct drm_driver nv_drm_driver = {
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
/*
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers
* for fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
* drm_gem_prime_fd_to_handle().
* Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
* all drivers") made drm_gem_prime_handle_to_fd() /
* drm_gem_prime_fd_to_handle() the default when .prime_handle_to_fd /
* .prime_fd_to_handle are unspecified, respectively.
*
* Prior to that, Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME
* import/export for all drivers") made these helpers the default when
* .prime_handle_to_fd / .prime_fd_to_handle are unspecified, so it's fine
* to just skip specifying them if the helpers aren't present.
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers for
* fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
* drm_gem_prime_fd_to_handle(). However, because of the aforementioned commit,
* it's fine to just skip specifying them in this case.
*
* Linux kernel v6.7 commit 0514f63cfff3 ("Revert "drm/prime: Unexport helpers
* for fd/handle conversion"") exported the helpers again, but left the default
* behavior intact. Nonetheless, it does not hurt to specify them.
*/
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
@@ -1703,6 +1867,21 @@ static struct drm_driver nv_drm_driver = {
.gem_prime_import = nv_drm_gem_prime_import,
.gem_prime_import_sg_table = nv_drm_gem_prime_import_sg_table,
/*
* Linux kernel v5.0 commit 7698799f95 ("drm/prime: Add drm_gem_prime_mmap()")
* added drm_gem_prime_mmap().
*
* Linux kernel v6.6 commit 0adec22702d4 ("drm: Remove struct
* drm_driver.gem_prime_mmap") removed .gem_prime_mmap, but replaced it with a
* direct call to drm_gem_prime_mmap().
*
* TODO: Support .gem_prime_mmap on Linux < v5.0 using internal implementation.
*/
#if defined(NV_DRM_GEM_PRIME_MMAP_PRESENT) && \
defined(NV_DRM_DRIVER_HAS_GEM_PRIME_MMAP)
.gem_prime_mmap = drm_gem_prime_mmap,
#endif
#if defined(NV_DRM_DRIVER_HAS_GEM_PRIME_CALLBACKS)
.gem_prime_export = drm_gem_prime_export,
.gem_prime_get_sg_table = nv_drm_gem_prime_get_sg_table,
@@ -1743,6 +1922,9 @@ static struct drm_driver nv_drm_driver = {
#elif defined(NV_DRM_DRIVER_HAS_LEGACY_DEV_LIST)
.legacy_dev_list = LIST_HEAD_INIT(nv_drm_driver.legacy_dev_list),
#endif
#if defined(DRM_FBDEV_TTM_DRIVER_OPS)
DRM_FBDEV_TTM_DRIVER_OPS,
#endif
};
@@ -1838,22 +2020,35 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
goto failed_drm_register;
}
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
if (nv_drm_fbdev_module_param &&
drm_core_check_feature(dev, DRIVER_MODESET)) {
if (bus_is_pci) {
struct pci_dev *pdev = to_pci_dev(device);
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_HAS_DRIVER_ARG)
drm_aperture_remove_conflicting_pci_framebuffers(pdev, &nv_drm_driver);
#else
drm_aperture_remove_conflicting_pci_framebuffers(pdev, nv_drm_driver.name);
#endif
#elif defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT)
aperture_remove_conflicting_pci_devices(pdev, nv_drm_driver.name);
#endif
nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
}
#if defined(NV_DRM_CLIENT_AVAILABLE)
drm_client_setup(dev, NULL);
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
drm_fbdev_ttm_setup(dev, 32);
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
drm_fbdev_generic_setup(dev, 32);
#endif
}
#endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */
#endif /* defined(NV_DRM_FBDEV_AVAILABLE) */
/* Add NVIDIA-DRM device into list */
@@ -1995,12 +2190,12 @@ void nv_drm_suspend_resume(NvBool suspend)
if (suspend) {
drm_kms_helper_poll_disable(dev);
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 1);
#endif
drm_mode_config_reset(dev);
} else {
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 0);
#endif
drm_kms_helper_poll_enable(dev);

View File

@@ -36,12 +36,15 @@
static void __nv_drm_framebuffer_free(struct nv_drm_framebuffer *nv_fb)
{
struct drm_framebuffer *fb = &nv_fb->base;
uint32_t i;
/* Unreference gem object */
for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
if (nv_fb->nv_gem[i] != NULL) {
nv_drm_gem_object_unreference_unlocked(nv_fb->nv_gem[i]);
for (i = 0; i < NVKMS_MAX_PLANES_PER_SURFACE; i++) {
struct drm_gem_object *gem = nv_fb_get_gem_obj(fb, i);
if (gem != NULL) {
struct nv_drm_gem_object *nv_gem = to_nv_gem_object(gem);
nv_drm_gem_object_unreference_unlocked(nv_gem);
}
}
@@ -69,10 +72,8 @@ static int
nv_drm_framebuffer_create_handle(struct drm_framebuffer *fb,
struct drm_file *file, unsigned int *handle)
{
struct nv_drm_framebuffer *nv_fb = to_nv_framebuffer(fb);
return nv_drm_gem_handle_create(file,
nv_fb->nv_gem[0],
to_nv_gem_object(nv_fb_get_gem_obj(fb, 0)),
handle);
}
@@ -88,6 +89,7 @@ static struct nv_drm_framebuffer *nv_drm_framebuffer_alloc(
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct nv_drm_framebuffer *nv_fb;
struct nv_drm_gem_object *nv_gem;
const int num_planes = nv_drm_format_num_planes(cmd->pixel_format);
uint32_t i;
@@ -101,21 +103,22 @@ static struct nv_drm_framebuffer *nv_drm_framebuffer_alloc(
return ERR_PTR(-ENOMEM);
}
if (num_planes > ARRAY_SIZE(nv_fb->nv_gem)) {
if (num_planes > NVKMS_MAX_PLANES_PER_SURFACE) {
NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "Unsupported number of planes");
goto failed;
}
for (i = 0; i < num_planes; i++) {
if ((nv_fb->nv_gem[i] = nv_drm_gem_object_lookup(
dev,
file,
cmd->handles[i])) == NULL) {
nv_gem = nv_drm_gem_object_lookup(dev, file, cmd->handles[i]);
if (nv_gem == NULL) {
NV_DRM_DEV_DEBUG_DRIVER(
nv_dev,
"Failed to find gem object of type nvkms memory");
goto failed;
}
nv_fb_set_gem_obj(&nv_fb->base, i, &nv_gem->base);
}
return nv_fb;
@@ -135,12 +138,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct NvKmsKapiCreateSurfaceParams params = { };
struct nv_drm_gem_object *nv_gem;
struct drm_framebuffer *fb = &nv_fb->base;
uint32_t i;
int ret;
/* Initialize the base framebuffer object and add it to drm subsystem */
ret = drm_framebuffer_init(dev, &nv_fb->base, &nv_framebuffer_funcs);
ret = drm_framebuffer_init(dev, fb, &nv_framebuffer_funcs);
if (ret != 0) {
NV_DRM_DEV_DEBUG_DRIVER(
nv_dev,
@@ -148,23 +153,32 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
return ret;
}
for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
if (nv_fb->nv_gem[i] != NULL) {
if (!nvKms->isMemoryValidForDisplay(nv_dev->pDevice,
nv_fb->nv_gem[i]->pMemory)) {
NV_DRM_DEV_LOG_INFO(
nv_dev,
"Framebuffer memory not appropriate for scanout");
goto fail;
}
for (i = 0; i < NVKMS_MAX_PLANES_PER_SURFACE; i++) {
struct drm_gem_object *gem = nv_fb_get_gem_obj(fb, i);
if (gem != NULL) {
nv_gem = to_nv_gem_object(gem);
params.planes[i].memory = nv_fb->nv_gem[i]->pMemory;
params.planes[i].offset = nv_fb->base.offsets[i];
params.planes[i].pitch = nv_fb->base.pitches[i];
params.planes[i].memory = nv_gem->pMemory;
params.planes[i].offset = fb->offsets[i];
params.planes[i].pitch = fb->pitches[i];
/*
* XXX Use drm_framebuffer_funcs.dirty and
* drm_fb_helper_funcs.fb_dirty instead
*
* Currently using noDisplayCaching when registering surfaces with
* NVKMS that are using memory allocated through the DRM
* Dumb-Buffers API. This prevents Display Idle Frame Rate from
* kicking in and leaving CPU updates to the surface memory unreflected
* on the display. Ideally, DIFR would be
* dynamically disabled whenever a user of the memory blits to the
* frontbuffer. DRM provides the needed callbacks to achieve this.
*/
params.noDisplayCaching |= !!nv_gem->is_drm_dumb;
}
}
params.height = nv_fb->base.height;
params.width = nv_fb->base.width;
params.height = fb->height;
params.width = fb->width;
params.format = format;
if (have_modifier) {
@@ -199,7 +213,7 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
return 0;
fail:
drm_framebuffer_cleanup(&nv_fb->base);
drm_framebuffer_cleanup(fb);
return -EINVAL;
}

View File

@@ -41,8 +41,10 @@
struct nv_drm_framebuffer {
struct NvKmsKapiSurface *pSurface;
struct nv_drm_gem_object*
nv_gem[NVKMS_MAX_PLANES_PER_SURFACE];
#if !defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
struct drm_gem_object*
obj[NVKMS_MAX_PLANES_PER_SURFACE];
#endif
struct drm_framebuffer base;
};
@@ -56,6 +58,29 @@ static inline struct nv_drm_framebuffer *to_nv_framebuffer(
return container_of(fb, struct nv_drm_framebuffer, base);
}
static inline struct drm_gem_object *nv_fb_get_gem_obj(
struct drm_framebuffer *fb,
uint32_t plane)
{
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
return fb->obj[plane];
#else
return to_nv_framebuffer(fb)->obj[plane];
#endif
}
static inline void nv_fb_set_gem_obj(
struct drm_framebuffer *fb,
uint32_t plane,
struct drm_gem_object *obj)
{
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
fb->obj[plane] = obj;
#else
to_nv_framebuffer(fb)->obj[plane] = obj;
#endif
}
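These two inline helpers exist so the rest of nvidia-drm can stop caring whether the kernel's drm_framebuffer provides the obj[] array (NV_DRM_FRAMEBUFFER_OBJ_PRESENT) or the driver must fall back to its private copy. A minimal caller sketch, modeled on the cleanup loop from nvidia-drm-fb.c earlier in this diff (the wrapper name is illustrative, not part of the change):
static void example_put_framebuffer_planes(struct drm_framebuffer *fb)
{
    uint32_t i;
    /* Drop the reference taken on each plane's GEM object, wherever the
     * pointer happens to be stored on this kernel. */
    for (i = 0; i < NVKMS_MAX_PLANES_PER_SURFACE; i++) {
        struct drm_gem_object *gem = nv_fb_get_gem_obj(fb, i);
        if (gem != NULL) {
            nv_drm_gem_object_unreference_unlocked(to_nv_gem_object(gem));
        }
    }
}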
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,

View File

@@ -71,9 +71,20 @@ static void __nv_drm_gem_nvkms_memory_free(struct nv_drm_gem_object *nv_gem)
nv_drm_free(nv_nvkms_memory);
}
static int __nv_drm_gem_nvkms_map(
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory);
static int __nv_drm_gem_nvkms_mmap(struct nv_drm_gem_object *nv_gem,
struct vm_area_struct *vma)
{
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
to_nv_nvkms_memory(nv_gem);
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
if (ret) {
return ret;
}
return drm_gem_mmap_obj(&nv_gem->base,
drm_vma_node_size(&nv_gem->base.vma_node) << PAGE_SHIFT, vma);
}
@@ -146,11 +157,18 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
static int __nv_drm_gem_nvkms_map(
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory)
{
int ret = 0;
struct nv_drm_device *nv_dev = nv_nvkms_memory->base.nv_dev;
struct NvKmsKapiMemory *pMemory = nv_nvkms_memory->base.pMemory;
if (!nv_dev->hasVideoMemory) {
return 0;
mutex_lock(&nv_nvkms_memory->map_lock);
if (nv_nvkms_memory->physically_mapped) {
goto done;
}
if (!nvKms->isVidmem(pMemory)) {
goto done;
}
if (!nvKms->mapMemory(nv_dev->pDevice,
@@ -161,7 +179,8 @@ static int __nv_drm_gem_nvkms_map(
nv_dev,
"Failed to map NvKmsKapiMemory 0x%p",
pMemory);
return -ENOMEM;
ret = -ENOMEM;
goto done;
}
nv_nvkms_memory->pWriteCombinedIORemapAddress = ioremap_wc(
@@ -177,7 +196,9 @@ static int __nv_drm_gem_nvkms_map(
nv_nvkms_memory->physically_mapped = true;
return 0;
done:
mutex_unlock(&nv_nvkms_memory->map_lock);
return ret;
}
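Structurally, the function above is a one-shot, double-checked initialization: __nv_drm_gem_nvkms_map() is the only writer of physically_mapped and the remap pointers, and it publishes the flag only after the mapping is complete, which is what lets readers skip map_lock (see the comment added to the header later in this diff). Condensed to its skeleton, the pattern looks like this (illustrative only; the real body is above):
static int example_map_once(struct nv_drm_gem_nvkms_memory *m)
{
    int ret = 0;
    mutex_lock(&m->map_lock);
    if (m->physically_mapped) {
        /* A previous caller already finished the mapping. */
        goto done;
    }
    /* ... map the memory and fill pPhysicalAddress and
     * pWriteCombinedIORemapAddress, setting ret on failure ... */
    /* Set the flag last, so readers that skip the lock never observe it
     * before the pointers are valid. */
    m->physically_mapped = true;
done:
    mutex_unlock(&m->map_lock);
    return ret;
}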
static void *__nv_drm_gem_nvkms_prime_vmap(
@@ -186,14 +207,40 @@ static void *__nv_drm_gem_nvkms_prime_vmap(
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
to_nv_nvkms_memory(nv_gem);
if (!nv_nvkms_memory->physically_mapped) {
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
if (ret) {
return ERR_PTR(ret);
}
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
if (ret) {
return ERR_PTR(ret);
}
return nv_nvkms_memory->pWriteCombinedIORemapAddress;
if (nv_nvkms_memory->physically_mapped) {
return nv_nvkms_memory->pWriteCombinedIORemapAddress;
}
/*
* If this buffer isn't physically mapped, it might be backed by struct
* pages. Use vmap in that case. Do a noncached mapping for system memory
* as the display is a non-IO-coherent device in the case of Tegra.
*/
if (nv_nvkms_memory->pages_count > 0) {
return nv_drm_vmap(nv_nvkms_memory->pages,
nv_nvkms_memory->pages_count,
false);
}
return ERR_PTR(-ENOMEM);
}
static void __nv_drm_gem_nvkms_prime_vunmap(
struct nv_drm_gem_object *nv_gem,
void *address)
{
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
to_nv_nvkms_memory(nv_gem);
if (!nv_nvkms_memory->physically_mapped &&
nv_nvkms_memory->pages_count > 0) {
nv_drm_vunmap(address);
}
}
static int __nv_drm_gem_map_nvkms_memory_offset(
@@ -201,17 +248,7 @@ static int __nv_drm_gem_map_nvkms_memory_offset(
struct nv_drm_gem_object *nv_gem,
uint64_t *offset)
{
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
to_nv_nvkms_memory(nv_gem);
if (!nv_nvkms_memory->physically_mapped) {
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
if (ret) {
return ret;
}
}
return nv_drm_gem_create_mmap_offset(&nv_nvkms_memory->base, offset);
return nv_drm_gem_create_mmap_offset(nv_gem, offset);
}
static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
@@ -223,7 +260,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
struct sg_table *sg_table;
if (nv_nvkms_memory->pages_count == 0) {
NV_DRM_DEV_LOG_ERR(
NV_DRM_DEV_DEBUG_DRIVER(
nv_dev,
"Cannot create sg_table for NvKmsKapiMemory 0x%p",
nv_gem->pMemory);
@@ -241,6 +278,7 @@ const struct nv_drm_gem_object_funcs nv_gem_nvkms_memory_ops = {
.free = __nv_drm_gem_nvkms_memory_free,
.prime_dup = __nv_drm_gem_nvkms_prime_dup,
.prime_vmap = __nv_drm_gem_nvkms_prime_vmap,
.prime_vunmap = __nv_drm_gem_nvkms_prime_vunmap,
.mmap = __nv_drm_gem_nvkms_mmap,
.handle_vma_fault = __nv_drm_gem_nvkms_handle_vma_fault,
.create_mmap_offset = __nv_drm_gem_map_nvkms_memory_offset,
@@ -265,6 +303,7 @@ static int __nv_drm_nvkms_gem_obj_init(
return -EINVAL;
}
mutex_init(&nv_nvkms_memory->map_lock);
nv_nvkms_memory->pPhysicalAddress = NULL;
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
nv_nvkms_memory->physically_mapped = false;
@@ -273,7 +312,7 @@ static int __nv_drm_nvkms_gem_obj_init(
pMemory,
&pages,
&numPages) &&
!nv_dev->hasVideoMemory) {
!nvKms->isVidmem(pMemory)) {
/* GetMemoryPages may fail for vidmem allocations,
* but it should not fail for sysmem allocations. */
NV_DRM_DEV_LOG_ERR(nv_dev,
@@ -346,6 +385,8 @@ int nv_drm_dumb_create(
goto nvkms_gem_obj_init_failed;
}
nv_nvkms_memory->base.is_drm_dumb = true;
/* Always map dumb buffer memory up front. Clients are only expected
* to use dumb buffers for software rendering, so they're not much use
* without a CPU mapping.

View File

@@ -32,8 +32,15 @@
struct nv_drm_gem_nvkms_memory {
struct nv_drm_gem_object base;
/*
* Lock to protect concurrent writes to physically_mapped, pPhysicalAddress,
* and pWriteCombinedIORemapAddress.
*
* __nv_drm_gem_nvkms_map(), the sole writer, is structured such that
* readers are not required to hold the lock.
*/
struct mutex map_lock;
bool physically_mapped;
void *pPhysicalAddress;
void *pWriteCombinedIORemapAddress;

View File

@@ -36,6 +36,10 @@
#include "linux/mm.h"
#include "nv-mm.h"
#if defined(NV_LINUX_PFN_T_H_PRESENT)
#include "linux/pfn_t.h"
#endif
#if defined(NV_BSD)
#include <vm/vm_pageout.h>
#endif
@@ -68,7 +72,8 @@ static void *__nv_drm_gem_user_memory_prime_vmap(
struct nv_drm_gem_user_memory *nv_user_memory = to_nv_user_memory(nv_gem);
return nv_drm_vmap(nv_user_memory->pages,
nv_user_memory->pages_count);
nv_user_memory->pages_count,
true);
}
static void __nv_drm_gem_user_memory_prime_vunmap(
@@ -103,6 +108,37 @@ static int __nv_drm_gem_user_memory_mmap(struct nv_drm_gem_object *nv_gem,
return 0;
}
#if defined(NV_LINUX) && !defined(NV_VMF_INSERT_MIXED_PRESENT)
static vm_fault_t __nv_vm_insert_mixed_helper(
struct vm_area_struct *vma,
unsigned long address,
unsigned long pfn)
{
int ret;
#if defined(NV_PFN_TO_PFN_T_PRESENT)
ret = vm_insert_mixed(vma, address, pfn_to_pfn_t(pfn));
#else
ret = vm_insert_mixed(vma, address, pfn);
#endif
switch (ret) {
case 0:
case -EBUSY:
/*
* EBUSY indicates that another thread already handled
* the faulted range.
*/
return VM_FAULT_NOPAGE;
case -ENOMEM:
return VM_FAULT_OOM;
default:
WARN_ONCE(1, "Unhandled error in %s: %d\n", __FUNCTION__, ret);
return VM_FAULT_SIGBUS;
}
}
#endif
static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
struct nv_drm_gem_object *nv_gem,
struct vm_area_struct *vma,
@@ -112,36 +148,19 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
unsigned long address = nv_page_fault_va(vmf);
struct drm_gem_object *gem = vma->vm_private_data;
unsigned long page_offset;
vm_fault_t ret;
unsigned long pfn;
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
BUG_ON(page_offset >= nv_user_memory->pages_count);
pfn = page_to_pfn(nv_user_memory->pages[page_offset]);
#if !defined(NV_LINUX)
ret = vmf_insert_pfn(vma, address, page_to_pfn(nv_user_memory->pages[page_offset]));
#else /* !defined(NV_LINUX) */
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
switch (ret) {
case 0:
case -EBUSY:
/*
* EBUSY indicates that another thread already handled
* the faulted range.
*/
ret = VM_FAULT_NOPAGE;
break;
case -ENOMEM:
ret = VM_FAULT_OOM;
break;
default:
WARN_ONCE(1, "Unhandled error in %s: %d\n", __FUNCTION__, ret);
ret = VM_FAULT_SIGBUS;
break;
}
#endif /* !defined(NV_LINUX) */
return ret;
return vmf_insert_pfn(vma, address, pfn);
#elif defined(NV_VMF_INSERT_MIXED_PRESENT)
return vmf_insert_mixed(vma, address, pfn_to_pfn_t(pfn));
#else
return __nv_vm_insert_mixed_helper(vma, address, pfn);
#endif
}
static int __nv_drm_gem_user_create_mmap_offset(

View File

@@ -144,6 +144,12 @@ void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
#endif
drm_gem_private_object_init(dev, &nv_gem->base, size);
/* Create mmap offset early for drm_gem_prime_mmap(), if possible. */
if (nv_gem->ops->create_mmap_offset) {
uint64_t offset;
nv_gem->ops->create_mmap_offset(nv_dev, nv_gem, &offset);
}
}
struct drm_gem_object *nv_drm_gem_prime_import(struct drm_device *dev,
@@ -166,8 +172,11 @@ struct drm_gem_object *nv_drm_gem_prime_import(struct drm_device *dev,
*/
gem_dst = nv_gem_src->ops->prime_dup(dev, nv_gem_src);
if (gem_dst)
return gem_dst;
if (gem_dst == NULL) {
return ERR_PTR(-ENOTSUPP);
}
return gem_dst;
}
}
#endif /* NV_DMA_BUF_OWNER_PRESENT */
@@ -232,6 +241,7 @@ int nv_drm_gem_map_offset_ioctl(struct drm_device *dev,
return -EINVAL;
}
/* mmap offset creation is idempotent, fetch it by creating it again. */
if (nv_gem->ops->create_mmap_offset) {
ret = nv_gem->ops->create_mmap_offset(nv_dev, nv_gem, &params->offset);
} else {

View File

@@ -73,6 +73,8 @@ struct nv_drm_gem_object {
struct NvKmsKapiMemory *pMemory;
bool is_drm_dumb;
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
nv_dma_resv_t resv;
#endif

View File

@@ -40,8 +40,13 @@
#include <drm/drm_blend.h>
#endif
#if defined(NV_DRM_ROTATION_AVAILABLE)
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
#if defined(NV_DRM_ROTATION_AVAILABLE) || \
defined(NV_DRM_COLOR_CTM_3X4_PRESENT) || \
defined(NV_DRM_COLOR_LUT_PRESENT)
/*
* For DRM_MODE_ROTATE_*, DRM_MODE_REFLECT_*, struct drm_color_ctm_3x4, and
* struct drm_color_lut.
*/
#include <uapi/drm/drm_mode.h>
#endif
@@ -358,6 +363,24 @@ static inline void nv_drm_connector_put(struct drm_connector *connector)
#endif
}
static inline void nv_drm_property_blob_put(struct drm_property_blob *blob)
{
#if defined(NV_DRM_PROPERTY_BLOB_PUT_PRESENT)
drm_property_blob_put(blob);
#else
drm_property_unreference_blob(blob);
#endif
}
static inline void nv_drm_property_blob_get(struct drm_property_blob *blob)
{
#if defined(NV_DRM_PROPERTY_BLOB_PUT_PRESENT)
drm_property_blob_get(blob);
#else
drm_property_reference_blob(blob);
#endif
}
static inline struct drm_crtc *
nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
{
@@ -625,6 +648,31 @@ static inline int nv_drm_format_num_planes(uint32_t format)
#define DRM_UNLOCKED 0
#endif
/*
* struct drm_color_ctm_3x4 was added by commit 6872a189be50 ("drm/amd/display:
* Add 3x4 CTM support for plane CTM") in v6.8. For backwards compatibility,
* define it when not present.
*/
#if !defined(NV_DRM_COLOR_CTM_3X4_PRESENT)
struct drm_color_ctm_3x4 {
__u64 matrix[12];
};
#endif
/*
* struct drm_color_lut was added by commit 5488dc16fde7 ("drm: introduce pipe
* color correction properties") in v4.6. For backwards compatibility, define it
* when not present.
*/
#if !defined(NV_DRM_COLOR_LUT_PRESENT)
struct drm_color_lut {
__u16 red;
__u16 green;
__u16 blue;
__u16 reserved;
};
#endif
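For context on the two fallback definitions above: in the DRM uAPI, CTM matrix entries are S31.32 sign-magnitude fixed point (not two's complement), and drm_color_lut channels are 16-bit values where 0xFFFF represents 1.0. A small userspace-style helper showing the CTM encoding, stated as an assumption based on the upstream documentation rather than anything introduced by this change:
#include <stdint.h>
/* Encode a double as the S31.32 sign-magnitude fixed point used for DRM CTM
 * matrix entries: bit 63 is the sign, the low 63 bits hold |v| * 2^32. */
static uint64_t example_ctm_entry(double v)
{
    uint64_t sign = 0;
    if (v < 0.0) {
        sign = 1ULL << 63;
        v = -v;
    }
    return sign | (uint64_t)(v * 4294967296.0);
}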
/*
* drm_vma_offset_exact_lookup_locked() were added
* by kernel commit 2225cfe46bcc which was Signed-off-by:

View File

@@ -34,10 +34,10 @@ MODULE_PARM_DESC(
"Enable atomic kernel modesetting (1 = enable, 0 = disable (default))");
module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
MODULE_PARM_DESC(
fbdev,
"Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");
"Create a framebuffer device (1 = enable (default), 0 = disable)");
module_param_named(fbdev, nv_drm_fbdev_module_param, bool, 0400);
#endif

View File

@@ -192,6 +192,7 @@ static int __nv_drm_convert_in_fences(
&to_nv_crtc_state(crtc_state)->req_config;
struct nv_drm_plane_fence_cb_data *fence_data;
uint32_t semaphore_index;
uint32_t idx_count;
int ret, i;
if (!crtc_state->active) {
@@ -244,9 +245,14 @@ static int __nv_drm_convert_in_fences(
return -EINVAL;
}
semaphore_index = nv_drm_next_display_semaphore(nv_dev);
for (idx_count = 0; idx_count < nv_dev->display_semaphores.count; idx_count++) {
semaphore_index = nv_drm_next_display_semaphore(nv_dev);
if (nvKms->tryInitDisplaySemaphore(nv_dev->pDevice, semaphore_index)) {
break;
}
}
if (!nvKms->resetDisplaySemaphore(nv_dev->pDevice, semaphore_index)) {
if (idx_count == nv_dev->display_semaphores.count) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to initialize semaphore for plane fence");
@@ -408,6 +414,31 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
return -EINVAL;
}
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
if (commit) {
/*
* This function does what is necessary to prepare the framebuffers
* attached to each new plane in the state for scan out, mostly by
* invoking driver callbacks that the NVIDIA driver does not implement.
* The end result is that, on the NVIDIA driver, all it does is populate
* the plane state's dma fence pointers with any implicit sync fences
* attached to the GEM objects associated with those planes in the new
* state, preferring explicit sync fences when appropriate.
* This must be done prior to converting the per-plane fences to
* semaphore waits below.
*
* Note this only works when the drm_framebuffer::obj[] field is present
* and populated, so skip calling this function on kernels where that
* field is not present.
*/
ret = drm_atomic_helper_prepare_planes(dev, state);
if (ret) {
return ret;
}
}
#endif /* defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT) */
memset(requested_config, 0, sizeof(*requested_config));
/* Loop over affected crtcs and construct NvKmsKapiRequestedModeSetConfig */

View File

@@ -42,7 +42,7 @@
#endif
bool nv_drm_modeset_module_param = false;
bool nv_drm_fbdev_module_param = false;
bool nv_drm_fbdev_module_param = true;
void *nv_drm_calloc(size_t nmemb, size_t size)
{
@@ -156,9 +156,15 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
#define VM_USERMAP 0
#endif
void *nv_drm_vmap(struct page **pages, unsigned long pages_count)
void *nv_drm_vmap(struct page **pages, unsigned long pages_count, bool cached)
{
return vmap(pages, pages_count, VM_USERMAP, PAGE_KERNEL);
pgprot_t prot = PAGE_KERNEL;
if (!cached) {
prot = pgprot_noncached(PAGE_KERNEL);
}
return vmap(pages, pages_count, VM_USERMAP, prot);
}
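Both behaviors of the new cached argument are exercised in this change: the user-memory GEM path keeps a cached mapping (cached = true), while the NVKMS sysmem path requests a noncached one because, as noted earlier, the display engine is not IO-coherent on Tegra. A trivial sketch of the latter call, with an illustrative wrapper name:
/* Map sysmem pages for CPU access by the display path; noncached so writes
 * stay visible to a non-IO-coherent display engine (the Tegra case above). */
static void *example_vmap_for_display(struct page **pages,
                                      unsigned long pages_count)
{
    return nv_drm_vmap(pages, pages_count, false);
}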
void nv_drm_vunmap(void *address)

View File

@@ -59,14 +59,20 @@ typedef struct nv_timer nv_drm_timer;
#endif
#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
#endif
#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_TTM_AVAILABLE
#endif
struct page;
/* Set to true when the atomic modeset feature is enabled. */
extern bool nv_drm_modeset_module_param;
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
/* Set to true when the nvidia-drm driver should install a framebuffer device */
extern bool nv_drm_fbdev_module_param;
#endif
@@ -84,7 +90,7 @@ int nv_drm_lock_user_pages(unsigned long address,
void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages);
void *nv_drm_vmap(struct page **pages, unsigned long pages_count);
void *nv_drm_vmap(struct page **pages, unsigned long pages_count, bool cached);
void nv_drm_vunmap(void *address);

View File

@@ -163,6 +163,24 @@ struct nv_drm_device {
struct drm_property *nv_hdr_output_metadata_property;
#endif
struct drm_property *nv_plane_lms_ctm_property;
struct drm_property *nv_plane_lms_to_itp_ctm_property;
struct drm_property *nv_plane_itp_to_lms_ctm_property;
struct drm_property *nv_plane_blend_ctm_property;
struct drm_property *nv_plane_degamma_tf_property;
struct drm_property *nv_plane_degamma_lut_property;
struct drm_property *nv_plane_degamma_lut_size_property;
struct drm_property *nv_plane_degamma_multiplier_property;
struct drm_property *nv_plane_tmo_lut_property;
struct drm_property *nv_plane_tmo_lut_size_property;
struct drm_property *nv_crtc_regamma_tf_property;
struct drm_property *nv_crtc_regamma_lut_property;
struct drm_property *nv_crtc_regamma_lut_size_property;
struct drm_property *nv_crtc_regamma_divisor_property;
struct nv_drm_device *next;
};

View File

@@ -66,11 +66,18 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_pci_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_ttm_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pfn_to_pfn_t
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_prime_mmap
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_bus_type
@@ -130,3 +137,10 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffe
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_framebuffer_obj_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_ctm_3x4_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present

View File

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;
// Get the NUMA node where the first page of the stack is resident. If

View File

@@ -86,6 +86,12 @@ module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);
static bool opportunistic_display_sync = true;
module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool, 0400);
static enum NvKmsDebugForceColorSpace debug_force_color_space = NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE;
module_param_named(debug_force_color_space, debug_force_color_space, uint, 0400);
static bool enable_overlay_layers = true;
module_param_named(enable_overlay_layers, enable_overlay_layers, bool, 0400);
/* These parameters are used for fault injection tests. Normally the defaults
* should be used. */
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
@@ -96,19 +102,40 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
static bool malloc_verbose = false;
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);
/* Fail allocating the RM core channel for NVKMS using the i-th method (see
* FailAllocCoreChannelMethod). Failures not using the i-th method are ignored. */
MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core channel allocation failure");
static int fail_alloc_core_channel_method = -1;
module_param_named(fail_alloc_core_channel, fail_alloc_core_channel_method, int, 0400);
#if NVKMS_CONFIG_FILE_SUPPORTED
/* This parameter is used to find the dpy override conf file */
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)
MODULE_PARM_DESC(config_file,
"Path to the nvidia-modeset configuration file "
"(default: disabled)");
"Path to the nvidia-modeset configuration file (default: disabled)");
static char *nvkms_conf = NULL;
module_param_named(config_file, nvkms_conf, charp, 0400);
#endif
static atomic_t nvkms_alloc_called_count;
NvBool nvkms_test_fail_alloc_core_channel(
enum FailAllocCoreChannelMethod method
)
{
if (method != fail_alloc_core_channel_method) {
// don't fail if it's not the currently specified method
return NV_FALSE;
}
printk(KERN_INFO NVKMS_LOG_PREFIX
"Failing core channel allocation using method %d",
fail_alloc_core_channel_method);
return NV_TRUE;
}
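The helper above is the module-parameter side of the fault-injection hook; the allocation paths in the NVKMS core (not part of this diff) are expected to poll it and abort as if the hardware allocation had failed. A hypothetical call site, using the FailAllocCoreChannelMethod values declared later in this diff:
static NvBool example_setup_core_channel(void)
{
    /* Simulate a core channel allocation failure when requested via the
     * fail_alloc_core_channel module parameter (illustrative call site). */
    if (nvkms_test_fail_alloc_core_channel(
            FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL)) {
        return NV_FALSE;
    }
    /* ... real core channel setup would follow here ... */
    return NV_TRUE;
}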
NvBool nvkms_output_rounding_fix(void)
{
return output_rounding_fix;
@@ -139,6 +166,19 @@ NvBool nvkms_opportunistic_display_sync(void)
return opportunistic_display_sync;
}
enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void)
{
if (debug_force_color_space >= NVKMS_DEBUG_FORCE_COLOR_SPACE_MAX) {
return NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE;
}
return debug_force_color_space;
}
NvBool nvkms_enable_overlay_layers(void)
{
return enable_overlay_layers;
}
NvBool nvkms_kernel_supports_syncpts(void)
{
/*
@@ -1084,7 +1124,7 @@ static void nvkms_kapi_event_kthread_q_callback(void *arg)
nvKmsKapiHandleEventQueueChange(device);
}
struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
static struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
struct NvKmsKapiDevice *device,
int *status)
{
@@ -1136,7 +1176,7 @@ failed:
return NULL;
}
void nvkms_close_pm_locked(struct nvkms_per_open *popen)
static void nvkms_close_pm_locked(struct nvkms_per_open *popen)
{
/*
* Don't use down_interruptible(): we need to free resources
@@ -1199,7 +1239,7 @@ static void nvkms_close_popen(struct nvkms_per_open *popen)
}
}
int nvkms_ioctl_common
static int nvkms_ioctl_common
(
struct nvkms_per_open *popen,
NvU32 cmd, NvU64 address, const size_t size
@@ -1452,6 +1492,8 @@ static size_t nvkms_config_file_open
loff_t pos = 0;
#endif
*buff = NULL;
if (!nvkms_fs_mounted()) {
printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
return 0;
@@ -1475,6 +1517,11 @@ static size_t nvkms_config_file_open
goto done;
}
// Do not alloc a 0 sized buffer
if (file_size == 0) {
goto done;
}
*buff = nvkms_alloc(file_size, NV_FALSE);
if (*buff == NULL) {
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");
@@ -1558,6 +1605,48 @@ NvBool nvKmsKapiGetFunctionsTable
}
EXPORT_SYMBOL(nvKmsKapiGetFunctionsTable);
NvU32 nvKmsKapiF16ToF32(NvU16 a)
{
return nvKmsKapiF16ToF32Internal(a);
}
EXPORT_SYMBOL(nvKmsKapiF16ToF32);
NvU16 nvKmsKapiF32ToF16(NvU32 a)
{
return nvKmsKapiF32ToF16Internal(a);
}
EXPORT_SYMBOL(nvKmsKapiF32ToF16);
NvU32 nvKmsKapiF32Mul(NvU32 a, NvU32 b)
{
return nvKmsKapiF32MulInternal(a, b);
}
EXPORT_SYMBOL(nvKmsKapiF32Mul);
NvU32 nvKmsKapiF32Div(NvU32 a, NvU32 b)
{
return nvKmsKapiF32DivInternal(a, b);
}
EXPORT_SYMBOL(nvKmsKapiF32Div);
NvU32 nvKmsKapiF32Add(NvU32 a, NvU32 b)
{
return nvKmsKapiF32AddInternal(a, b);
}
EXPORT_SYMBOL(nvKmsKapiF32Add);
NvU32 nvKmsKapiF32ToUI32RMinMag(NvU32 a, NvBool exact)
{
return nvKmsKapiF32ToUI32RMinMagInternal(a, exact);
}
EXPORT_SYMBOL(nvKmsKapiF32ToUI32RMinMag);
NvU32 nvKmsKapiUI32ToF32(NvU32 a)
{
return nvKmsKapiUI32ToF32Internal(a);
}
EXPORT_SYMBOL(nvKmsKapiUI32ToF32);
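These exports let KAPI clients such as nvidia-drm perform IEEE-754 single-precision arithmetic through NVKMS's software float routines, presumably so the FPU is never touched in kernel context. A small illustrative helper built only from the exported operations shown above:
static NvU32 example_f32_average(NvU32 a, NvU32 b)
{
    /* Convert, add, divide by two, then round the result toward zero. */
    NvU32 sum = nvKmsKapiF32Add(nvKmsKapiUI32ToF32(a), nvKmsKapiUI32ToF32(b));
    NvU32 avg = nvKmsKapiF32Div(sum, nvKmsKapiUI32ToF32(2));
    return nvKmsKapiF32ToUI32RMinMag(avg, NV_FALSE);
}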
/*************************************************************************
* File operation callback functions.
*************************************************************************/

View File

@@ -67,6 +67,14 @@ enum NvKmsSyncPtOp {
NVKMS_SYNCPT_OP_READ_MINVAL,
};
enum NvKmsDebugForceColorSpace {
NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE,
NVKMS_DEBUG_FORCE_COLOR_SPACE_RGB,
NVKMS_DEBUG_FORCE_COLOR_SPACE_YUV444,
NVKMS_DEBUG_FORCE_COLOR_SPACE_YUV422,
NVKMS_DEBUG_FORCE_COLOR_SPACE_MAX,
};
typedef struct {
struct {
@@ -96,12 +104,20 @@ typedef struct {
} read_minval;
} NvKmsSyncPtOpParams;
enum FailAllocCoreChannelMethod {
FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL = 0,
FAIL_ALLOC_CORE_CHANNEL_RESTORE_CONSOLE = 1,
};
NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);
NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_hdmi_frl(void);
NvBool nvkms_disable_vrr_memclk_switch(void);
NvBool nvkms_hdmi_deepcolor(void);
NvBool nvkms_vblank_sem_control(void);
NvBool nvkms_opportunistic_display_sync(void);
enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void);
NvBool nvkms_enable_overlay_layers(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

View File

@@ -40,9 +40,6 @@ NV_KERNEL_MODULE_TARGETS += $(NVIDIA_MODESET_KO)
NVIDIA_MODESET_BINARY_OBJECT := $(src)/nvidia-modeset/nv-modeset-kernel.o_binary
NVIDIA_MODESET_BINARY_OBJECT_O := nvidia-modeset/nv-modeset-kernel.o
quiet_cmd_symlink = SYMLINK $@
cmd_symlink = ln -sf $< $@
targets += $(NVIDIA_MODESET_BINARY_OBJECT_O)
$(obj)/$(NVIDIA_MODESET_BINARY_OBJECT_O): $(NVIDIA_MODESET_BINARY_OBJECT) FORCE

View File

@@ -110,4 +110,18 @@ NvBool nvKmsSetBacklight(NvU32 display_id, void *drv_priv, NvU32 brightness);
NvBool nvKmsOpenDevHasSubOwnerPermissionOrBetter(const struct NvKmsPerOpenDev *pOpenDev);
NvU32 nvKmsKapiF16ToF32Internal(NvU16 a);
NvU16 nvKmsKapiF32ToF16Internal(NvU32 a);
NvU32 nvKmsKapiF32MulInternal(NvU32 a, NvU32 b);
NvU32 nvKmsKapiF32DivInternal(NvU32 a, NvU32 b);
NvU32 nvKmsKapiF32AddInternal(NvU32 a, NvU32 b);
NvU32 nvKmsKapiF32ToUI32RMinMagInternal(NvU32 a, NvBool exact);
NvU32 nvKmsKapiUI32ToF32Internal(NvU32 a);
#endif /* __NV_KMS_H__ */

View File

@@ -189,6 +189,12 @@ int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
struct nvidia_p2p_page_table **page_table,
void (*free_callback)(void *data), void *data);
/*
* Flags to be used with persistent APIs
*/
#define NVIDIA_P2P_FLAGS_DEFAULT 0
#define NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING 1
/*
* @brief
* Pin and make the pages underlying a range of GPU virtual memory
@@ -212,7 +218,11 @@ int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
* @param[out] page_table
* A pointer to an array of structures with P2P PTEs.
* @param[in] flags
* Must be set to zero for now.
* NVIDIA_P2P_FLAGS_DEFAULT:
* Default value to be used if no specific behavior is expected.
* NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING:
* Force BAR1 mappings on certain coherent platforms,
* subject to capability and supported topology.
*
* @return
* 0 upon successful completion.
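These flags belong to the persistent pinning entry point; a hedged sketch of a caller follows. The nvidia_p2p_get_pages_persistent() prototype used here is assumed from the rest of this header rather than shown in the hunk, so treat the signature as illustrative:
/* Hypothetical caller: persistently pin a GPU virtual address range and ask
 * for BAR1 mappings, which are honored only on capable coherent platforms. */
static int example_pin_persistent(uint64_t gpu_va, uint64_t length,
                                  struct nvidia_p2p_page_table **page_table)
{
    return nvidia_p2p_get_pages_persistent(gpu_va, length, page_table,
                                           NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING);
}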

View File

@@ -1,30 +1,25 @@
/*******************************************************************************
Copyright (c) 2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
// Command: ../../../bin/manuals/refhdr2class.pl clc365.h c365 ACCESS_COUNTER_NOTIFY_BUFFER --search_str=NV_ACCESS_COUNTER --input_file=nv_ref_dev_access_counter.h
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _clc365_h_
#define _clc365_h_

View File

@@ -1,30 +1,25 @@
/*******************************************************************************
Copyright (c) 2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
// Command: ../../../bin/manuals/refhdr2class.pl clc369.h c369 MMU_FAULT_BUFFER --search_str=NV_MMU_FAULT --input_file=nv_ref_dev_mmu_fault.h
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _clc369_h_
#define _clc369_h_

View File

@@ -1,26 +1,25 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _clc36f_h_
#define _clc36f_h_
@@ -257,7 +256,6 @@ typedef volatile struct Nvc36fControl_struct {
#define NVC36F_CLEAR_FAULTED_TYPE 31:31
#define NVC36F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC36F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC36F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */

View File

@@ -1,26 +1,25 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _clc46f_h_
#define _clc46f_h_
@@ -259,7 +258,6 @@ typedef volatile struct Nvc46fControl_struct {
#define NVC46F_CLEAR_FAULTED_TYPE 31:31
#define NVC46F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC46F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC46F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */

View File

@@ -1,26 +1,25 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _clc56f_h_
#define _clc56f_h_
@@ -261,7 +260,6 @@ typedef volatile struct Nvc56fControl_struct {
#define NVC56F_CLEAR_FAULTED_TYPE 31:31
#define NVC56F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC56F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC56F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */

View File

@@ -1,19 +1,19 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -21,8 +21,6 @@
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc5b5_h_
@@ -34,64 +32,6 @@ extern "C" {
#define TURING_DMA_COPY_A (0x0000C5B5)
typedef volatile struct _clc5b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x3F];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 Reserved03[0x2];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved04[0x6];
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
NvV32 Reserved05[0x1C];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0xB8];
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved08[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved09[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved10[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved11[0x3BA];
} turing_dma_copy_aControlPio;
#define NVC5B5_NOP (0x00000100)
#define NVC5B5_NOP_PARAMETER 31:0
#define NVC5B5_PM_TRIGGER (0x00000140)
@@ -125,14 +65,6 @@ typedef volatile struct _clc5b5_tag0 {
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC5B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
#define NVC5B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
#define NVC5B5_SET_PAGEOUT_START_PAUPPER_V 4:0
#define NVC5B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
#define NVC5B5_SET_PAGEOUT_START_PALOWER_V 31:0
#define NVC5B5_LAUNCH_DMA (0x00000300)
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
@@ -199,8 +131,6 @@ typedef volatile struct _clc5b5_tag0 {
#define NVC5B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC5B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)

View File

@@ -1,19 +1,19 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -21,8 +21,6 @@
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc6b5_h_
@@ -34,64 +32,6 @@ extern "C" {
#define AMPERE_DMA_COPY_A (0x0000C6B5)
typedef volatile struct _clc6b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x3F];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 Reserved03[0x2];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved04[0x6];
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
NvV32 Reserved05[0x1C];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0xB8];
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved08[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved09[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved10[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved11[0x3BA];
} ampere_dma_copy_aControlPio;
#define NVC6B5_NOP (0x00000100)
#define NVC6B5_NOP_PARAMETER 31:0
#define NVC6B5_PM_TRIGGER (0x00000140)
@@ -131,14 +71,6 @@ typedef volatile struct _clc6b5_tag0 {
#define NVC6B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC6B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC6B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
#define NVC6B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
#define NVC6B5_SET_PAGEOUT_START_PAUPPER_V 4:0
#define NVC6B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
#define NVC6B5_SET_PAGEOUT_START_PALOWER_V 31:0
#define NVC6B5_LAUNCH_DMA (0x00000300)
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
@@ -199,8 +131,6 @@ typedef volatile struct _clc6b5_tag0 {
#define NVC6B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC6B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)

View File

@@ -1,19 +1,19 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
@@ -21,8 +21,6 @@
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc7b5_h_
@@ -34,69 +32,6 @@ extern "C" {
#define AMPERE_DMA_COPY_B (0x0000C7B5)
typedef volatile struct _clc7b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x6];
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
NvV32 Reserved06[0x1C];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved07[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved08[0xB8];
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved09[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved10[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved11[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved12[0x3BA];
} ampere_dma_copy_bControlPio;
#define NVC7B5_NOP (0x00000100)
#define NVC7B5_NOP_PARAMETER 31:0
#define NVC7B5_PM_TRIGGER (0x00000140)
@@ -146,14 +81,6 @@ typedef volatile struct _clc7b5_tag0 {
#define NVC7B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC7B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC7B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
#define NVC7B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
#define NVC7B5_SET_PAGEOUT_START_PAUPPER_V 4:0
#define NVC7B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
#define NVC7B5_SET_PAGEOUT_START_PALOWER_V 31:0
#define NVC7B5_LAUNCH_DMA (0x00000300)
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
@@ -223,8 +150,6 @@ typedef volatile struct _clc7b5_tag0 {
#define NVC7B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC7B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)

View File

@@ -0,0 +1,74 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gb202_clca6f_h__
#define __gb202_clca6f_h__
typedef volatile struct Nvca6fControl_struct {
NvU32 Ignored00[0x23]; /* 0000-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored01[0x5c];
} Nvca6fControl, BlackwellBControlGPFifo;
#define BLACKWELL_CHANNEL_GPFIFO_B (0x0000CA6F)
#define NVCA6F_SET_OBJECT (0x00000000)
#define NVCA6F_SEM_ADDR_LO (0x0000005c)
#define NVCA6F_SEM_ADDR_LO_OFFSET 31:2
#define NVCA6F_SEM_ADDR_HI (0x00000060)
#define NVCA6F_SEM_ADDR_HI_OFFSET 24:0
#define NVCA6F_SEM_PAYLOAD_LO (0x00000064)
#define NVCA6F_SEM_PAYLOAD_HI (0x00000068)
#define NVCA6F_SEM_EXECUTE (0x0000006c)
#define NVCA6F_SEM_EXECUTE_OPERATION 2:0
#define NVCA6F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVCA6F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVCA6F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVCA6F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVCA6F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVCA6F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
/* GPFIFO entry format */
#define NVCA6F_GP_ENTRY__SIZE 8
#define NVCA6F_GP_ENTRY0_FETCH 0:0
#define NVCA6F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVCA6F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVCA6F_GP_ENTRY0_GET 31:2
#define NVCA6F_GP_ENTRY0_OPERAND 31:0
#define NVCA6F_GP_ENTRY0_PB_EXTENDED_BASE_OPERAND 24:8
#define NVCA6F_GP_ENTRY1_GET_HI 7:0
#define NVCA6F_GP_ENTRY1_LEVEL 9:9
#define NVCA6F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVCA6F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVCA6F_GP_ENTRY1_LENGTH 30:10
#define NVCA6F_GP_ENTRY1_SYNC 31:31
#define NVCA6F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVCA6F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVCA6F_GP_ENTRY1_OPCODE 7:0
#define NVCA6F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVCA6F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVCA6F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVCA6F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
#define NVCA6F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004
#endif // __gb202_clca6f_h__
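
The GP entry fields above describe an 8-byte GPFIFO entry (NVCA6F_GP_ENTRY__SIZE) split across two 32-bit words: GET and GET_HI carry the pushbuffer address, LENGTH its size in 32-bit methods. Below is a hedged sketch of packing one entry directly from those bit ranges; the pushbuffer address and length are hypothetical, and this is illustrative rather than the driver's own submission path.

/* Sketch: one GPFIFO entry for BLACKWELL_CHANNEL_GPFIFO_B (0xCA6F). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t pb_gpu_va   = 0x200040000ULL; /* hypothetical pushbuffer VA      */
    uint32_t num_methods = 64;             /* pushbuffer length, 32-bit words */

    /* Word 0: GET (31:2) holds VA bits 31:2; FETCH (0:0) stays UNCONDITIONAL. */
    uint32_t entry0 = (uint32_t)pb_gpu_va & 0xFFFFFFFCu;

    /* Word 1: GET_HI (7:0) holds VA bits 39:32, LENGTH (30:10) the method
     * count; LEVEL_MAIN and SYNC_PROCEED both encode as zero. */
    uint32_t entry1 = ((uint32_t)(pb_gpu_va >> 32) & 0xFFu) |
                      ((num_methods & 0x1FFFFFu) << 10);

    printf("GP entry = 0x%08x 0x%08x\n", entry0, entry1);
    return 0;
}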

View File

@@ -0,0 +1,44 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _clcab5_h_
#define _clcab5_h_
#define BLACKWELL_DMA_COPY_B (0x0000CAB5)
#define NVCAB5_LAUNCH_DMA (0x00000300)
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PREFETCH (0x00000003)
#define NVCAB5_REQ_ATTR (0x00000754)
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS 1:0
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_FIRST (0x00000000)
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_NORMAL (0x00000001)
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_LAST (0x00000002)
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_DEMOTE (0x00000003)
#endif /* _clcab5_h_ */
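
BLACKWELL_DMA_COPY_B extends LAUNCH_DMA with a PREFETCH transfer type and adds a REQ_ATTR method to choose the L2 eviction class for the prefetched data. Since both fields occupy bits 1:0 of their respective data words, the encodings above map directly to method data; the pairing below (prefetch with EVICT_LAST) is only an illustration, not a recommendation from the header.

/* Sketch: method data words for a prefetch on BLACKWELL_DMA_COPY_B (0xCAB5). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Both fields live in bits 1:0, so the define values are the data words. */
    uint32_t req_attr = 0x00000002; /* REQ_ATTR_PREFETCH_L2_CLASS_EVICT_LAST */
    uint32_t launch   = 0x00000003; /* LAUNCH_DMA_DATA_TRANSFER_TYPE_PREFETCH */

    printf("REQ_ATTR = 0x%08x, LAUNCH_DMA = 0x%08x\n", req_attr, launch);
    return 0;
}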

View File

@@ -1,25 +1,25 @@
/*******************************************************************************
Copyright (c) 2021-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvtypes.h"
@@ -32,6 +32,28 @@ extern "C" {
#define HOPPER_SEC2_WORK_LAUNCH_A (0x0000CBA2)
typedef volatile struct _clcba2_tag0 {
NvV32 Reserved00[0x100];
NvV32 DecryptCopySrcAddrHi; // 0x00000400 - 0x00000403
NvV32 DecryptCopySrcAddrLo; // 0x00000404 - 0x00000407
NvV32 DecryptCopyDstAddrHi; // 0x00000408 - 0x0000040B
NvV32 DecryptCopyDstAddrLo; // 0x0000040c - 0x0000040F
NvU32 DecryptCopySize; // 0x00000410 - 0x00000413
NvU32 DecryptCopyAuthTagAddrHi; // 0x00000414 - 0x00000417
NvU32 DecryptCopyAuthTagAddrLo; // 0x00000418 - 0x0000041B
NvV32 DigestAddrHi; // 0x0000041C - 0x0000041F
NvV32 DigestAddrLo; // 0x00000420 - 0x00000423
NvV32 Reserved01[0x7];
NvV32 SemaphoreA; // 0x00000440 - 0x00000443
NvV32 SemaphoreB; // 0x00000444 - 0x00000447
NvV32 SemaphoreSetPayloadLower; // 0x00000448 - 0x0000044B
NvV32 SemaphoreSetPayloadUppper; // 0x0000044C - 0x0000044F
NvV32 SemaphoreD; // 0x00000450 - 0x00000453
NvU32 Reserved02[0x7];
NvV32 Execute; // 0x00000470 - 0x00000473
NvV32 Reserved03[0x23];
} NVCBA2_HOPPER_SEC2_WORK_LAUNCH_AControlPio;
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI (0x00000400)
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI_DATA 24:0
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO (0x00000404)
@@ -90,6 +112,45 @@ extern "C" {
#define NVCBA2_EXECUTE_TIMESTAMP 5:5
#define NVCBA2_EXECUTE_TIMESTAMP_DISABLE (0x00000000)
#define NVCBA2_EXECUTE_TIMESTAMP_ENABLE (0x00000001)
#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER 6:6
#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER_DISABLE (0x00000000)
#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER_ENABLE (0x00000001)
// Class definitions
#define NVCBA2_DECRYPT_COPY_SIZE_MAX_BYTES (2*1024*1024)
#define NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES (1024*1024*1024)
// Errors
#define NVCBA2_ERROR_NONE (0x00000000)
#define NVCBA2_ERROR_DECRYPT_COPY_SRC_ADDR_MISALIGNED_POINTER (0x00000001)
#define NVCBA2_ERROR_DECRYPT_COPY_DEST_ADDR_MISALIGNED_POINTER (0x00000002)
#define NVCBA2_ERROR_DECRYPT_COPY_AUTH_TAG_ADDR_MISALIGNED_POINTER (0x00000003)
#define NVCBA2_ERROR_DECRYPT_COPY_DMA_NACK (0x00000004)
#define NVCBA2_ERROR_DECRYPT_COPY_AUTH_TAG_MISMATCH (0x00000005)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_MISALIGNED_POINTER (0x00000006)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_DMA_NACK (0x00000007)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_CHECK_FAILURE (0x00000008)
#define NVCBA2_ERROR_MISALIGNED_SIZE (0x00000009)
#define NVCBA2_ERROR_MISSING_METHODS (0x0000000A)
#define NVCBA2_ERROR_SEMAPHORE_RELEASE_DMA_NACK (0x0000000B)
#define NVCBA2_ERROR_DECRYPT_SIZE_MAX_EXCEEDED (0x0000000C)
#define NVCBA2_ERROR_OS_APPLICATION (0x0000000D)
#define NVCBA2_ERROR_INVALID_CTXSW_REQUEST (0x0000000E)
#define NVCBA2_ERROR_BUFFER_OVERFLOW (0x0000000F)
#define NVCBA2_ERROR_IV_OVERFLOW (0x00000010)
#define NVCBA2_ERROR_INTERNAL_SETUP_FAILURE (0x00000011)
#define NVCBA2_ERROR_DECRYPT_COPY_INTERNAL_DMA_FAILURE (0x00000012)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_INTERNAL_DMA_FAILURE (0x00000013)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_HMAC_CALC_FAILURE (0x00000014)
#define NVCBA2_ERROR_NONCE_OVERFLOW (0x00000015)
#define NVCBA2_ERROR_AES_GCM_DECRYPTION_FAILURE (0x00000016)
#define NVCBA2_ERROR_SEMAPHORE_RELEASE_INTERNAL_DMA_FAILURE (0x00000017)
#define NVCBA2_ERROR_KEY_DERIVATION_FAILURE (0x00000018)
#define NVCBA2_ERROR_SCRUBBER_FAILURE (0x00000019)
#define NVCBA2_ERROR_SCRUBBER_INVALD_ADDRESS (0x0000001a)
#define NVCBA2_ERROR_SCRUBBER_INSUFFICIENT_PERMISSIONS (0x0000001b)
#define NVCBA2_ERROR_SCRUBBER_MUTEX_ACQUIRE_FAILURE (0x0000001c)
#define NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED (0x0000001d)
#ifdef __cplusplus
}; /* extern "C" */
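
The error codes above make the SEC2 decrypt-copy preconditions explicit: sizes are bounded by NVCBA2_DECRYPT_COPY_SIZE_MAX_BYTES (2 MiB) and NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES (1 GiB), and misaligned pointers or sizes are rejected. A hedged host-side pre-check is sketched below; the 16-byte alignment constant is an assumption for illustration only (the fragment above does not state the required granularity), while the size bound mirrors the define.

/* Sketch: host-side sanity check before submitting an NVCBA2 decrypt copy. */
#include <stdbool.h>
#include <stdint.h>

#define DECRYPT_COPY_SIZE_MAX_BYTES (2*1024*1024)

/* Assumed alignment for illustration; not taken from the header above. */
#define DECRYPT_COPY_ALIGN_BYTES 16u

static bool decrypt_copy_args_ok(uint64_t src, uint64_t dst,
                                 uint64_t auth_tag, uint32_t size)
{
    if (size == 0 || size > DECRYPT_COPY_SIZE_MAX_BYTES)
        return false;   /* device would report DECRYPT_SIZE_MAX_EXCEEDED  */
    if (size % DECRYPT_COPY_ALIGN_BYTES)
        return false;   /* device would report MISALIGNED_SIZE            */
    if ((src | dst | auth_tag) % DECRYPT_COPY_ALIGN_BYTES)
        return false;   /* device would report *_MISALIGNED_POINTER       */
    return true;
}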

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2023 NVIDIA Corporation
Copyright (c) 2013-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -35,6 +35,7 @@
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100 (0x00000180)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100 (0x00000190)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 (0x000001A0)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 (0x000001B0)
/* valid ARCHITECTURE_GP10x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100 (0x00000000)

View File

@@ -462,6 +462,7 @@
#define NV_PFAULT_CLIENT_HUB_SCC3 0x00000044 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB3 0x00000045 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD1 0x00000046 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X8 0x00000046 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD2 0x00000047 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD3 0x00000048 /* */
#define NV_PFAULT_CLIENT_HUB_GSPLITE1 0x00000049 /* */

View File

@@ -1,560 +0,0 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gb100_dev_mmu_h__
#define __gb100_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+7):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0x07 /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY 0x6 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x1 /* R---V */
#define NV_MMU_PTE_KIND_S8 0x2 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0x4 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x5 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE 0x8 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC 0x9 /* R---V */
#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC 0xA /* R---V */
#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC 0xB /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC 0xC /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC 0xD /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC 0xE /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xF /* R---V */
#define NV_MMU_VER1_PDE /* ----G */
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PDE__SIZE 8
#define NV_MMU_VER1_PTE /* ----G */
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PTE__SIZE 8
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_NEW_PDE /* ----G */
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PDE__SIZE 8
#define NV_MMU_NEW_DUAL_PDE /* ----G */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
#define NV_MMU_NEW_PTE /* ----G */
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_NEW_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PTE__SIZE 8
#define NV_MMU_VER2_PDE /* ----G */
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PDE__SIZE 8
#define NV_MMU_VER2_DUAL_PDE /* ----G */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
#define NV_MMU_VER2_PTE /* ----G */
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER2_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PTE__SIZE 8
#define NV_MMU_VER3_PDE /* ----G */
#define NV_MMU_VER3_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER3_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER3_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER3_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER3_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER3_PDE_PCF 5:3 /* RWXVF */
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_PDE_ADDRESS 51:12 /* RWXVF */
#define NV_MMU_VER3_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER3_PDE__SIZE 8
#define NV_MMU_VER3_DUAL_PDE /* ----G */
#define NV_MMU_VER3_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG 5:3 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG 51:8 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL 69:67 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SMALL 115:76 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER3_DUAL_PDE__SIZE 16
#define NV_MMU_VER3_PTE /* ----G */
#define NV_MMU_VER3_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER3_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER3_PTE_PCF 7:3 /* RWXVF */
#define NV_MMU_VER3_PTE_PCF_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_SPARSE 0x00000001 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_MAPPING_NOWHERE 0x00000002 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_NO_VALID_4KB_PAGE 0x00000003 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE 0x00000000 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE 0x00000001 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACE 0x00000002 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACE 0x00000003 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACE 0x00000004 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACE 0x00000005 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACE 0x00000006 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACE 0x00000007 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE 0x00000008 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE 0x00000009 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE 0x0000000A /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE 0x0000000B /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE 0x0000000C /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000D /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE 0x0000000E /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000F /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD 0x00000010 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD 0x00000011 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACD 0x00000012 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACD 0x00000013 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACD 0x00000014 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACD 0x00000015 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACD 0x00000016 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACD 0x00000017 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD 0x00000018 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD 0x00000019 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACD 0x0000001A /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACD 0x0000001B /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD 0x0000001C /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001D /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACD 0x0000001E /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001F /* RW--V */
#define NV_MMU_VER3_PTE_KIND 11:8 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS 51:12 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS_SYS 51:12 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS_PEER 51:12 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS_VID 39:12 /* RWXVF */
#define NV_MMU_VER3_PTE_PEER_ID 63:(64-3) /* RWXVF */
#define NV_MMU_VER3_PTE_PEER_ID_0 0x00000000 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_1 0x00000001 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_2 0x00000002 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_3 0x00000003 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_4 0x00000004 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_5 0x00000005 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_6 0x00000006 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_7 0x00000007 /* RW--V */
#define NV_MMU_VER3_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER3_PTE__SIZE 8
#define NV_MMU_CLIENT /* ----G */
#define NV_MMU_CLIENT_KIND 2:0 /* RWXVF */
#define NV_MMU_CLIENT_KIND_Z16 0x1 /* R---V */
#define NV_MMU_CLIENT_KIND_S8 0x2 /* R---V */
#define NV_MMU_CLIENT_KIND_S8Z24 0x3 /* R---V */
#define NV_MMU_CLIENT_KIND_ZF32_X24S8 0x4 /* R---V */
#define NV_MMU_CLIENT_KIND_Z24S8 0x5 /* R---V */
#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY 0x6 /* R---V */
#define NV_MMU_CLIENT_KIND_INVALID 0x7 /* R---V */
#endif // __gb100_dev_mmu_h__
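
Although this gb100_dev_mmu.h copy is removed by the change, the VER3 PTE layout it documents is a compact illustration of the format: VALID at bit 0, APERTURE at 2:1, PCF at 7:3, KIND at 11:8 and the 4 KiB-aligned physical address at 51:12. The sketch below packs one such 64-bit PTE from those ranges; the physical address is hypothetical and the PCF/KIND values are merely examples drawn from the tables above.

/* Sketch: pack a VER3 PTE for a cached, mappable vidmem page. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t phys_addr = 0x1234567000ULL; /* hypothetical, 4 KiB aligned */
    uint64_t pte = 0;

    pte |= 1ULL << 0;                            /* VALID_TRUE                          */
    pte |= 0ULL << 1;                            /* APERTURE_VIDEO_MEMORY (2:1 = 0)     */
    pte |= 0x00ULL << 3;                         /* PCF_REGULAR_RW_ATOMIC_CACHED_ACE    */
    pte |= 0x6ULL << 8;                          /* KIND: GENERIC_MEMORY, as an example */
    pte |= phys_addr & 0x000FFFFFFFFFF000ULL;    /* ADDRESS, bits 51:12                 */

    printf("VER3 PTE = 0x%016llx\n", (unsigned long long)pte);
    return 0;
}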

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -81,7 +81,7 @@
#define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS)
// This exists in order to have a function to place a breakpoint on:
void on_nvq_assert(void)
static void on_nvq_assert(void)
{
(void)NULL;
}

View File

@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;
// Get the NUMA node where the first page of the stack is resident. If

View File

@@ -1,18 +1,8 @@
NVIDIA_UVM_SOURCES ?=
NVIDIA_UVM_SOURCES_CXX ?=
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvstatus.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvCpuUuid.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nv-kthread-q.c
@@ -36,6 +26,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_range.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_range_device_p2p.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_policy.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group.c
@@ -54,6 +45,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tracker.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_access_counter_buffer.c
@@ -82,8 +74,13 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
@@ -101,6 +98,8 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_prefetch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
@@ -128,3 +127,4 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c

View File

@@ -13,19 +13,6 @@ NVIDIA_UVM_OBJECTS =
include $(src)/nvidia-uvm/nvidia-uvm-sources.Kbuild
NVIDIA_UVM_OBJECTS += $(patsubst %.c,%.o,$(NVIDIA_UVM_SOURCES))
# Some linux kernel functions rely on being built with optimizations on, and
# to work around this we put wrappers for them in a separate file that's built
# with optimizations on in debug builds and skipped in other builds.
# Notably gcc 4.4 supports per function optimization attributes that would be
# easier to use, but is too recent to rely on for now.
NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE := nvidia-uvm/uvm_debug_optimized.c
NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT := $(patsubst %.c,%.o,$(NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE))
ifneq ($(UVM_BUILD_TYPE),debug)
# Only build the wrappers on debug builds
NVIDIA_UVM_OBJECTS := $(filter-out $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_OBJECTS))
endif
obj-m += nvidia-uvm.o
nvidia-uvm-y := $(NVIDIA_UVM_OBJECTS)
@@ -36,15 +23,14 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
#
ifeq ($(UVM_BUILD_TYPE),debug)
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
else
ifeq ($(UVM_BUILD_TYPE),develop)
# -DDEBUG is required, in order to allow pr_devel() print statements to
# work:
NVIDIA_UVM_CFLAGS += -DDEBUG
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
endif
NVIDIA_UVM_CFLAGS += -O2
NVIDIA_UVM_CFLAGS += -DDEBUG -g
endif
ifeq ($(UVM_BUILD_TYPE),develop)
# -DDEBUG is required, in order to allow pr_devel() print statements to
# work:
NVIDIA_UVM_CFLAGS += -DDEBUG
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
endif
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_ENABLED
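A small illustration of why the develop build type adds -DDEBUG: the kernel's pr_devel() expands to a real printk only when DEBUG is defined and compiles to nothing otherwise. The helper name below is made up for the example:

    #include <linux/printk.h>

    // Hypothetical tracing helper: with UVM_BUILD_TYPE=develop (-DDEBUG) this
    // prints; in a release build pr_devel() is compiled out entirely.
    static void uvm_example_trace_fault(unsigned long addr)
    {
        pr_devel("uvm: servicing fault at 0x%lx\n", addr);
    }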
@@ -56,30 +42,17 @@ NVIDIA_UVM_CFLAGS += -I$(src)/nvidia-uvm
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_OBJECTS), $(NVIDIA_UVM_CFLAGS))
ifeq ($(UVM_BUILD_TYPE),debug)
# Force optimizations on for the wrappers
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_CFLAGS) -O2)
endif
#
# Register the conftests needed by nvidia-uvm.ko
#
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_bus_address
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
@@ -88,26 +61,14 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
NV_CONFTEST_FUNCTION_COMPILE_TESTS += for_each_sgtable_dma_page
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlbs
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
@@ -115,6 +76,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_TYPE_COMPILE_TESTS += fault_flag_remote_present
NV_CONFTEST_TYPE_COMPILE_TESTS += sg_dma_page_iter
NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
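The NV_CONFTEST_* lists above register compile tests that probe the target kernel for functions, types, and symbols before the module is built. The usual consumption pattern is a preprocessor switch on the generated macro; the exact macro spelling below (NV_..._PRESENT) follows the conftest naming convention and is shown as an assumption, with vma, addr, and pfn assumed to be in scope:

    // Sketch only: pick whichever kernel API the conftest probe found.
    #if defined(NV_VMF_INSERT_PFN_PRESENT)
        vm_fault_t ret = vmf_insert_pfn(vma, addr, pfn);   // newer kernels
    #else
        int ret = vm_insert_pfn(vma, addr, pfn);           // older kernels
    #endif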

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -127,9 +127,9 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
goto err;
}
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING);
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING);
old_fd_type &= UVM_FD_TYPE_MASK;
if (old_fd_type != UVM_FD_UNINITIALIZED) {
status = NV_ERR_IN_USE;
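The cmpxchg above relies on the fd-type tagging scheme implied by UVM_FD_TYPE_MASK: the file descriptor's type lives in the low bits of filp->private_data and the object pointer in the remaining bits, so a single atomic compare-exchange both claims the fd and publishes its state. A hedged sketch of reading the tag back out (the helper name is illustrative):

    // Hypothetical reader: recover the fd type from the tagged pointer.
    static long uvm_example_fd_type(struct file *filp)
    {
        long value = atomic_long_read((atomic_long_t *)&filp->private_data);
        return value & UVM_FD_TYPE_MASK;
    }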
@@ -222,10 +222,6 @@ static int uvm_open(struct inode *inode, struct file *filp)
// assigning f_mapping.
mapping->a_ops = inode->i_mapping->a_ops;
#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
#endif
filp->private_data = NULL;
filp->f_mapping = mapping;
@@ -325,21 +321,21 @@ static int uvm_release_entry(struct inode *inode, struct file *filp)
static void uvm_destroy_vma_managed(struct vm_area_struct *vma, bool make_zombie)
{
uvm_va_range_t *va_range, *va_range_next;
uvm_va_range_managed_t *managed_range, *managed_range_next;
NvU64 size = 0;
uvm_assert_rwsem_locked_write(&uvm_va_space_get(vma->vm_file)->lock);
uvm_for_each_va_range_in_vma_safe(va_range, va_range_next, vma) {
uvm_for_each_va_range_managed_in_vma_safe(managed_range, managed_range_next, vma) {
// On exit_mmap (process teardown), current->mm is cleared so
// uvm_va_range_vma_current would return NULL.
UVM_ASSERT(uvm_va_range_vma(va_range) == vma);
UVM_ASSERT(va_range->node.start >= vma->vm_start);
UVM_ASSERT(va_range->node.end < vma->vm_end);
size += uvm_va_range_size(va_range);
UVM_ASSERT(uvm_va_range_vma(managed_range) == vma);
UVM_ASSERT(managed_range->va_range.node.start >= vma->vm_start);
UVM_ASSERT(managed_range->va_range.node.end < vma->vm_end);
size += uvm_va_range_size(&managed_range->va_range);
if (make_zombie)
uvm_va_range_zombify(va_range);
uvm_va_range_zombify(managed_range);
else
uvm_va_range_destroy(va_range, NULL);
uvm_va_range_destroy(&managed_range->va_range, NULL);
}
if (vma->vm_private_data) {
@@ -351,18 +347,17 @@ static void uvm_destroy_vma_managed(struct vm_area_struct *vma, bool make_zombie
static void uvm_destroy_vma_semaphore_pool(struct vm_area_struct *vma)
{
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
uvm_va_space_t *va_space;
uvm_va_range_t *va_range;
va_space = uvm_va_space_get(vma->vm_file);
uvm_assert_rwsem_locked(&va_space->lock);
va_range = uvm_va_range_find(va_space, vma->vm_start);
UVM_ASSERT(va_range &&
va_range->node.start == vma->vm_start &&
va_range->node.end + 1 == vma->vm_end &&
va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, vma->vm_start);
UVM_ASSERT(semaphore_pool_range &&
semaphore_pool_range->va_range.node.start == vma->vm_start &&
semaphore_pool_range->va_range.node.end + 1 == vma->vm_end);
uvm_mem_unmap_cpu_user(va_range->semaphore_pool.mem);
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
}
// If a fault handler is not set, paths like handle_pte_fault in older kernels
@@ -478,7 +473,7 @@ static void uvm_vm_open_failure(struct vm_area_struct *original,
static void uvm_vm_open_managed(struct vm_area_struct *vma)
{
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
uvm_va_range_t *va_range;
uvm_va_range_managed_t *managed_range;
struct vm_area_struct *original;
NV_STATUS status;
NvU64 new_end;
@@ -534,13 +529,13 @@ static void uvm_vm_open_managed(struct vm_area_struct *vma)
goto out;
}
// There can be multiple va_ranges under the vma already. Check if one spans
// There can be multiple ranges under the vma already. Check if one spans
// the new split boundary. If so, split it.
va_range = uvm_va_range_find(va_space, new_end);
UVM_ASSERT(va_range);
UVM_ASSERT(uvm_va_range_vma_current(va_range) == original);
if (va_range->node.end != new_end) {
status = uvm_va_range_split(va_range, new_end, NULL);
managed_range = uvm_va_range_managed_find(va_space, new_end);
UVM_ASSERT(managed_range);
UVM_ASSERT(uvm_va_range_vma_current(managed_range) == original);
if (managed_range->va_range.node.end != new_end) {
status = uvm_va_range_split(managed_range, new_end, NULL);
if (status != NV_OK) {
UVM_DBG_PRINT("Failed to split VA range, destroying both: %s. "
"original vma [0x%lx, 0x%lx) new vma [0x%lx, 0x%lx)\n",
@@ -552,10 +547,10 @@ static void uvm_vm_open_managed(struct vm_area_struct *vma)
}
}
// Point va_ranges to the new vma
uvm_for_each_va_range_in_vma(va_range, vma) {
UVM_ASSERT(uvm_va_range_vma_current(va_range) == original);
va_range->managed.vma_wrapper = vma->vm_private_data;
// Point managed_ranges to the new vma
uvm_for_each_va_range_managed_in_vma(managed_range, vma) {
UVM_ASSERT(uvm_va_range_vma_current(managed_range) == original);
managed_range->vma_wrapper = vma->vm_private_data;
}
out:
@@ -657,12 +652,12 @@ static struct vm_operations_struct uvm_vm_ops_managed =
};
// vm operations on semaphore pool allocations only control CPU mappings. Unmapping GPUs,
// freeing the allocation, and destroying the va_range are handled by UVM_FREE.
// freeing the allocation, and destroying the range are handled by UVM_FREE.
static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
{
struct vm_area_struct *origin_vma = (struct vm_area_struct *)vma->vm_private_data;
uvm_va_space_t *va_space = uvm_va_space_get(origin_vma->vm_file);
uvm_va_range_t *va_range;
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
bool is_fork = (vma->vm_mm != origin_vma->vm_mm);
NV_STATUS status;
@@ -670,18 +665,24 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
uvm_va_space_down_write(va_space);
va_range = uvm_va_range_find(va_space, origin_vma->vm_start);
UVM_ASSERT(va_range);
UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL &&
va_range->node.start == origin_vma->vm_start &&
va_range->node.end + 1 == origin_vma->vm_end,
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, origin_vma->vm_start);
UVM_ASSERT(semaphore_pool_range);
UVM_ASSERT_MSG(semaphore_pool_range &&
semaphore_pool_range->va_range.node.start == origin_vma->vm_start &&
semaphore_pool_range->va_range.node.end + 1 == origin_vma->vm_end,
"origin vma [0x%llx, 0x%llx); va_range [0x%llx, 0x%llx) type %d\n",
(NvU64)origin_vma->vm_start, (NvU64)origin_vma->vm_end, va_range->node.start,
va_range->node.end + 1, va_range->type);
(NvU64)origin_vma->vm_start,
(NvU64)origin_vma->vm_end,
semaphore_pool_range->va_range.node.start,
semaphore_pool_range->va_range.node.end + 1,
semaphore_pool_range->va_range.type);
// Semaphore pool vmas do not have vma wrappers, but some functions will
// assume vm_private_data is a wrapper.
vma->vm_private_data = NULL;
#if defined(VM_WIPEONFORK)
nv_vm_flags_set(vma, VM_WIPEONFORK);
#endif
if (is_fork) {
// If we forked, leave the parent vma alone.
@@ -689,9 +690,9 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
// uvm_disable_vma unmaps in the parent as well; clear the uvm_mem CPU
// user mapping metadata and then remap.
uvm_mem_unmap_cpu_user(va_range->semaphore_pool.mem);
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
status = uvm_mem_map_cpu_user(va_range->semaphore_pool.mem, va_range->va_space, origin_vma);
status = uvm_mem_map_cpu_user(semaphore_pool_range->mem, semaphore_pool_range->va_range.va_space, origin_vma);
if (status != NV_OK) {
UVM_DBG_PRINT("Failed to remap semaphore pool to CPU for parent after fork; status = %d (%s)",
status, nvstatusToString(status));
@@ -702,7 +703,7 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
origin_vma->vm_private_data = NULL;
origin_vma->vm_ops = &uvm_vm_ops_disabled;
vma->vm_ops = &uvm_vm_ops_disabled;
uvm_mem_unmap_cpu_user(va_range->semaphore_pool.mem);
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
}
uvm_va_space_up_write(va_space);
@@ -751,10 +752,84 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
#endif
};
static void uvm_vm_open_device_p2p(struct vm_area_struct *vma)
{
struct vm_area_struct *origin_vma = (struct vm_area_struct *)vma->vm_private_data;
uvm_va_space_t *va_space = uvm_va_space_get(origin_vma->vm_file);
uvm_va_range_t *va_range;
bool is_fork = (vma->vm_mm != origin_vma->vm_mm);
uvm_record_lock_mmap_lock_write(current->mm);
uvm_va_space_down_write(va_space);
va_range = uvm_va_range_find(va_space, origin_vma->vm_start);
UVM_ASSERT(va_range);
UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_DEVICE_P2P &&
va_range->node.start == origin_vma->vm_start &&
va_range->node.end + 1 == origin_vma->vm_end,
"origin vma [0x%llx, 0x%llx); va_range [0x%llx, 0x%llx) type %d\n",
(NvU64)origin_vma->vm_start, (NvU64)origin_vma->vm_end, va_range->node.start,
va_range->node.end + 1, va_range->type);
// Device P2P vmas do not have vma wrappers, but some functions will
// assume vm_private_data is a wrapper.
vma->vm_private_data = NULL;
#if defined(VM_WIPEONFORK)
nv_vm_flags_set(vma, VM_WIPEONFORK);
#endif
if (is_fork) {
// If we forked, leave the parent vma alone.
uvm_disable_vma(vma);
// uvm_disable_vma unmaps in the parent as well so remap the parent
uvm_va_range_device_p2p_map_cpu(va_range->va_space, origin_vma, uvm_va_range_to_device_p2p(va_range));
}
else {
// mremap will free the backing pages via unmap so we can't support it.
origin_vma->vm_private_data = NULL;
origin_vma->vm_ops = &uvm_vm_ops_disabled;
vma->vm_ops = &uvm_vm_ops_disabled;
unmap_mapping_range(va_space->mapping, va_range->node.start, va_range->node.end - va_range->node.start + 1, 1);
}
uvm_va_space_up_write(va_space);
uvm_record_unlock_mmap_lock_write(current->mm);
}
static void uvm_vm_open_device_p2p_entry(struct vm_area_struct *vma)
{
UVM_ENTRY_VOID(uvm_vm_open_device_p2p(vma));
}
// Device P2P pages are only mapped on the CPU. Pages are allocated externally
// to UVM but destroying the range must unpin the RM object.
static void uvm_vm_close_device_p2p(struct vm_area_struct *vma)
{
}
static void uvm_vm_close_device_p2p_entry(struct vm_area_struct *vma)
{
UVM_ENTRY_VOID(uvm_vm_close_device_p2p(vma));
}
static struct vm_operations_struct uvm_vm_ops_device_p2p =
{
.open = uvm_vm_open_device_p2p_entry,
.close = uvm_vm_close_device_p2p_entry,
#if defined(NV_VM_OPS_FAULT_REMOVED_VMA_ARG)
.fault = uvm_vm_fault_sigbus_wrapper_entry,
#else
.fault = uvm_vm_fault_sigbus_entry,
#endif
};
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
{
uvm_va_space_t *va_space;
uvm_va_range_t *va_range;
NV_STATUS status = uvm_global_get_status();
int ret = 0;
bool vma_wrapper_allocated = false;
@@ -845,18 +920,28 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
status = uvm_va_range_create_mmap(va_space, current->mm, vma->vm_private_data, NULL);
if (status == NV_ERR_UVM_ADDRESS_IN_USE) {
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
uvm_va_range_device_p2p_t *device_p2p_range;
// If the mmap is for a semaphore pool, the VA range will have been
// allocated by a previous ioctl, and the mmap just creates the CPU
// mapping.
va_range = uvm_va_range_find(va_space, vma->vm_start);
if (va_range && va_range->node.start == vma->vm_start &&
va_range->node.end + 1 == vma->vm_end &&
va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL) {
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, vma->vm_start);
device_p2p_range = uvm_va_range_device_p2p_find(va_space, vma->vm_start);
if (semaphore_pool_range && semaphore_pool_range->va_range.node.start == vma->vm_start &&
semaphore_pool_range->va_range.node.end + 1 == vma->vm_end) {
uvm_vma_wrapper_destroy(vma->vm_private_data);
vma_wrapper_allocated = false;
vma->vm_private_data = vma;
vma->vm_ops = &uvm_vm_ops_semaphore_pool;
status = uvm_mem_map_cpu_user(va_range->semaphore_pool.mem, va_range->va_space, vma);
status = uvm_mem_map_cpu_user(semaphore_pool_range->mem, semaphore_pool_range->va_range.va_space, vma);
}
else if (device_p2p_range && device_p2p_range->va_range.node.start == vma->vm_start &&
device_p2p_range->va_range.node.end + 1 == vma->vm_end) {
uvm_vma_wrapper_destroy(vma->vm_private_data);
vma_wrapper_allocated = false;
vma->vm_private_data = vma;
vma->vm_ops = &uvm_vm_ops_device_p2p;
status = uvm_va_range_device_p2p_map_cpu(va_space, vma, device_p2p_range);
}
}
@@ -914,8 +999,9 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
// attempt to be made. This is safe because other threads will have only had
// a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
// case.
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING);
old_fd_type &= UVM_FD_TYPE_MASK;
if (old_fd_type == UVM_FD_UNINITIALIZED) {
status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
@@ -1001,6 +1087,9 @@ static long uvm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_CLEAN_UP_ZOMBIE_RESOURCES, uvm_api_clean_up_zombie_resources);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_POPULATE_PAGEABLE, uvm_api_populate_pageable);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_VALIDATE_VA_RANGE, uvm_api_validate_va_range);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_V2,uvm_api_tools_get_processor_uuid_table_v2);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_ALLOC_DEVICE_P2P, uvm_api_alloc_device_p2p);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_CLEAR_ALL_ACCESS_COUNTERS, uvm_api_clear_all_access_counters);
}
// Try the test ioctls if none of the above matched

View File

@@ -45,20 +45,20 @@
// #endif
// 3) Do the same thing for the function definition, and for any structs that
// are taken as arguments to these functions.
// 4) Let this change propagate over to cuda_a and dev_a, so that the CUDA and
// nvidia-cfg libraries can start using the new API by bumping up the API
// 4) Let this change propagate over to cuda_a and bugfix_main, so that the CUDA
// and nvidia-cfg libraries can start using the new API by bumping up the API
// version number it's using.
// Places where UVM_API_REVISION is defined are:
// drivers/gpgpu/cuda/cuda.nvmk (cuda_a)
// drivers/setup/linux/nvidia-cfg/makefile.nvmk (dev_a)
// 5) Once the dev_a and cuda_a changes have made it back into chips_a,
// drivers/setup/linux/nvidia-cfg/makefile.nvmk (bugfix_main)
// 5) Once the bugfix_main and cuda_a changes have made it back into chips_a,
// remove the old API declaration, definition, and any old structs that were
// in use.
#ifndef _UVM_H_
#define _UVM_H_
#define UVM_API_LATEST_REVISION 12
#define UVM_API_LATEST_REVISION 13
#if !defined(UVM_API_REVISION)
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
@@ -379,41 +379,24 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
// OS state required to register the GPU is malformed, or the partition
// identified by the user handles or its configuration changed.
//
// NV_ERR_NVLINK_FABRIC_NOT_READY:
// (On NvSwitch-connected systems) Indicates that the fabric has not been
// configured yet. Caller must retry GPU registration.
//
// NV_ERR_NVLINK_FABRIC_FAILURE:
// (On NvSwitch-connected systems) Indicates that the NvLink fabric
// failed to be configured.
//
// NV_ERR_GPU_MEMORY_ONLINING_FAILURE:
// (On coherent systems) The GPU's memory onlining failed.
//
// NV_ERR_GENERIC:
// Unexpected error. We try hard to avoid returning this error code,
// because it is not very informative.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(8)
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid);
#else
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid,
const UvmGpuPlatformParams *platformParams);
#endif
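A hedged usage sketch of the post-revision-8 UvmRegisterGpu signature shown above; obtaining the UUID and platform parameters is outside this header, so both are left as zero-initialized placeholders here:

    NvProcessorUuid gpuUuid = {0};              // placeholder: the GPU's UUID
    UvmGpuPlatformParams platformParams = {0};  // placeholder: partition handles when SMC is used

    NV_STATUS status = UvmRegisterGpu(&gpuUuid, &platformParams);
    if (status == NV_ERR_NVLINK_FABRIC_NOT_READY) {
        // On NvSwitch-connected systems the fabric may still be configuring;
        // the caller is expected to retry registration.
    }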
#if UVM_API_REV_IS_AT_MOST(8)
//------------------------------------------------------------------------------
// UvmRegisterGpuSmc
//
// The same as UvmRegisterGpu, but takes additional parameters to specify the
// GPU partition being registered if SMC is enabled.
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the physical GPU of the SMC partition to register.
//
// platformParams: (INPUT)
// User handles identifying the partition to register.
//
// Error codes (see UvmRegisterGpu also):
//
// NV_ERR_INVALID_STATE:
// SMC was not enabled, or the partition identified by the user
// handles or its configuration changed.
//
NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
const UvmGpuPlatformParams *platformParams);
#endif
//------------------------------------------------------------------------------
// UvmUnregisterGpu
@@ -1345,9 +1328,8 @@ NV_STATUS UvmCleanUpZombieResources(void);
//
// NV_ERR_INVALID_ARGUMENT:
// perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
// or caching is requested on more than one GPU.
// The Confidential Computing feature is enabled and the perGpuAttribs
// list is empty.
// or caching is requested on more than one GPU, or (in Confidential
// Computing only) the perGpuAttribs list is empty.
//
// NV_ERR_NOT_SUPPORTED:
// The current process is not the one which called UvmInitialize, and
@@ -1364,6 +1346,86 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
const UvmGpuMappingAttributes *perGpuAttribs,
NvLength gpuAttribsCount);
//------------------------------------------------------------------------------
// UvmAllocDeviceP2P
//
// Create a VA range within the process's address space reserved for use by
// other devices to directly access GPU memory. The memory associated with the
// RM handle is mapped into the user address space associated with the range for
// direct access from the CPU.
//
// The VA range must not overlap with an existing VA range, irrespective of
// whether the existing range corresponds to a UVM allocation or an external
// allocation.
//
// Multiple VA ranges may be created mapping the same physical memory associated
// with the RM handle. The associated GPU memory will not be freed until all VA
// ranges have been destroyed either explicitly or implicitly and all non-UVM
// users (e.g. third party device drivers) have stopped using the associated
// GPU memory.
//
// The VA range can be unmapped and freed by calling UvmFree.
//
// Destroying the final range mapping the RM handle may block until all third
// party device drivers and other kernel users have stopped using the memory.
//
// These VA ranges are only associated with a single GPU.
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition containing the
// memory to be mapped on the CPU.
//
// base: (INPUT)
// Base address of the virtual address range.
//
// length: (INPUT)
// Length, in bytes, of the range.
//
// offset: (INPUT)
// Offset, in bytes, from the start of the externally allocated memory
// to map from.
//
// platformParams: (INPUT)
// Platform specific parameters that identify the allocation.
// On Linux: RM ctrl fd, hClient and the handle (hMemory) of the
// externally allocated memory to map.
//
// Errors:
//
// NV_ERR_INVALID_ADDRESS:
// base is NULL or length is zero or at least one of base and length is
// not aligned to 4K.
//
// NV_ERR_INVALID_DEVICE:
// The gpuUuid was either not registered or has no GPU VA space
// registered for it.
//
// NV_ERR_INVALID_ARGUMENT:
// base + offset + length exceeds the end of the externally allocated
// memory handle or the externally allocated handle is not valid.
//
// NV_ERR_UVM_ADDRESS_IN_USE:
// The requested virtual address range overlaps with an existing
// allocation.
//
// NV_ERR_NO_MEMORY:
// Internal memory allocation failed.
//
// NV_ERR_NOT_SUPPORTED:
// The device peer-to-peer feature is not supported by the current
// system configuration. This may be because the GPU doesn't support
// the peer-to-peer feature or the kernel was not built with the correct
// configuration options.
//
//------------------------------------------------------------------------------
NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
void *base,
NvLength length,
NvLength offset,
const UvmDeviceP2PPlatformParams *platformParams);
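A hedged usage sketch under the constraints documented above: base and length must be 4K-aligned, base must point at an otherwise unused virtual address range, and the platform parameters must identify an existing RM allocation. All values below are placeholders; how they are obtained is not shown:

    void *base = NULL;          // placeholder: reserved, 4K-aligned VA range
    NvLength length = 0;        // placeholder: 4K-multiple length of that range
    NvProcessorUuid gpuUuid = {0};                // placeholder: UUID of the owning GPU
    UvmDeviceP2PPlatformParams p2pParams = {0};   // placeholder: RM ctrl fd, hClient, hMemory

    NV_STATUS status = UvmAllocDeviceP2P(gpuUuid, base, length, 0 /* offset */, &p2pParams);
    if (status == NV_OK) {
        // The GPU memory is now CPU-accessible at [base, base + length).
        // Calling UvmFree on the range later tears it down, and may block until
        // all third-party users of the memory have finished.
    }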
//------------------------------------------------------------------------------
// UvmMigrate
//
@@ -1417,7 +1479,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
// If read duplication is enabled on any pages in the VA range, then those pages
// are read duplicated at the destination processor, leaving the source copy, if
// present, intact with only its mapping changed to read-only if it wasn't
// already mapped that way.
// already mapped that way. The exception to this behavior is migrating pages
// between different NUMA nodes, in which case the pages are migrated to the
// destination node and a read-only mapping is created to the migrated pages.
//
// Pages in the VA range are migrated even if their preferred location is set to
// a processor other than the destination processor.
@@ -2160,7 +2224,9 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
//
// If UvmMigrate, UvmMigrateAsync or UvmMigrateRangeGroup is called on any pages
// in this VA range, then those pages will also be read duplicated on the
// destination processor for the migration.
// destination processor for the migration unless the migration is between CPU
// NUMA nodes, in which case the pages are migrated to the destination NUMA
// node and a read-only mapping to the migrated pages is created.
//
// Enabling read duplication on a VA range requires the CPU and all GPUs with
// registered VA spaces to be fault-capable. Otherwise, the migration and
@@ -3276,7 +3342,7 @@ NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
//------------------------------------------------------------------------------
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
UvmEventEntry_V1 *pBuffer,
UvmEventEntry *pBuffer,
NvU64 *nEntries);
//------------------------------------------------------------------------------
@@ -3472,32 +3538,21 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
// 4. Destroy event Queue using UvmToolsDestroyEventQueue
//
#if UVM_API_REV_IS_AT_MOST(10)
// This is deprecated and replaced by sizeof(UvmToolsEventControlData).
NvLength UvmToolsGetEventControlSize(void);
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
// sizeof(UvmEventEntry_V2).
NvLength UvmToolsGetEventEntrySize(void);
#endif
NvLength UvmToolsGetNumberOfCounters(void);
//------------------------------------------------------------------------------
// UvmToolsCreateEventQueue
//
// This call creates an event queue that can hold the given number of events.
// All events are disabled by default. Event queue data persists lifetime of the
// target process.
// This function is deprecated. See UvmToolsCreateEventQueue_V2.
//
// This call creates an event queue that can hold the given number of
// UvmEventEntry events. All events are disabled by default. Event queue data
// persists for the lifetime of the target process.
//
// Arguments:
// session: (INPUT)
// Handle to the tools session.
//
// version: (INPUT)
// Requested version for events or counters.
// See UvmToolsEventQueueVersion.
//
// event_buffer: (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold at least event_buffer_size events. Gets pinned until queue is
@@ -3520,8 +3575,55 @@ NvLength UvmToolsGetNumberOfCounters(void);
// Session handle does not refer to a valid session
//
// NV_ERR_INVALID_ARGUMENT:
// The version is not UvmToolsEventQueueVersion_V1 or
// UvmToolsEventQueueVersion_V2.
// One of the parameters: event_buffer, event_buffer_size, event_control
// is not valid
//
// NV_ERR_INSUFFICIENT_RESOURCES:
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate a queue of requested size. Another
// would be either event_buffer or event_control memory couldn't be
// pinned (e.g. because of an OS limit on pinnable memory). It might
// also not have been possible to create the UvmToolsEventQueueDescriptor.
//
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
//------------------------------------------------------------------------------
// UvmToolsCreateEventQueue_V2
//
// This call creates an event queue that can hold the given number of
// UvmEventEntry_V2 events. All events are disabled by default. Event queue data
// persists beyond the lifetime of the target process.
//
// Arguments:
// session: (INPUT)
// Handle to the tools session.
//
// event_buffer: (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold at least event_buffer_size events. Gets pinned until queue is
// destroyed.
//
// event_buffer_size: (INPUT)
// Size of the event queue buffer in units of UvmEventEntry_V2's. Must
// be a power of two, and greater than 1.
//
// event_control (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold UvmToolsEventControlData (although single page-size allocation
// should be more than enough). Gets pinned until queue is destroyed.
//
// queue: (OUTPUT)
// Handle to the created queue.
//
// Error codes:
// NV_ERR_INSUFFICIENT_PERMISSIONS:
// Session handle does not refer to a valid session
//
// NV_ERR_INVALID_ARGUMENT:
// One of the parameters: event_buffer, event_buffer_size, event_control
// is not valid
//
@@ -3538,20 +3640,11 @@ NvLength UvmToolsGetNumberOfCounters(void);
// might not have been possible to create the UvmToolsEventQueueDescriptor.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(10)
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
#else
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
UvmToolsEventQueueVersion version,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
#endif
NV_STATUS UvmToolsCreateEventQueue_V2(UvmToolsSessionHandle session,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
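A hedged user-space sketch of creating a V2 queue per the constraints above: both buffers page-aligned and pinned for the queue's lifetime, and the entry count a power of two greater than 1. The session is assumed to come from an earlier session-creation call, and the page size and entry count are example choices:

    #include <stdlib.h>

    #define EXAMPLE_QUEUE_ENTRIES 256   // power of two, greater than 1

    UvmToolsSessionHandle session;      // assumed: already created for the target process
    UvmToolsEventQueueHandle queue;
    void *event_buffer = NULL;
    void *event_control = NULL;

    // Page-aligned allocations; a single page is documented as enough for the
    // control area. Both stay pinned until the queue is destroyed.
    if (posix_memalign(&event_buffer, 4096, EXAMPLE_QUEUE_ENTRIES * sizeof(UvmEventEntry_V2)) == 0 &&
        posix_memalign(&event_control, 4096, 4096) == 0) {
        NV_STATUS status = UvmToolsCreateEventQueue_V2(session,
                                                       event_buffer,
                                                       EXAMPLE_QUEUE_ENTRIES,
                                                       event_control,
                                                       &queue);
    }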
UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);
@@ -3866,9 +3959,7 @@ NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
// In-process scenario when targetVa address + size overlaps with buffer + size.
//
// This is essentially a UVM version of RM ctrl call
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
// information), please refer to the documentation:
// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY.
//
// Arguments:
// session: (INPUT)
@@ -3922,9 +4013,7 @@ NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle session,
// buffer + size.
//
// This is essentially a UVM version of RM ctrl call
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
// information), please refer to the documentation:
// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY.
//
// Arguments:
// session: (INPUT)
@@ -3967,6 +4056,8 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
//------------------------------------------------------------------------------
// UvmToolsGetProcessorUuidTable
//
// This function is deprecated. See UvmToolsGetProcessorUuidTable_V2.
//
// Populate a table with the UUIDs of all the currently registered processors
// in the target process. When a GPU is registered, it is added to the table.
// When a GPU is unregistered, it is removed. As long as a GPU remains
@@ -3979,55 +4070,63 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
// session: (INPUT)
// Handle to the tools session.
//
// version: (INPUT)
// Requested version for the UUID table returned. The version must
// match the requested version of the event queue created with
// UvmToolsCreateEventQueue(). See UvmToolsEventQueueVersion.
// If the version of the event queue does not match the version of the
// UUID table, the behavior is undefined.
//
// table: (OUTPUT)
// Array of processor UUIDs, including the CPU's UUID which is always
// at index zero. The number of elements in the array must be greater
// or equal to UVM_MAX_PROCESSORS_V1 if the version is
// UvmToolsEventQueueVersion_V1 and UVM_MAX_PROCESSORS if the version is
// UvmToolsEventQueueVersion_V2.
// or equal to UVM_MAX_PROCESSORS_V1.
// The srcIndex and dstIndex fields of the UvmEventMigrationInfo struct
// index this array. Unused indices will have a UUID of zero.
// If version is UvmToolsEventQueueVersion_V1 then the reported UUID
// will be that of the corresponding physical GPU, even if multiple SMC
// partitions are registered under that physical GPU. If version is
// UvmToolsEventQueueVersion_V2 then the reported UUID will be the GPU
// instance UUID if SMC is enabled, otherwise it will be the UUID of
// the physical GPU.
// The reported UUID will be that of the corresponding physical GPU,
// even if multiple SMC partitions are registered under that physical
// GPU.
//
// Error codes:
// NV_ERR_INVALID_ADDRESS:
// writing to table failed.
//
// NV_ERR_INVALID_ARGUMENT:
// The version is not UvmToolsEventQueueVersion_V1 or
// UvmToolsEventQueueVersion_V2.
// NV_ERR_NO_MEMORY:
// Internal memory allocation failed.
//------------------------------------------------------------------------------
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
NvProcessorUuid *table);
//------------------------------------------------------------------------------
// UvmToolsGetProcessorUuidTable_V2
//
// Populate a table with the UUIDs of all the currently registered processors
// in the target process. When a GPU is registered, it is added to the table.
// When a GPU is unregistered, it is removed. As long as a GPU remains
// registered, its index in the table does not change.
// Note that the index in the table corresponds to the processor ID reported
// in UvmEventEntry event records and that the table is not contiguously packed
// with non-zero UUIDs even with no GPU unregistrations.
//
// Arguments:
// session: (INPUT)
// Handle to the tools session.
//
// table: (OUTPUT)
// Array of processor UUIDs, including the CPU's UUID which is always
// at index zero. The number of elements in the array must be greater
// or equal to UVM_MAX_PROCESSORS.
// The srcIndex and dstIndex fields of the UvmEventMigrationInfo struct
// index this array. Unused indices will have a UUID of zero.
// The reported UUID will be the GPU instance UUID if SMC is enabled,
// otherwise it will be the UUID of the physical GPU.
//
// Error codes:
// NV_ERR_INVALID_ADDRESS:
// writing to table failed.
//
// NV_ERR_NOT_SUPPORTED:
// The kernel is not able to support the requested version
// (i.e., the UVM kernel driver is older and doesn't support
// UvmToolsEventQueueVersion_V2).
// The UVM kernel driver is older and doesn't support
// UvmToolsGetProcessorUuidTable_V2.
//
// NV_ERR_NO_MEMORY:
// Internal memory allocation failed.
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(11)
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
UvmToolsEventQueueVersion version,
NvProcessorUuid *table,
NvLength table_size,
NvLength *count);
#else
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
UvmToolsEventQueueVersion version,
NvProcessorUuid *table);
#endif
NV_STATUS UvmToolsGetProcessorUuidTable_V2(UvmToolsSessionHandle session,
NvProcessorUuid *table);
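A short hedged sketch of the V2 table call documented above; UVM_MAX_PROCESSORS bounds the table, index 0 is always the CPU, and the session handle is assumed to exist already:

    NvProcessorUuid uuid_table[UVM_MAX_PROCESSORS];

    NV_STATUS status = UvmToolsGetProcessorUuidTable_V2(session, uuid_table);
    if (status == NV_OK) {
        // uuid_table[0] is the CPU; GPU slots correspond to the srcIndex/dstIndex
        // fields in UvmEventEntry records, and unused slots hold an all-zero UUID.
    }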
//------------------------------------------------------------------------------
// UvmToolsFlushEvents

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2023 NVIDIA Corporation
Copyright (c) 2021-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -51,6 +51,9 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
@@ -98,4 +101,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2023 NVIDIA Corporation
Copyright (c) 2018-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -51,6 +51,9 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
@@ -107,4 +110,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}

View File

@@ -29,6 +29,8 @@
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
UVM_ASSERT(push->channel);
if (!uvm_channel_is_proxy(push->channel))
return true;
@@ -116,6 +118,16 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
{
NvU64 push_begin_gpu_va;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);
UVM_ASSERT(push->channel);
if (peer_copy && !uvm_channel_is_p2p(push->channel)) {
UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
src.address,
dst.address);
return false;
}
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
return true;
@@ -182,6 +194,8 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
{
uvm_pushbuffer_t *pushbuffer;
UVM_ASSERT(push->channel);
if (!uvm_channel_is_proxy(push->channel))
return;

View File

@@ -36,6 +36,8 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
return true;
UVM_ASSERT(push->channel);
if (uvm_channel_is_privileged(push->channel)) {
switch (method_address) {
case NVC56F_SET_OBJECT:
@@ -84,6 +86,8 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
UVM_ASSERT(push->channel);
if (!uvm_channel_is_proxy(push->channel))
return true;

View File

@@ -47,7 +47,7 @@
{ \
params_type params; \
BUILD_BUG_ON(sizeof(params) > UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(&params, (void __user*)arg, sizeof(params))) \
if (copy_from_user(&params, (void __user*)arg, sizeof(params))) \
return -EFAULT; \
\
params.rmStatus = uvm_global_get_status(); \
@@ -60,7 +60,7 @@
params.rmStatus = function_name(&params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, &params, sizeof(params))) \
if (copy_to_user((void __user*)arg, &params, sizeof(params))) \
return -EFAULT; \
\
return 0; \
@@ -84,7 +84,7 @@
if (!params) \
return -ENOMEM; \
BUILD_BUG_ON(sizeof(*params) <= UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
if (copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
uvm_kvfree(params); \
return -EFAULT; \
} \
@@ -99,7 +99,7 @@
params->rmStatus = function_name(params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, params, sizeof(*params))) \
if (copy_to_user((void __user*)arg, params, sizeof(*params))) \
ret = -EFAULT; \
\
uvm_kvfree(params); \
@@ -244,6 +244,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_enable_system_wide_atomics(UVM_ENABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_disable_system_wide_atomics(UVM_DISABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_init_event_tracker_v2(UVM_TOOLS_INIT_EVENT_TRACKER_V2_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp);
@@ -256,5 +257,7 @@ NV_STATUS uvm_api_unmap_external(UVM_UNMAP_EXTERNAL_PARAMS *params, struct file
NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_populate_pageable(const UVM_POPULATE_PAGEABLE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_alloc_device_p2p(UVM_ALLOC_DEVICE_P2P_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
#endif // __UVM_API_H__
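The UVM_ROUTE_CMD_* macros earlier in this header wrap every ioctl in the same copy-in / dispatch / copy-out pattern that the diff switches from nv_copy_{from,to}_user to the kernel's copy_{from,to}_user. A condensed sketch of that flow outside the macro machinery, using the real UVM_INITIALIZE_PARAMS type as the example payload:

    #include <linux/uaccess.h>

    // Sketch of what a routed ioctl expands to for a stack-sized params struct.
    static long uvm_example_ioctl(struct file *filp, unsigned long arg,
                                  NV_STATUS (*handler)(UVM_INITIALIZE_PARAMS *, struct file *))
    {
        UVM_INITIALIZE_PARAMS params;

        if (copy_from_user(&params, (void __user *)arg, sizeof(params)))
            return -EFAULT;                        // bad user pointer

        params.rmStatus = handler(&params, filp);  // dispatch to the API handler

        if (copy_to_user((void __user *)arg, &params, sizeof(params)))
            return -EFAULT;                        // results must reach user space

        return 0;
    }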

View File

@@ -55,8 +55,9 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
NvU64 user_space_start;
NvU64 user_space_length;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
uvm_populate_permissions_t populate_permissions = fault_service_type ?
bool is_fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
bool is_prefetch_faults = (is_fault_service_type && (access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH));
uvm_populate_permissions_t populate_permissions = is_fault_service_type ?
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
UVM_POPULATE_PERMISSIONS_INHERIT;
@@ -90,18 +91,27 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
uvm_migrate_args_t uvm_migrate_args =
{
.va_space = va_space,
.mm = mm,
.dst_id = ats_context->residency_id,
.dst_node_id = ats_context->residency_node,
.start = start,
.length = length,
.populate_permissions = populate_permissions,
.touch = fault_service_type,
.skip_mapped = fault_service_type,
.populate_on_cpu_alloc_failures = fault_service_type,
.user_space_start = &user_space_start,
.user_space_length = &user_space_length,
.va_space = va_space,
.mm = mm,
.dst_id = ats_context->residency_id,
.dst_node_id = ats_context->residency_node,
.start = start,
.length = length,
.populate_permissions = populate_permissions,
.touch = is_fault_service_type,
.skip_mapped = is_fault_service_type,
.populate_on_cpu_alloc_failures = is_fault_service_type,
.populate_on_migrate_vma_failures = is_fault_service_type,
.user_space_start = &user_space_start,
.user_space_length = &user_space_length,
// Potential STO NVLINK errors cannot be resolved in fault or access
// counter handlers. If there are GPUs to check for STO, it's either
// a) a false positive, and the migration went through ok, or
// b) a true positive, and the destination is all zeros, and the
// application will be terminated soon.
.gpus_to_check_for_nvlink_errors = NULL,
.fail_on_unresolved_sto_errors = !is_fault_service_type || is_prefetch_faults,
};
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
@@ -112,7 +122,7 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
// set skip_mapped to true. For pages already mapped, this will only handle
// PTE upgrades if needed.
status = uvm_migrate_pageable(&uvm_migrate_args);
if (status == NV_WARN_NOTHING_TO_DO)
if (is_fault_service_type && (status == NV_WARN_NOTHING_TO_DO))
status = NV_OK;
UVM_ASSERT(status != NV_ERR_MORE_PROCESSING_REQUIRED);
@@ -145,7 +155,10 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_context_t *ats_context)
{
uvm_gpu_t *gpu = gpu_va_space->gpu;
int residency = uvm_gpu_numa_node(gpu);
int residency;
UVM_ASSERT(gpu->mem_info.numa.enabled);
residency = uvm_gpu_numa_node(gpu);
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
struct mempolicy *vma_policy = vma_policy(vma);
@@ -379,14 +392,20 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context,
uvm_va_block_region_t max_prefetch_region)
{
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *accessed_mask;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS)
accessed_mask = &ats_context->faults.accessed_mask;
else
accessed_mask = &ats_context->access_counters.accessed_mask;
if (uvm_page_mask_empty(accessed_mask))
return;
@@ -406,7 +425,7 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *accessed_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
@@ -420,6 +439,11 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
return status;
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS)
accessed_mask = &ats_context->faults.accessed_mask;
else
accessed_mask = &ats_context->access_counters.accessed_mask;
if (uvm_page_mask_empty(accessed_mask))
return status;
@@ -432,12 +456,12 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
}
else {
ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
ats_compute_prefetch_mask(gpu_va_space, vma, service_type, ats_context, max_prefetch_region);
}
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
@@ -451,6 +475,65 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
return status;
}
static NV_STATUS uvm_ats_service_faults_region(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_va_block_region_t region,
uvm_fault_access_type_t access_type,
uvm_ats_fault_context_t *ats_context,
uvm_page_mask_t *faults_serviced_mask)
{
NvU64 start = base + (region.first * PAGE_SIZE);
size_t length = uvm_va_block_region_size(region);
NV_STATUS status;
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_requests(gpu_va_space,
vma,
start,
length,
access_type,
UVM_ATS_SERVICE_TYPE_FAULTS,
ats_context);
if (status != NV_OK)
return status;
uvm_page_mask_region_fill(faults_serviced_mask, region);
// WAR for older kernel versions missing an SMMU invalidate on RO -> RW
// transition. The SMMU and GPU could have the stale RO copy cached in their
// TLBs, which could have caused this write fault. This operation
// invalidates the SMMU TLBs but not the GPU TLBs. That will happen below as
// necessary.
if (access_type == UVM_FAULT_ACCESS_TYPE_WRITE)
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
// The Linux kernel does not invalidate TLB entries on an invalid to valid
// PTE transition. The GPU might have the invalid PTE cached in its TLB.
// The GPU will re-fetch an entry on access if the PTE is invalid and the
// page size is not 4K, but if the page size is 4K no re-fetch will happen
// and the GPU will fault despite the CPU PTE being valid. We don't know
// whether these faults happened due to stale entries after a transition,
// so use the hammer of always invalidating the GPU's TLB on each fault.
//
// The second case is similar and handles missing ATS invalidations on RO ->
// RW transitions for all page sizes. See the uvm_ats_smmu_invalidate_tlbs()
// call above.
if (PAGE_SIZE == UVM_PAGE_SIZE_4K || (UVM_ATS_SMMU_WAR_REQUIRED() && access_type == UVM_FAULT_ACCESS_TYPE_WRITE)) {
flush_tlb_va_region(gpu_va_space, start, length, ats_context->client_type);
}
else {
// ARM requires TLB invalidations on RO -> RW, but not all architectures
// do. If we implement ATS support on other architectures, we might need
// to issue GPU invalidates.
UVM_ASSERT(NVCPU_IS_AARCH64);
}
return NV_OK;
}
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
@@ -459,12 +542,11 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
NV_STATUS status = NV_OK;
uvm_va_block_region_t subregion;
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
uvm_fault_client_type_t client_type = ats_context->client_type;
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
uvm_page_mask_t *reads_serviced_mask = &ats_context->faults.reads_serviced_mask;
UVM_ASSERT(vma);
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
@@ -480,7 +562,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_zero(reads_serviced_mask);
if (!(vma->vm_flags & VM_READ))
return status;
return NV_OK;
if (!(vma->vm_flags & VM_WRITE)) {
// If VMA doesn't have write permissions, all write faults are fatal.
@@ -496,72 +578,65 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
// There are no pending faults beyond write faults to RO region.
if (uvm_page_mask_empty(read_fault_mask))
return status;
return NV_OK;
}
ats_batch_select_residency(gpu_va_space, vma, ats_context);
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
ats_compute_prefetch(gpu_va_space, vma, base, UVM_ATS_SERVICE_TYPE_FAULTS, ats_context);
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
uvm_fault_access_type_t access_type = (vma->vm_flags & VM_WRITE) ?
UVM_FAULT_ACCESS_TYPE_WRITE :
UVM_FAULT_ACCESS_TYPE_READ;
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
uvm_fault_access_type_t access_type;
uvm_page_mask_t *serviced_mask;
if (vma->vm_flags & VM_WRITE) {
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
// The Linux kernel never invalidates TLB entries on mapping
// permission upgrade. This is a problem if the GPU has cached
// entries with the old permission. The GPU will re-fetch the entry
// if the PTE is invalid and page size is not 4K (this is the case
// on P9). However, if a page gets upgraded from R/O to R/W and GPU
// has the PTEs cached with R/O permissions we will enter an
// infinite loop because we just forward the fault to the Linux
// kernel and it will see that the permissions in the page table are
// correct. Therefore, we flush TLB entries on ATS write faults.
flush_tlb_va_region(gpu_va_space, start, length, client_type);
access_type = UVM_FAULT_ACCESS_TYPE_WRITE;
serviced_mask = faults_serviced_mask;
}
else {
uvm_page_mask_region_fill(reads_serviced_mask, subregion);
// write_fault_mask contains just the addresses with both read and
// fatal write faults, so we need to service the read component.
access_type = UVM_FAULT_ACCESS_TYPE_READ;
serviced_mask = reads_serviced_mask;
}
status = uvm_ats_service_faults_region(gpu_va_space,
vma,
base,
subregion,
access_type,
ats_context,
serviced_mask);
if (status != NV_OK)
return status;
}
// Remove write faults from read_fault_mask
// Remove write faults from read_fault_mask to avoid double-service
uvm_page_mask_andnot(read_fault_mask, read_fault_mask, write_fault_mask);
for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
status = uvm_ats_service_faults_region(gpu_va_space,
vma,
base,
subregion,
UVM_FAULT_ACCESS_TYPE_READ,
ats_context,
faults_serviced_mask);
if (status != NV_OK)
return status;
}
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
// Similarly to permission upgrade scenario, discussed above, GPU
// will not re-fetch the entry if the PTE is invalid and page size
// is 4K. To avoid infinite faulting loop, invalidate TLB for every
// new translation written explicitly like in the case of permission
// upgrade.
if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
flush_tlb_va_region(gpu_va_space, start, length, client_type);
// Handle HW prefetch only faults
for_each_va_block_subregion_in_mask(subregion, prefetch_only_fault_mask, region) {
status = uvm_ats_service_faults_region(gpu_va_space,
vma,
base,
subregion,
UVM_FAULT_ACCESS_TYPE_PREFETCH,
ats_context,
faults_serviced_mask);
if (status != NV_OK)
return status;
}
return status;
@@ -637,6 +712,8 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
uvm_page_mask_zero(&ats_context->access_counters.migrated_mask);
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
@@ -650,21 +727,27 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
// Remove pages which are already resident at the intended destination from
// the accessed_mask.
uvm_page_mask_andnot(&ats_context->accessed_mask,
&ats_context->accessed_mask,
uvm_page_mask_andnot(&ats_context->access_counters.accessed_mask,
&ats_context->access_counters.accessed_mask,
&ats_context->prefetch_state.residency_mask);
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
for_each_va_block_subregion_in_mask(subregion, &ats_context->access_counters.accessed_mask, region) {
NV_STATUS status;
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
uvm_page_mask_t *migrated_mask = &ats_context->access_counters.migrated_mask;
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
// clear access counters if pages were migrated or migration needs to
// be retried
if (status == NV_OK || status == NV_ERR_BUSY_RETRY)
uvm_page_mask_region_fill(migrated_mask, subregion);
else if (status != NV_WARN_NOTHING_TO_DO)
return status;
}

View File

@@ -29,18 +29,19 @@
// Service ATS faults in the range (base, base + UVM_VA_BLOCK_SIZE) with service
// type for individual pages in the range requested by page masks set in
// ats_context->read_fault_mask/write_fault_mask. base must be aligned to
// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that faulting
// addresses fall completely within the VMA. The caller is also responsible for
// ensuring that the faulting addresses don't overlap a GMMU region. (See
// uvm_ats_check_in_gmmu_region). The caller is also responsible for handling
// any errors returned by this function (fault cancellations etc.).
// ats_context->fault.read_fault_mask/write_fault_mask/prefetch_only_mask.
// base must be aligned to UVM_VA_BLOCK_SIZE. The caller is responsible for
// ensuring that faulting addresses fall completely within the VMA. The caller
// is also responsible for ensuring that the faulting addresses don't overlap
// a GMMU region. (See uvm_ats_check_in_gmmu_region). The caller is also
// responsible for handling any errors returned by this function (fault
// cancellations etc.).
//
// Returns the fault service status in ats_context->faults_serviced_mask. In
// addition, ats_context->reads_serviced_mask returns whether read servicing
// worked on write faults iff the read service was also requested in the
// corresponding bit in read_fault_mask. These returned masks are only valid if
// the return status is NV_OK. Status other than NV_OK indicate system global
// Returns the fault service status in ats_context->fault.faults_serviced_mask.
// In addition, ats_context->fault.reads_serviced_mask returns whether read
// servicing worked on write faults iff the read service was also requested in
// the corresponding bit in read_fault_mask. These returned masks are only valid
// if the return status is NV_OK. A status other than NV_OK indicates a
// system-global fault servicing failure.
//
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
@@ -52,9 +53,9 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
// Service access counter notifications on ATS regions in the range (base, base
// + UVM_VA_BLOCK_SIZE) for individual pages in the range requested by page_mask
// set in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
// The caller is responsible for ensuring that the addresses in the
// accessed_mask is completely covered by the VMA. The caller is also
// set in ats_context->access_counters.accessed_mask. base must be aligned to
// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that the addresses
// in the accessed_mask are completely covered by the VMA. The caller is also
// responsible for handling any errors returned by this function.
//
// Returns NV_OK if servicing was successful. Any other error indicates an error

View File

@@ -127,12 +127,12 @@ static NvU32 smmu_vintf_read32(void __iomem *smmu_cmdqv_base, int reg)
// We always use VCMDQ127 for the WAR
#define VCMDQ 127
void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
static void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
{
iowrite32(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}
NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
static NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
{
return ioread32(SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2022-2023 NVIDIA Corporation
Copyright (c) 2022-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -51,14 +51,16 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;
parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
// See uvm_mmu.h for mapping placement
parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);
// TODO: Bug 3953852: Set this to true pending Blackwell changes
parent_gpu->ce_phys_vidmem_write_supported = !uvm_parent_gpu_is_coherent(parent_gpu);
parent_gpu->ce_phys_vidmem_write_supported = true;
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
@@ -102,4 +104,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = true;
parent_gpu->conf_computing.per_channel_key_rotation = true;
}

View File

@@ -37,7 +37,6 @@
#include "uvm_hal_types.h"
#include "uvm_blackwell_fault_buffer.h"
#include "hwref/blackwell/gb100/dev_fault.h"
#include "hwref/blackwell/gb100/dev_mmu.h"
static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;

View File

@@ -855,6 +855,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
const UvmCslIv *decrypt_iv,
NvU32 key_version,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
@@ -869,6 +870,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
dst_plain + i * copy_size,
src_cipher + i * copy_size,
decrypt_iv + i,
key_version,
copy_size,
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
}
@@ -879,6 +881,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
const UvmCslIv *decrypt_iv,
NvU32 key_version,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
@@ -896,6 +899,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
dst_plain + i * copy_size,
src_cipher + i * copy_size,
decrypt_iv + i,
key_version,
copy_size,
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
}
@@ -959,7 +963,7 @@ static void gpu_encrypt(uvm_push_t *push,
i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
dst_cipher);
uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);
if (i > 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
@@ -1020,6 +1024,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
UvmCslIv *decrypt_iv = NULL;
UvmCslIv *encrypt_iv = NULL;
NvU32 key_version;
uvm_tracker_t tracker;
size_t src_plain_size;
@@ -1089,6 +1094,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);
// There shouldn't be any key rotation between the end of the push and the
// CPU decryption(s), but forcing the decryption to use the saved key makes
// the test more robust against future test changes.
key_version = uvm_channel_pool_key_version(push.channel->pool);
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
@@ -1101,6 +1111,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
dst_plain,
dst_cipher,
decrypt_iv,
key_version,
auth_tag_mem,
size,
copy_size),
@@ -1111,6 +1122,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
dst_plain,
dst_cipher,
decrypt_iv,
key_version,
auth_tag_mem,
size,
copy_size),

File diff suppressed because it is too large

View File

@@ -200,6 +200,7 @@ typedef struct
// num_tsgs is 1. Pre-Volta GPUs also have a single TSG object, but since HW
// does not support TSG for CE engines, a HW TSG is not created, but a TSG
// object is required to allocate channels.
//
// When Confidential Computing mode is enabled, the WLC and LCIC channel
// types require one TSG for each WLC/LCIC pair of channels. In this case,
// we do not use a TSG per channel pool, but instead a TSG per WLC/LCIC
@@ -228,21 +229,65 @@ typedef struct
// variant is required when the thread holding the pool lock must sleep
// (ex: acquire another mutex) deeper in the call stack, either in UVM or
// RM.
union {
union
{
uvm_spinlock_t spinlock;
uvm_mutex_t mutex;
};
// Secure operations require that uvm_push_begin order matches
// uvm_push_end order, because the engine's state is used in its internal
// operation and each push may modify this state. push_locks is protected by
// the channel pool lock.
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
struct
{
// Secure operations require that uvm_push_begin order matches
// uvm_push_end order, because the engine's state is used in its
// internal operation and each push may modify this state.
// push_locks is protected by the channel pool lock.
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
// Counting semaphore for available and unlocked channels, it must be
// acquired before submitting work to a channel when the Confidential
// Computing feature is enabled.
uvm_semaphore_t push_sem;
// Counting semaphore for available and unlocked channels, it must be
// acquired before submitting work to a channel when the Confidential
// Computing feature is enabled.
uvm_semaphore_t push_sem;
// Per channel buffers in unprotected sysmem.
uvm_rm_mem_t *pool_sysmem;
// Per channel buffers in protected vidmem.
uvm_rm_mem_t *pool_vidmem;
struct
{
// Current encryption key version, incremented upon key rotation.
// While there are separate keys for encryption and decryption, the
// two keys are rotated at once, so the versioning applies to both.
NvU32 version;
// Lock used to ensure mutual exclusion during key rotation.
uvm_mutex_t mutex;
// CSL contexts passed to RM for key rotation. This is usually an
// array containing the CSL contexts associated with the channels in
// the pool. In the case of the WLC pool, the array also includes
// CSL contexts associated with LCIC channels.
UvmCslContext **csl_contexts;
// Number of elements in the CSL context array.
unsigned num_csl_contexts;
// Number of bytes encrypted, or decrypted, on the engine associated
// with the pool since the last key rotation. Only used during
// testing, to force key rotations after a certain encryption size,
// see UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD.
//
// Encryptions on a LCIC pool are accounted for in the paired WLC
// pool.
//
// TODO: Bug 4612912: these accounting variables can be removed once
// RM exposes an API to set the key rotation lower threshold.
atomic64_t encrypted;
atomic64_t decrypted;
} key_rotation;
} conf_computing;
} uvm_channel_pool_t;
struct uvm_channel_struct
@@ -322,43 +367,14 @@ struct uvm_channel_struct
// work launches to match the order of push end-s that triggered them.
volatile NvU32 gpu_put;
// Static pushbuffer for channels with static schedule (WLC/LCIC)
uvm_rm_mem_t *static_pb_protected_vidmem;
// Static pushbuffer staging buffer for WLC
uvm_rm_mem_t *static_pb_unprotected_sysmem;
void *static_pb_unprotected_sysmem_cpu;
void *static_pb_unprotected_sysmem_auth_tag_cpu;
// The above static locations are required by the WLC (and LCIC)
// schedule. Protected sysmem location completes WLC's independence
// from the pushbuffer allocator.
// Protected sysmem location makes WLC independent from the pushbuffer
// allocator. Unprotected sysmem and protected vidmem counterparts
// are allocated from the channel pool (sysmem, vidmem).
void *static_pb_protected_sysmem;
// Static tracking semaphore notifier values
// Because of LCIC's fixed schedule, the secure semaphore release
// mechanism uses two additional static locations for incrementing the
// notifier values. See:
// . channel_semaphore_secure_release()
// . setup_lcic_schedule()
// . internal_channel_submit_work_wlc()
uvm_rm_mem_t *static_notifier_unprotected_sysmem;
NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;
// Explicit location for push launch tag used by WLC.
// Encryption auth tags have to be located in unprotected sysmem.
void *launch_auth_tag_cpu;
NvU64 launch_auth_tag_gpu_va;
// Used to decrypt the push back to protected sysmem.
// This happens when profilers register callbacks for migration data.
uvm_push_crypto_bundle_t *push_crypto_bundles;
// Accompanying authentication tags for the crypto bundles
uvm_rm_mem_t *push_crypto_bundle_auth_tags;
} conf_computing;
// RM channel information
@@ -401,6 +417,8 @@ struct uvm_channel_struct
struct list_head channel_list_node;
NvU32 pending_event_count;
} tools;
bool suspended_p2p;
};
struct uvm_channel_manager_struct
@@ -451,8 +469,24 @@ struct uvm_channel_manager_struct
UVM_BUFFER_LOCATION gpput_loc;
UVM_BUFFER_LOCATION pushbuffer_loc;
} conf;
struct
{
// Flag indicating that the WLC/LCIC mechanism is ready/setup; should
// only be false during (de)initialization.
bool wlc_ready;
// True indicates that key rotation is enabled (UVM-wise).
bool key_rotation_enabled;
} conf_computing;
};
// Index of a channel pool within the manager
static unsigned uvm_channel_pool_index_in_channel_manager(const uvm_channel_pool_t *pool)
{
return pool - pool->manager->channel_pools;
}
// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
@@ -501,6 +535,14 @@ uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel);
uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel);
NvU64 uvm_channel_get_static_pb_protected_vidmem_gpu_va(uvm_channel_t *channel);
NvU64 uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(uvm_channel_t *channel);
char* uvm_channel_get_static_pb_unprotected_sysmem_cpu(uvm_channel_t *channel);
bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool);
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
@@ -518,6 +560,11 @@ static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
return !uvm_channel_pool_is_sec2(pool);
}
static bool uvm_channel_is_p2p(uvm_channel_t *channel)
{
return uvm_channel_pool_is_p2p(channel->pool);
}
static bool uvm_channel_is_ce(uvm_channel_t *channel)
{
return uvm_channel_pool_is_ce(channel->pool);
@@ -532,6 +579,17 @@ static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
return UVM_CHANNEL_TYPE_MEMOPS;
}
// Force key rotation in the engine associated with the given channel pool.
// Rotation may still not happen if RM cannot acquire the necessary locks (in
// which case the function returns NV_ERR_STATE_IN_USE).
//
// This function should be only invoked in pools in which key rotation is
// enabled.
NV_STATUS uvm_channel_pool_rotate_key(uvm_channel_pool_t *pool);
// Retrieve the current encryption key version associated with the channel pool.
NvU32 uvm_channel_pool_key_version(uvm_channel_pool_t *pool);
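Putting the two declarations together, here is a hedged sketch of forcing a rotation and then sampling the resulting key version, patterned on the retry loop the key rotation tests in this change use when RM cannot take its locks. The helper name, retry bound, and sleep interval are illustrative assumptions.

// Illustrative only: retry uvm_channel_pool_rotate_key() while RM reports
// NV_ERR_STATE_IN_USE, then return the pool's current key version.
// Assumes key rotation is enabled in the pool, per the comment above.
static NV_STATUS example_rotate_and_get_version(uvm_channel_pool_t *pool, NvU32 *version_out)
{
    unsigned num_tries;
    NV_STATUS status = NV_ERR_STATE_IN_USE;

    for (num_tries = 0; num_tries < 20 && status == NV_ERR_STATE_IN_USE; num_tries++) {
        status = uvm_channel_pool_rotate_key(pool);

        // RM could not acquire the locks it needs; back off briefly and retry.
        if (status == NV_ERR_STATE_IN_USE)
            usleep_range(1000, 10000);
    }

    if (status != NV_OK)
        return status;

    *version_out = uvm_channel_pool_key_version(pool);
    return NV_OK;
}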
// Privileged channels support all the Host and engine methods, while
// non-privileged channels don't support privileged methods.
//
@@ -542,14 +600,25 @@ bool uvm_channel_is_privileged(uvm_channel_t *channel);
// Destroy the channel manager
void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager);
// Suspend p2p traffic on channels used for p2p operations.
// This is used in STO recovery sequence to quiet nvlink traffic before the
// links can be restored.
NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager);
// Resume p2p traffic on channels used for p2p operations.
// This is used at the end of the STO recovery sequence to resume suspended p2p
// traffic on p2p channels.
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager);
// Get the current status of the channel
// Returns NV_OK if the channel is in a good state and NV_ERR_RC_ERROR
// otherwise. Notably this never sets the global fatal error.
// Returns NV_OK if the channel is in a good state,
// NV_ERR_RC_ERROR otherwise.
// Notably this never sets the global fatal error.
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel);
// Check for channel errors
// Checks for channel errors by calling uvm_channel_get_status(). If an error
// occurred, sets the global fatal error and prints errors.
// Checks for channel errors by calling uvm_channel_get_status().
// If a fatal error occurred, sets the global fatal error and prints errors.
NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel);
// Check errors on all channels in the channel manager
@@ -579,13 +648,11 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
// beginning.
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);
// Check if WLC/LCIC mechanism is ready/setup
// Should only return false during initialization
static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
{
return (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] != NULL) &&
(manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL);
return manager->conf_computing.wlc_ready;
}
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
// associated with access_channel.
//

View File

@@ -206,9 +206,10 @@ static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
uvm_for_each_pool(pool, manager) {
uvm_channel_t *channel;
// Skip LCIC channels as those can't accept any pushes
if (uvm_channel_pool_is_lcic(pool))
continue;
// Skip LCIC channels as those can't accept any pushes
if (uvm_channel_pool_is_lcic(pool))
continue;
uvm_for_each_channel_in_pool(channel, pool) {
NvU32 i;
for (i = 0; i < 512; ++i) {
@@ -796,11 +797,8 @@ done:
static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_channel_pool_t *pool;
uvm_push_t *pushes;
uvm_gpu_t *gpu;
NvU32 i;
NvU32 num_pushes;
uvm_push_t *pushes = NULL;
uvm_gpu_t *gpu = NULL;
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
@@ -810,9 +808,19 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
for_each_va_space_gpu(gpu, va_space) {
uvm_channel_type_t channel_type;
// Key rotation is disabled because this test relies on nested pushes,
// which is illegal. If any push other than the first one triggers key
// rotation, the test won't complete. This is because key rotation
// depends on waiting for ongoing pushes to end, which doesn't happen
// if those pushes are ended after the current one begins.
uvm_conf_computing_disable_key_rotation(gpu);
for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
TEST_CHECK_RET(pool != NULL);
NvU32 i;
NvU32 num_pushes;
uvm_channel_pool_t *pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
TEST_CHECK_GOTO(pool != NULL, error);
// Skip LCIC channels as those can't accept any pushes
if (uvm_channel_pool_is_lcic(pool))
@@ -824,7 +832,7 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);
pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
TEST_CHECK_RET(pushes != NULL);
TEST_CHECK_GOTO(pushes != NULL, error);
for (i = 0; i < num_pushes; i++) {
uvm_push_t *push = &pushes[i];
@@ -841,12 +849,18 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
uvm_kvfree(pushes);
}
uvm_conf_computing_enable_key_rotation(gpu);
}
uvm_thread_context_lock_enable_tracking();
return status;
error:
if (gpu != NULL)
uvm_conf_computing_enable_key_rotation(gpu);
uvm_thread_context_lock_enable_tracking();
uvm_kvfree(pushes);
@@ -948,6 +962,318 @@ release:
return NV_OK;
}
static NV_STATUS force_key_rotations(uvm_channel_pool_t *pool, unsigned num_rotations)
{
unsigned num_tries;
unsigned max_num_tries = 20;
unsigned num_rotations_completed = 0;
if (num_rotations == 0)
return NV_OK;
// The number of accepted rotations is kept low, so failed rotation
// invocations due to RM not acquiring the necessary locks (which imply a
// sleep in the test) do not balloon the test execution time.
UVM_ASSERT(num_rotations <= 10);
for (num_tries = 0; (num_tries < max_num_tries) && (num_rotations_completed < num_rotations); num_tries++) {
// Force key rotation, irrespective of encryption usage.
NV_STATUS status = uvm_channel_pool_rotate_key(pool);
// Key rotation may not be able to complete due to RM failing to acquire
// the necessary locks. Detect the situation, sleep for a bit, and then
// try again
//
// The maximum time spent sleeping in a single rotation call is
// (max_num_tries * max_sleep_us)
if (status == NV_ERR_STATE_IN_USE) {
NvU32 min_sleep_us = 1000;
NvU32 max_sleep_us = 10000;
usleep_range(min_sleep_us, max_sleep_us);
continue;
}
TEST_NV_CHECK_RET(status);
num_rotations_completed++;
}
// If not a single key rotation occurred, the dependent tests still pass,
// but there is not much value to them. Instead, return an error so the
// maximum number of tries, or the maximum sleep time, can be adjusted to
// ensure that at least one rotation completes.
if (num_rotations_completed > 0)
return NV_OK;
else
return NV_ERR_STATE_IN_USE;
}
static NV_STATUS force_key_rotation(uvm_channel_pool_t *pool)
{
return force_key_rotations(pool, 1);
}
// Test key rotation in all pools. This is useful because key rotation may not
// happen otherwise on certain engines during UVM test execution. For example,
// if the MEMOPS channel type is mapped to a CE not shared with any other
// channel type, then the only encryption taking place in the engine is due to
// semaphore releases (4 bytes each). This small encryption size makes it
// unlikely to exceed even small rotation thresholds.
static NV_STATUS test_channel_key_rotation_basic(uvm_gpu_t *gpu)
{
uvm_channel_pool_t *pool;
uvm_for_each_pool(pool, gpu->channel_manager) {
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
continue;
TEST_NV_CHECK_RET(force_key_rotation(pool));
}
return NV_OK;
}
// Interleave GPU encryptions and decryptions, and their CPU counterparts, with
// key rotations.
static NV_STATUS test_channel_key_rotation_interleave(uvm_gpu_t *gpu)
{
int i;
uvm_channel_pool_t *gpu_to_cpu_pool;
uvm_channel_pool_t *cpu_to_gpu_pool;
NV_STATUS status = NV_OK;
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
void *initial_plain_cpu = NULL;
void *final_plain_cpu = NULL;
uvm_mem_t *plain_gpu = NULL;
uvm_gpu_address_t plain_gpu_address;
cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));
gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));
initial_plain_cpu = uvm_kvmalloc_zero(size);
if (initial_plain_cpu == NULL) {
status = NV_ERR_NO_MEMORY;
goto out;
}
final_plain_cpu = uvm_kvmalloc_zero(size);
if (final_plain_cpu == NULL) {
status = NV_ERR_NO_MEMORY;
goto out;
}
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
memset(initial_plain_cpu, 1, size);
for (i = 0; i < 5; i++) {
TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
plain_gpu_address,
initial_plain_cpu,
size,
NULL,
"CPU > GPU"),
out);
TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
final_plain_cpu,
plain_gpu_address,
size,
NULL,
"GPU > CPU"),
out);
TEST_CHECK_GOTO(!memcmp(initial_plain_cpu, final_plain_cpu, size), out);
memset(final_plain_cpu, 0, size);
}
out:
uvm_mem_free(plain_gpu);
uvm_kvfree(final_plain_cpu);
uvm_kvfree(initial_plain_cpu);
return status;
}
static NV_STATUS memset_vidmem(uvm_mem_t *mem, NvU8 val)
{
uvm_push_t push;
uvm_gpu_address_t gpu_address;
uvm_gpu_t *gpu = mem->backing_gpu;
UVM_ASSERT(uvm_mem_is_vidmem(mem));
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));
gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
gpu->parent->ce_hal->memset_1(&push, gpu_address, val, mem->size);
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
return NV_OK;
}
// Custom version of uvm_conf_computing_util_memcopy_gpu_to_cpu that allows
// testing to insert key rotations in between the push end, and the CPU
// decryption
static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
void *dst_plain,
uvm_gpu_address_t src_gpu_address,
size_t size,
unsigned num_rotations_to_insert)
{
NV_STATUS status;
uvm_push_t push;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
void *src_cipher, *auth_tag;
uvm_channel_t *channel;
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
if (status != NV_OK)
return status;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Small GPU > CPU encryption");
if (status != NV_OK)
goto out;
channel = push.channel;
uvm_conf_computing_log_gpu_encryption(channel, size, dma_buffer->decrypt_iv);
dma_buffer->key_version[0] = uvm_channel_pool_key_version(channel->pool);
dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
status = uvm_push_end_and_wait(&push);
if (status != NV_OK)
goto out;
TEST_NV_CHECK_GOTO(force_key_rotations(channel->pool, num_rotations_to_insert), out);
// If num_rotations_to_insert is not zero, the current encryption key will
// be different from the one used during CE encryption.
src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
status = uvm_conf_computing_cpu_decrypt(channel,
dst_plain,
src_cipher,
dma_buffer->decrypt_iv,
dma_buffer->key_version[0],
size,
auth_tag);
out:
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
return status;
}
static NV_STATUS test_channel_key_rotation_cpu_decryption(uvm_gpu_t *gpu,
unsigned num_repetitions,
unsigned num_rotations_to_insert)
{
unsigned i;
uvm_channel_pool_t *gpu_to_cpu_pool;
NV_STATUS status = NV_OK;
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
NvU8 *plain_cpu = NULL;
uvm_mem_t *plain_gpu = NULL;
uvm_gpu_address_t plain_gpu_address;
if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
return NV_OK;
gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));
plain_cpu = (NvU8 *) uvm_kvmalloc_zero(size);
if (plain_cpu == NULL) {
status = NV_ERR_NO_MEMORY;
goto out;
}
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
TEST_NV_CHECK_GOTO(memset_vidmem(plain_gpu, 1), out);
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
for (i = 0; i < num_repetitions; i++) {
unsigned j;
TEST_NV_CHECK_GOTO(encrypted_memcopy_gpu_to_cpu(gpu,
plain_cpu,
plain_gpu_address,
size,
num_rotations_to_insert),
out);
for (j = 0; j < size; j++)
TEST_CHECK_GOTO(plain_cpu[j] == 1, out);
memset(plain_cpu, 0, size);
}
out:
uvm_mem_free(plain_gpu);
uvm_kvfree(plain_cpu);
return status;
}
// Test that CPU decryptions can use old keys, i.e., previous versions of the
// keys that are no longer the current key due to key rotation. Given that SEC2
// does not expose encryption capabilities, the "decrypt-after-rotation" problem
// is exclusive to CE encryptions.
static NV_STATUS test_channel_key_rotation_decrypt_after_key_rotation(uvm_gpu_t *gpu)
{
// Instruct encrypted_memcopy_gpu_to_cpu to insert several key rotations
// between the GPU encryption, and the associated CPU decryption.
unsigned num_rotations_to_insert = 8;
TEST_NV_CHECK_RET(test_channel_key_rotation_cpu_decryption(gpu, 1, num_rotations_to_insert));
return NV_OK;
}
static NV_STATUS test_channel_key_rotation(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
for_each_va_space_gpu(gpu, va_space) {
if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
break;
TEST_NV_CHECK_RET(test_channel_key_rotation_basic(gpu));
TEST_NV_CHECK_RET(test_channel_key_rotation_interleave(gpu));
TEST_NV_CHECK_RET(test_channel_key_rotation_decrypt_after_key_rotation(gpu));
}
return NV_OK;
}
static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
@@ -967,6 +1293,7 @@ static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
// after their schedule is set up
if (uvm_channel_pool_is_wlc(pool))
continue;
uvm_for_each_channel_in_pool(channel, pool) {
NvU32 i;
@@ -1006,6 +1333,7 @@ static NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
// after their schedule is set up
if (uvm_channel_pool_is_wlc(pool))
continue;
uvm_for_each_channel_in_pool(channel, pool) {
NvU32 i;
uvm_push_t push;
@@ -1094,7 +1422,7 @@ static NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
TEST_NV_CHECK_GOTO(uvm_channel_write_ctrl_gpfifo(channel, entry), error);
// Release the semaphore.
UVM_WRITE_ONCE(*cpu_ptr, 1);
WRITE_ONCE(*cpu_ptr, 1);
TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
@@ -1148,6 +1476,7 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
// Skip LCIC channels as those can't accept any pushes
if (uvm_channel_pool_is_lcic(pool))
continue;
uvm_for_each_channel_in_pool(channel, pool) {
NvU32 i;
uvm_push_t push;
@@ -1203,6 +1532,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
if (status != NV_OK)
goto done;
status = test_channel_key_rotation(va_space);
if (status != NV_OK)
goto done;
// The following tests have side effects, they reset the GPU's
// channel_manager.
status = test_channel_pushbuffer_extension_base(va_space);
@@ -1338,6 +1671,126 @@ done:
return status;
}
static NV_STATUS channel_stress_key_rotation_cpu_encryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
int i;
uvm_channel_pool_t *cpu_to_gpu_pool;
NV_STATUS status = NV_OK;
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
void *initial_plain_cpu = NULL;
uvm_mem_t *plain_gpu = NULL;
uvm_gpu_address_t plain_gpu_address;
UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU);
cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));
initial_plain_cpu = uvm_kvmalloc_zero(size);
if (initial_plain_cpu == NULL) {
status = NV_ERR_NO_MEMORY;
goto out;
}
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
memset(initial_plain_cpu, 1, size);
for (i = 0; i < params->iterations; i++) {
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
plain_gpu_address,
initial_plain_cpu,
size,
NULL,
"CPU > GPU"),
out);
}
out:
uvm_mem_free(plain_gpu);
uvm_kvfree(initial_plain_cpu);
return status;
}
static NV_STATUS channel_stress_key_rotation_cpu_decryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
unsigned num_rotations_to_insert = 0;
UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU);
return test_channel_key_rotation_cpu_decryption(gpu, params->iterations, num_rotations_to_insert);
}
static NV_STATUS channel_stress_key_rotation_rotate(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
NvU32 i;
UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE);
for (i = 0; i < params->iterations; ++i) {
NV_STATUS status;
uvm_channel_pool_t *pool;
uvm_channel_type_t type;
if ((i % 3) == 0)
type = UVM_CHANNEL_TYPE_CPU_TO_GPU;
else if ((i % 3) == 1)
type = UVM_CHANNEL_TYPE_GPU_TO_CPU;
else
type = UVM_CHANNEL_TYPE_WLC;
pool = gpu->channel_manager->pool_to_use.default_for_type[type];
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
return NV_ERR_INVALID_STATE;
status = force_key_rotation(pool);
if (status != NV_OK)
return status;
}
return NV_OK;
}
// The objective of this test is documented in the user-level function
static NV_STATUS uvm_test_channel_stress_key_rotation(uvm_va_space_t *va_space, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
uvm_test_rng_t rng;
uvm_gpu_t *gpu;
NV_STATUS status = NV_OK;
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
uvm_test_rng_init(&rng, params->seed);
uvm_va_space_down_read(va_space);
// Key rotation should be enabled, or disabled, in all GPUs. Pick a random
// one.
gpu = random_va_space_gpu(&rng, va_space);
if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
goto out;
if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU)
status = channel_stress_key_rotation_cpu_encryption(gpu, params);
else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU)
status = channel_stress_key_rotation_cpu_decryption(gpu, params);
else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE)
status = channel_stress_key_rotation_rotate(gpu, params);
else
status = NV_ERR_INVALID_PARAMETER;
out:
uvm_va_space_up_read(va_space);
return status;
}
NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
@@ -1349,6 +1802,8 @@ NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct
return uvm_test_channel_stress_update_channels(va_space, params);
case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
return uvm_test_channel_noop_push(va_space, params);
case UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION:
return uvm_test_channel_stress_key_rotation(va_space, params);
default:
return NV_ERR_INVALID_PARAMETER;
}

View File

@@ -281,29 +281,6 @@ NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin)
return NV_OK;
}
// This formats a GPU UUID, in a UVM-friendly way. That is, nearly the same as
// what nvidia-smi reports. It will always prefix the UUID with UVM-GPU so
// that we know that we have a real, binary formatted UUID that will work in
// the UVM APIs.
//
// It comes out like this:
//
// UVM-GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
//
// This routine will always null-terminate the string for you. This is true
// even if the buffer was too small!
//
// Return value is the number of non-null characters written.
//
// Note that if you were to let the NV2080_CTRL_CMD_GPU_GET_GID_INFO command
// return it's default format, which is ascii, not binary, then you would get
// this back:
//
// GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
//
// ...which is actually a character string, and won't work for UVM API calls.
// So it's very important to be able to see the difference.
//
static char uvm_digit_to_hex(unsigned value)
{
if (value >= 10)
@@ -312,27 +289,19 @@ static char uvm_digit_to_hex(unsigned value)
return value + '0';
}
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pUuidStruct)
void uvm_uuid_string(char *buffer, const NvProcessorUuid *pUuidStruct)
{
char *str = buffer+8;
char *str = buffer;
unsigned i;
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
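// Bits 4, 6, 8, and 10 select the iterations after which a dash is emitted,
// i.e. after the 4th, 6th, 8th, and 10th UUID bytes, producing the standard
// 8-4-4-4-12 hexadecimal grouping.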
if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
return *buffer = 0;
memcpy(buffer, "UVM-GPU-", 8);
for (i = 0; i < 16; i++) {
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
if (dashMask & (1 << (i+1)))
if (dashMask & (1 << (i + 1)))
*str++ = '-';
}
*str = 0;
return (int)(str-buffer);
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2023 NVIDIA Corporation
Copyright (c) 2013-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -50,9 +50,12 @@ enum {
NVIDIA_UVM_NUM_MINOR_DEVICES
};
#define UVM_GPU_UUID_TEXT_BUFFER_LENGTH (8+16*2+4+1)
// UUID has the format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
#define UVM_UUID_STRING_LENGTH ((8 + 1) + 3 * (4 + 1) + 12 + 1)
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pGpuUuid);
// Writes UVM_UUID_STRING_LENGTH characters into buffer, including a terminating
// NULL.
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
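UVM_UUID_STRING_LENGTH works out to (8 + 1) + 3 * (4 + 1) + 12 + 1 = 37 bytes: 32 hex digits plus 4 dashes plus the terminating NULL. A hedged usage sketch follows; the helper name and the debug print are illustrative, not part of this change.

// Illustrative only: format an arbitrary NvProcessorUuid for debug output.
static void example_print_uuid(const NvProcessorUuid *uuid)
{
    char uuid_str[UVM_UUID_STRING_LENGTH];

    // uvm_uuid_string() always NULL-terminates, so the buffer can be printed
    // directly.
    uvm_uuid_string(uuid_str, uuid);
    UVM_DBG_PRINT("UUID: %s\n", uuid_str);
}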
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
func(prefix "%s:%u %s[pid:%d]" fmt, \
@@ -98,27 +101,9 @@ bool uvm_debug_prints_enabled(void);
#define UVM_INFO_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
//
// Please see the documentation of format_uuid_to_buffer, for details on what
// this routine prints for you.
//
#define UVM_DBG_PRINT_UUID(msg, uuidPtr) \
do { \
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
UVM_DBG_PRINT("%s: %s\n", msg, uuidBuffer); \
} while (0)
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)
#define UVM_ERR_PRINT_UUID(msg, uuidPtr, ...) \
do { \
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", uuidBuffer, ##__VA_ARGS__); \
} while (0)
#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
#define UVM_PANIC_MSG(fmt, ...) UVM_PRINT_FUNC(panic, ": " fmt, ##__VA_ARGS__)
@@ -395,7 +380,7 @@ static inline void uvm_touch_page(struct page *page)
UVM_ASSERT(page);
mapping = (char *) kmap(page);
(void)UVM_READ_ONCE(*mapping);
(void)READ_ONCE(*mapping);
kunmap(page);
}
@@ -423,7 +408,9 @@ static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *
UVM_ASSERT(first);
UVM_ASSERT(outer);
if (uvm_platform_uses_canonical_form_address()) {
// Maxwell GPUs (num_va_bits == 40b) do not support canonical form address
// even when plugged into platforms using it.
if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) {
*first = 1ULL << (num_va_bits - 1);
*outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
}
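For a concrete sense of the arithmetic above: with num_va_bits == 48 on a platform using canonical-form addresses, first becomes 0x0000800000000000 and outer becomes 0xffff800000000000, i.e. exactly the canonical hole, while a 40-bit Maxwell GPU now skips this branch. A small hedged standalone check of those two expressions (outside the driver):

#include <stdint.h>
#include <stdio.h>

// Illustrative only: mirrors the first/outer computation above for a
// canonical-form platform with num_va_bits == 48.
int main(void)
{
    unsigned num_va_bits = 48;
    uint64_t first = 1ULL << (num_va_bits - 1);
    uint64_t outer = (uint64_t)((int64_t)(1ULL << 63) >> (64 - num_va_bits));

    // Prints first=0x0000800000000000 outer=0xffff800000000000
    printf("first=0x%016llx outer=0x%016llx\n",
           (unsigned long long)first,
           (unsigned long long)outer);
    return 0;
}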

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2023 NVIDIA Corporation
Copyright (c) 2021-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -33,6 +33,15 @@
#include "nv_uvm_interface.h"
#include "uvm_va_block.h"
// Amount of encrypted data on a given engine that triggers key rotation. This
// is a UVM internal threshold, different from that of RM, and used only during
// testing.
//
// Key rotation is triggered when the total encryption size, or the total
// decryption size (whatever comes first) reaches this lower threshold on the
// engine.
#define UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD (UVM_SIZE_1MB * 8)
// The maximum number of secure operations per push is:
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
@@ -56,32 +65,13 @@ static ulong uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_I
module_param(uvm_conf_computing_channel_iv_rotation_limit, ulong, S_IRUGO);
static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
{
return parent->rm_info.gpuConfComputeCaps.mode;
}
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
{
return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
}
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
{
uvm_parent_gpu_t *other_parent;
UvmGpuConfComputeMode parent_mode = uvm_conf_computing_get_mode(parent);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
// The Confidential Computing state of the GPU should match that of the
// system.
UVM_ASSERT((parent_mode != UVM_GPU_CONF_COMPUTE_MODE_NONE) == g_uvm_global.conf_computing_enabled);
// All GPUs derive Confidential Computing status from their parent. By
// current policy all parent GPUs have identical Confidential Computing
// status.
for_each_parent_gpu(other_parent)
UVM_ASSERT(parent_mode == uvm_conf_computing_get_mode(other_parent));
// Confidential Computing enablement on the system should match enablement
// on the GPU.
UVM_ASSERT(parent->rm_info.gpuConfComputeCaps.bConfComputingEnabled == g_uvm_global.conf_computing_enabled);
}
static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
@@ -334,9 +324,6 @@ static NV_STATUS dummy_iv_mem_init(uvm_gpu_t *gpu)
{
NV_STATUS status;
if (!uvm_conf_computing_mode_is_hcc(gpu))
return NV_OK;
status = uvm_mem_alloc_sysmem_dma(sizeof(UvmCslIv), gpu, NULL, &gpu->conf_computing.iv_mem);
if (status != NV_OK)
return status;
@@ -352,6 +339,19 @@ error:
return status;
}
// The production key rotation defaults are such that key rotations rarely
// happen. During UVM testing, more frequent rotations are triggered by relying
// on internal encryption usage accounting. When key rotations are triggered by
// UVM, the driver does not rely on channel key rotation notifiers.
//
// TODO: Bug 4612912: UVM should be able to programmatically set the rotation
// lower threshold. This function, and all the metadata associated with it
// (per-pool encryption accounting, for example) can be removed at that point.
static bool key_rotation_is_notifier_driven(void)
{
return !uvm_enable_builtin_tests;
}
NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
{
NV_STATUS status;
@@ -394,17 +394,35 @@ void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
}
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv)
{
NV_STATUS status;
uvm_channel_pool_t *pool;
if (uvm_channel_is_lcic(channel))
pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
else
pool = channel->pool;
uvm_mutex_lock(&channel->csl.ctx_lock);
if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, size);
// Informing RM of an encryption/decryption should not fail
UVM_ASSERT(status == NV_OK);
if (!key_rotation_is_notifier_driven())
atomic64_add(size, &pool->conf_computing.key_rotation.encrypted);
}
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// IV rotation is done preemptively as needed, so the above
// call cannot return failure.
UVM_ASSERT(status == NV_OK);
uvm_mutex_unlock(&channel->csl.ctx_lock);
}
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
@@ -428,27 +446,46 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
void *auth_tag_buffer)
{
NV_STATUS status;
uvm_channel_pool_t *pool;
UVM_ASSERT(size);
if (uvm_channel_is_lcic(channel))
pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
else
pool = channel->pool;
uvm_mutex_lock(&channel->csl.ctx_lock);
status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
size,
(NvU8 const *) src_plain,
encrypt_iv,
(NvU8 *) dst_cipher,
(NvU8 *) auth_tag_buffer);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// IV rotation is done preemptively as needed, so the above
// call cannot return failure.
UVM_ASSERT(status == NV_OK);
if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, size);
// Informing RM of an encryption/decryption should not fail
UVM_ASSERT(status == NV_OK);
if (!key_rotation_is_notifier_driven())
atomic64_add(size, &pool->conf_computing.key_rotation.decrypted);
}
uvm_mutex_unlock(&channel->csl.ctx_lock);
}
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
void *dst_plain,
const void *src_cipher,
const UvmCslIv *src_iv,
NvU32 key_version,
size_t size,
const void *auth_tag_buffer)
{
@@ -469,11 +506,19 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
size,
(const NvU8 *) src_cipher,
src_iv,
NV_U32_MAX,
key_version,
(NvU8 *) dst_plain,
NULL,
0,
(const NvU8 *) auth_tag_buffer);
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, channel %s, GPU %s\n",
nvstatusToString(status),
channel->name,
uvm_gpu_name(uvm_channel_get_gpu(channel)));
}
uvm_mutex_unlock(&channel->csl.ctx_lock);
return status;
@@ -640,3 +685,231 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
{
return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, true);
}
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu)
{
if (!g_uvm_global.conf_computing_enabled)
return;
// Key rotation cannot be enabled on UVM if it is disabled on RM
if (!gpu->parent->rm_info.gpuConfComputeCaps.bKeyRotationEnabled)
return;
gpu->channel_manager->conf_computing.key_rotation_enabled = true;
}
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
{
if (!g_uvm_global.conf_computing_enabled)
return;
gpu->channel_manager->conf_computing.key_rotation_enabled = false;
}
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
return gpu->channel_manager->conf_computing.key_rotation_enabled;
}
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
{
if (!uvm_conf_computing_is_key_rotation_enabled(pool->manager->gpu))
return false;
// TODO: Bug 4586447: key rotation must be disabled in the SEC2 engine,
// because currently the encryption key is shared between UVM and RM, but
// UVM is not able to idle SEC2 channels owned by RM.
if (uvm_channel_pool_is_sec2(pool))
return false;
// Key rotation happens as part of channel reservation, and LCIC channels
// are never reserved directly. Rotation of keys in LCIC channels happens
// as the result of key rotation in WLC channels.
//
// Return false even if there is nothing fundamental prohibiting direct key
// rotation on LCIC pools
if (uvm_channel_pool_is_lcic(pool))
return false;
return true;
}
static bool conf_computing_is_key_rotation_pending_use_stats(uvm_channel_pool_t *pool)
{
NvU64 decrypted, encrypted;
UVM_ASSERT(!key_rotation_is_notifier_driven());
decrypted = atomic64_read(&pool->conf_computing.key_rotation.decrypted);
if (decrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
return true;
encrypted = atomic64_read(&pool->conf_computing.key_rotation.encrypted);
if (encrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
return true;
return false;
}
static bool conf_computing_is_key_rotation_pending_use_notifier(uvm_channel_pool_t *pool)
{
// If key rotation is pending for the pool's engine, then the key rotation
// notifier in any of the engine channels can be used by UVM to detect the
// situation. Note that RM doesn't update all the notifiers in a single
// atomic operation, so it is possible that the channel read by UVM (the
// first one in the pool) indicates that a key rotation is pending, but
// another channel in the pool (temporarily) indicates the opposite, or vice
// versa.
uvm_channel_t *first_channel = pool->channels;
UVM_ASSERT(key_rotation_is_notifier_driven());
UVM_ASSERT(first_channel != NULL);
return first_channel->channel_info.keyRotationNotifier->status == UVM_KEY_ROTATION_STATUS_PENDING;
}
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool)
{
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
return false;
if (key_rotation_is_notifier_driven())
return conf_computing_is_key_rotation_pending_use_notifier(pool);
else
return conf_computing_is_key_rotation_pending_use_stats(pool);
}
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool)
{
NV_STATUS status;
UVM_ASSERT(uvm_conf_computing_is_key_rotation_enabled_in_pool(pool));
UVM_ASSERT(pool->conf_computing.key_rotation.csl_contexts != NULL);
UVM_ASSERT(pool->conf_computing.key_rotation.num_csl_contexts > 0);
// NV_ERR_STATE_IN_USE indicates that RM was not able to acquire the
// required locks at this time. This status is not interpreted as an error,
// but as a sign for UVM to try again later. This is the same "protocol"
// used in IV rotation.
status = nvUvmInterfaceCslRotateKey(pool->conf_computing.key_rotation.csl_contexts,
pool->conf_computing.key_rotation.num_csl_contexts);
if (status == NV_OK) {
pool->conf_computing.key_rotation.version++;
if (!key_rotation_is_notifier_driven()) {
atomic64_set(&pool->conf_computing.key_rotation.decrypted, 0);
atomic64_set(&pool->conf_computing.key_rotation.encrypted, 0);
}
}
else if (status != NV_ERR_STATE_IN_USE) {
UVM_DBG_PRINT("nvUvmInterfaceCslRotateKey() failed in engine %u: %s\n",
pool->engine_index,
nvstatusToString(status));
}
return status;
}
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
uvm_gpu_address_t dst_gpu_address,
void *src_plain,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...)
{
NV_STATUS status;
uvm_push_t push;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
void *dst_cipher, *auth_tag;
va_list args;
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
if (status != NV_OK)
return status;
va_start(args, format);
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
va_end(args);
if (status != NV_OK)
goto out;
dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);
src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
status = uvm_push_end_and_wait(&push);
out:
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
return status;
}
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
void *dst_plain,
uvm_gpu_address_t src_gpu_address,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...)
{
NV_STATUS status;
uvm_push_t push;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
void *src_cipher, *auth_tag;
va_list args;
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
if (status != NV_OK)
return status;
va_start(args, format);
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
va_end(args);
if (status != NV_OK)
goto out;
uvm_conf_computing_log_gpu_encryption(push.channel, size, dma_buffer->decrypt_iv);
dma_buffer->key_version[0] = uvm_channel_pool_key_version(push.channel->pool);
dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
status = uvm_push_end_and_wait(&push);
if (status != NV_OK)
goto out;
src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
status = uvm_conf_computing_cpu_decrypt(push.channel,
dst_plain,
src_cipher,
dma_buffer->decrypt_iv,
dma_buffer->key_version[0],
size,
auth_tag);
out:
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
return status;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2023 NVIDIA Corporation
Copyright (c) 2021-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -62,8 +62,6 @@
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);
typedef struct
{
// List of free DMA buffers (uvm_conf_computing_dma_buffer_t).
@@ -87,9 +85,9 @@ typedef struct
// a free buffer.
uvm_tracker_t tracker;
// When the DMA buffer is used as the destination of a GPU encryption, SEC2
// writes the authentication tag here. Later when the buffer is decrypted
// on the CPU the authentication tag is used again (read) for CSL to verify
// When the DMA buffer is used as the destination of a GPU encryption, the
// engine (CE or SEC2) writes the authentication tag here. When the buffer
// is decrypted on the CPU the authentication tag is used by CSL to verify
// the authenticity. The allocation is big enough for one authentication
// tag per PAGE_SIZE page in the alloc buffer.
uvm_mem_t *auth_tag;
@@ -98,7 +96,12 @@ typedef struct
// to the authentication tag. The allocation is big enough for one IV per
// PAGE_SIZE page in the alloc buffer. The granularity between the decrypt
// IV and authentication tag must match.
UvmCslIv decrypt_iv[(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE)];
UvmCslIv decrypt_iv[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];
// When the DMA buffer is used as the destination of a GPU encryption, the
// key version used during GPU encryption of each PAGE_SIZE page can be
// saved here, so CPU decryption uses the correct decryption key.
NvU32 key_version[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];
// Bitmap of the encrypted pages in the backing allocation
uvm_page_mask_t encrypted_page_mask;
@@ -147,7 +150,7 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu);
void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu);
// Logs encryption information from the GPU and returns the IV.
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv);
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv);
// Acquires next CPU encryption IV and returns it.
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv);
@@ -167,10 +170,14 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
// CPU side decryption helper. Decrypts data from src_cipher and writes the
// plain text in dst_plain. src_cipher and dst_plain can't overlap. The IV
// obtained from uvm_conf_computing_log_gpu_encryption() needs to be passed to
// src_iv.
//
// The caller must indicate which key to use for decryption by passing the
// appropriate key version number.
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
void *dst_plain,
const void *src_cipher,
const UvmCslIv *src_iv,
NvU32 key_version,
size_t size,
const void *auth_tag_buffer);
@@ -214,4 +221,71 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
// Check if there are fewer than 'limit' messages available in either direction
// and rotate if not.
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy);
// Rotate the engine key associated with the given channel pool.
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool);
// Returns true if key rotation is allowed in the channel pool.
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool);
// Returns true if key rotation is pending in the channel pool.
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool);
// Enable/disable key rotation in the passed GPU. Note that UVM enablement is
// dependent on RM enablement: key rotation may still be disabled upon calling
// this function, if it is disabled in RM. On the other hand, key rotation can
// be disabled in UVM, even if it is enabled in RM.
//
// Enablement/Disablement affects only kernel key rotation in keys owned by UVM.
// It doesn't affect user key rotation (CUDA, Video...), nor does it affect RM
// kernel key rotation.
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu);
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu);
// Returns true if key rotation is enabled on UVM in the given GPU. Key rotation
// can be enabled on the GPU but disabled on some of the GPU engines (LCEs or SEC2),
// see uvm_conf_computing_is_key_rotation_enabled_in_pool.
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu);
// Launch a synchronous, encrypted copy between CPU and GPU.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The source CPU buffer pointed by src_plain contains the unencrypted (plain
// text) contents; the function internally performs a CPU-side encryption step
// before launching the GPU-side CE decryption. The source buffer can be in
// protected or unprotected sysmem, while the destination buffer must be in
// protected vidmem.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
uvm_gpu_address_t dst_gpu_address,
void *src_plain,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...);
// Launch a synchronous, encrypted copy between CPU and GPU.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The destination CPU buffer pointed by dst_plain receives the decrypted
// (plain text) contents; the function internally performs a GPU-side CE
// encryption step followed by a CPU-side decryption. The source is the GPU
// buffer identified by src_gpu_address, while the destination buffer
// resides in CPU-accessible sysmem.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
void *dst_plain,
uvm_gpu_address_t src_gpu_address,
size_t size,
uvm_tracker_t *tracker,
const char *format,
...);
#endif // __UVM_CONF_COMPUTING_H__
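As a quick, hedged illustration of the two synchronous copy helpers declared above, the sketch below shows one plausible round trip through them. Only the two uvm_conf_computing_util_memcopy_* signatures and the UVM_CONF_COMPUTING_DMA_BUFFER_SIZE limit come from this header; the function name, buffers, and addresses are hypothetical and assume the caller has already set them up.
// Hypothetical usage sketch, not part of the driver. Copy sizes must not
// exceed UVM_CONF_COMPUTING_DMA_BUFFER_SIZE, and passing a NULL tracker
// skips tracker acquisition, per the comments above.
static NV_STATUS example_conf_computing_round_trip(uvm_gpu_t *gpu,
                                                   uvm_gpu_address_t dst_gpu_address,
                                                   uvm_gpu_address_t src_gpu_address,
                                                   void *plain_out,
                                                   void *plain_in,
                                                   size_t size)
{
    NV_STATUS status;
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
    // CPU -> GPU: CPU-side encryption followed by a GPU-side CE decryption
    // into the destination address.
    status = uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
                                                        dst_gpu_address,
                                                        plain_in,
                                                        size,
                                                        NULL,
                                                        "example upload (%zu bytes)",
                                                        size);
    if (status != NV_OK)
        return status;
    // GPU -> CPU: the reverse direction, decrypted on the CPU into plain_out.
    return uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
                                                      plain_out,
                                                      src_gpu_address,
                                                      size,
                                                      NULL,
                                                      "example readback (%zu bytes)",
                                                      size);
}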

View File

@@ -1,53 +0,0 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// This file provides simple wrappers that are always built with optimizations
// turned on to WAR issues with functions that don't build correctly otherwise.
#include "uvm_linux.h"
int nv_atomic_xchg(atomic_t *val, int new)
{
return atomic_xchg(val, new);
}
int nv_atomic_cmpxchg(atomic_t *val, int old, int new)
{
return atomic_cmpxchg(val, old, new);
}
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new)
{
return atomic_long_cmpxchg(val, old, new);
}
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n)
{
return copy_from_user(to, from, n);
}
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n)
{
return copy_to_user(to, from, n);
}
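This wrapper file is removed outright; later hunks in uvm_global.c switch its callers to the kernel's own primitives (atomic_cmpxchg(), atomic_xchg(), and the plain copy_to_user()/copy_from_user()). A minimal sketch of the resulting pattern, using an illustrative error field rather than the driver's real g_uvm_global state:
#include <linux/atomic.h>
// Illustrative only: mirrors the NV_STATUS-in-atomic_t pattern used by
// uvm_global_set_fatal_error_impl()/uvm_global_reset_fatal_error() below,
// with 0 standing in for NV_OK.
static atomic_t example_fatal_error = ATOMIC_INIT(0);
// Record the first error only; a later error does not overwrite it.
static int example_set_fatal_error(int error)
{
    return atomic_cmpxchg(&example_fatal_error, 0, error);
}
// Clear the stored error and return whatever value was there.
static int example_reset_fatal_error(void)
{
    return atomic_xchg(&example_fatal_error, 0);
}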

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -42,7 +42,6 @@ typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;
typedef struct uvm_gpu_semaphore_pool_page_struct uvm_gpu_semaphore_pool_page_t;
typedef struct uvm_gpu_peer_struct uvm_gpu_peer_t;
typedef struct uvm_mmu_mode_hal_struct uvm_mmu_mode_hal_t;
typedef struct uvm_channel_manager_struct uvm_channel_manager_t;
@@ -57,6 +56,12 @@ typedef struct uvm_gpfifo_entry_struct uvm_gpfifo_entry_t;
typedef struct uvm_va_policy_struct uvm_va_policy_t;
typedef struct uvm_va_range_struct uvm_va_range_t;
typedef struct uvm_va_range_managed_struct uvm_va_range_managed_t;
typedef struct uvm_va_range_external_struct uvm_va_range_external_t;
typedef struct uvm_va_range_channel_struct uvm_va_range_channel_t;
typedef struct uvm_va_range_sked_reflected_struct uvm_va_range_sked_reflected_t;
typedef struct uvm_va_range_semaphore_pool_struct uvm_va_range_semaphore_pool_t;
typedef struct uvm_va_range_device_p2p_struct uvm_va_range_device_p2p_t;
typedef struct uvm_va_block_struct uvm_va_block_t;
typedef struct uvm_va_block_test_struct uvm_va_block_test_t;
typedef struct uvm_va_block_wrapper_struct uvm_va_block_wrapper_t;

View File

@@ -115,8 +115,8 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
TEST_CHECK_RET(skip);
memory_owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &memory_info->uuid);
if (memory_owning_gpu == NULL)
memory_owning_gpu = uvm_va_space_get_gpu_by_mem_info(va_space, memory_info);
if (!memory_owning_gpu)
return NV_ERR_INVALID_DEVICE;
aperture = get_aperture(va_space, memory_owning_gpu, memory_mapping_gpu, memory_info, sli_supported);
@@ -129,7 +129,8 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
phys_offset = mapping_offset;
// Add the physical offset for nvswitch connected peer mappings
if (uvm_aperture_is_peer(aperture) && uvm_gpus_are_nvswitch_connected(memory_mapping_gpu, memory_owning_gpu))
if (uvm_aperture_is_peer(aperture) &&
uvm_parent_gpus_are_nvswitch_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
for (index = 0; index < ext_mapping_info->numWrittenPtes; index++) {

View File

@@ -54,6 +54,9 @@ static NV_STATUS uvm_register_callbacks(void)
g_exported_uvm_ops.stopDevice = NULL;
g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;
g_exported_uvm_ops.drainP2P = uvm_suspend_and_drainP2P_entry;
g_exported_uvm_ops.resumeP2P = uvm_resumeP2P_entry;
// Register the UVM callbacks with the main GPU driver:
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
if (status != NV_OK)
@@ -100,12 +103,6 @@ NV_STATUS uvm_global_init(void)
goto error;
}
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
if (status != NV_OK) {
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_procfs_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
@@ -194,15 +191,22 @@ NV_STATUS uvm_global_init(void)
goto error;
}
// This sets up the ISR (interrupt service routine), by hooking into RM's top-half ISR callback. As soon as this
// call completes, GPU interrupts will start arriving, so it's important to be prepared to receive interrupts before
// this point:
// This sets up the ISR (interrupt service routine), by hooking into RM's
// top-half ISR callback. As soon as this call completes, GPU interrupts
// will start arriving, so it's important to be prepared to receive
// interrupts before this point.
status = uvm_register_callbacks();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
goto error;
}
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
if (status != NV_OK) {
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
goto error;
}
return NV_OK;
error:
@@ -214,9 +218,7 @@ void uvm_global_exit(void)
{
uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);
// Guarantee completion of any release callbacks scheduled after the flush
// in uvm_resume().
nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
uvm_unregister_callbacks();
uvm_service_block_context_exit();
@@ -237,7 +239,6 @@ void uvm_global_exit(void)
uvm_procfs_exit();
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
nv_kthread_q_stop(&g_uvm_global.global_q);
uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
@@ -412,7 +413,7 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
UVM_ASSERT(error != NV_OK);
previous_error = nv_atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
previous_error = atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
if (previous_error == NV_OK) {
UVM_ERR_PRINT("Encountered a global fatal error: %s\n", nvstatusToString(error));
@@ -421,6 +422,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
nvstatusToString(error), nvstatusToString(previous_error));
}
nvUvmInterfaceReportFatalError(error);
}
NV_STATUS uvm_global_reset_fatal_error(void)
@@ -430,7 +433,7 @@ NV_STATUS uvm_global_reset_fatal_error(void)
return NV_ERR_INVALID_STATE;
}
return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
return atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
}
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask)
@@ -470,3 +473,68 @@ NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)
return NV_OK;
}
static NV_STATUS suspend_and_drainP2P(const NvProcessorUuid *parent_uuid)
{
NV_STATUS status = NV_OK;
uvm_parent_gpu_t *parent_gpu;
uvm_mutex_lock(&g_uvm_global.global_lock);
// NVLINK STO recovery is not supported in combination with MIG
parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
if (!parent_gpu || parent_gpu->smc.enabled) {
status = NV_ERR_INVALID_DEVICE;
goto unlock;
}
status = uvm_channel_manager_suspend_p2p(parent_gpu->gpus[0]->channel_manager);
unlock:
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS resumeP2P(const NvProcessorUuid *parent_uuid)
{
NV_STATUS status = NV_OK;
uvm_parent_gpu_t *parent_gpu;
uvm_mutex_lock(&g_uvm_global.global_lock);
// NVLINK STO recovery is not supported in combination with MIG
parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
if (!parent_gpu || parent_gpu->smc.enabled) {
status = NV_ERR_INVALID_DEVICE;
goto unlock;
}
uvm_channel_manager_resume_p2p(parent_gpu->gpus[0]->channel_manager);
unlock:
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid)
{
UVM_ENTRY_RET(suspend_and_drainP2P(uuid));
}
NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid)
{
UVM_ENTRY_RET(resumeP2P(uuid));
}
NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus)
{
uvm_gpu_t *gpu;
for_each_gpu_in_mask(gpu, gpus) {
NV_STATUS status = uvm_gpu_check_nvlink_error(gpu);
if (status != NV_OK)
return status;
}
return NV_OK;
}
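The drainP2P/resumeP2P entry points above are what the earlier uvm_register_callbacks() hunk hands to RM. The sketch below only illustrates how the pair is expected to bracket an NVLINK STO recovery; the recovery step itself is a placeholder, and the resume-on-failure policy is an assumption, not something this diff specifies.
// Placeholder for the RM-side recovery work; not a real RM or UVM function.
static NV_STATUS recover_nvlink_sto(const NvProcessorUuid *parent_uuid)
{
    (void)parent_uuid;
    return NV_OK;
}
// Hypothetical caller-side pairing of the two callbacks.
static NV_STATUS example_sto_recovery(const NvProcessorUuid *parent_uuid)
{
    // Quiesce P2P traffic on the parent GPU's channels first.
    NV_STATUS status = uvm_suspend_and_drainP2P_entry(parent_uuid);
    if (status != NV_OK)
        return status;
    status = recover_nvlink_sto(parent_uuid);
    // Resume P2P traffic so channels are not left suspended; assumed to be
    // done regardless of whether the recovery step succeeded.
    uvm_resumeP2P_entry(parent_uuid);
    return status;
}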

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -52,19 +52,23 @@ struct uvm_global_struct
// Created on module load and destroyed on module unload
uvmGpuSessionHandle rm_session_handle;
// peer-to-peer table
// peer info is added and removed from this table when usermode
// driver calls UvmEnablePeerAccess and UvmDisablePeerAccess
// respectively.
uvm_gpu_peer_t peers[UVM_MAX_UNIQUE_GPU_PAIRS];
// Peer-to-peer table for storing parent GPU and MIG instance peer info.
// Note that MIG instances can be peers within a single parent GPU or
// be peers in different parent GPUs if NVLINK or PCIe peer access is enabled.
// PCIe and MIG peer info is added and removed from this table when
// usermode driver calls UvmEnablePeerAccess() and UvmDisablePeerAccess()
// respectively. NvLink and MIG peers are updated when UvmRegisterGpu() and
// UvmUnregisterGpu() are called. Peer to peer state for MIG instances
// within the same parent GPU are not stored here.
uvm_parent_gpu_peer_t parent_gpu_peers[UVM_MAX_UNIQUE_PARENT_GPU_PAIRS];
// peer-to-peer copy mode
// Pascal+ GPUs support virtual addresses in p2p copies.
// Ampere+ GPUs add support for physical addresses in p2p copies.
uvm_gpu_peer_copy_mode_t peer_copy_mode;
// Stores an NV_STATUS, once it becomes != NV_OK, the driver should refuse to
// do most anything other than try and clean up as much as possible.
// Stores an NV_STATUS, once it becomes != NV_OK, the driver should refuse
// to do most anything other than try and clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
// GPUs.
atomic_t fatal_error;
@@ -167,6 +171,12 @@ NV_STATUS uvm_suspend_entry(void);
// Recover after exit from a system sleep state
NV_STATUS uvm_resume_entry(void);
// Block all P2P traffic on the GPU's channels
NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid);
// Resume P2P traffic on the GPU's channels
NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid);
// Add parent GPU to the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
@@ -232,12 +242,12 @@ static uvmGpuSessionHandle uvm_global_session_handle(void)
// suspended.
#define UVM_GPU_WRITE_ONCE(x, val) do { \
UVM_ASSERT(!uvm_global_is_suspended()); \
UVM_WRITE_ONCE(x, val); \
WRITE_ONCE(x, val); \
} while (0)
#define UVM_GPU_READ_ONCE(x) ({ \
UVM_ASSERT(!uvm_global_is_suspended()); \
UVM_READ_ONCE(x); \
READ_ONCE(x); \
})
static bool global_is_fatal_error_assert_disabled(void)
@@ -296,7 +306,7 @@ static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *
return gpu;
}
static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
static uvm_gpu_t *uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
{
uvm_gpu_id_t gpu_id;
@@ -318,7 +328,45 @@ static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t
#define for_each_gpu_in_mask(gpu, mask) \
for (gpu = uvm_processor_mask_find_first_gpu(mask); \
gpu != NULL; \
gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))
gpu = uvm_processor_mask_find_next_gpu(mask, gpu))
static uvm_parent_gpu_t *uvm_parent_processor_mask_find_first_gpu(const uvm_parent_processor_mask_t *mask)
{
uvm_parent_gpu_t *parent_gpu;
uvm_parent_gpu_id_t parent_id = uvm_parent_processor_mask_find_first_gpu_id(mask);
if (UVM_PARENT_ID_IS_INVALID(parent_id))
return NULL;
parent_gpu = uvm_parent_gpu_get(parent_id);
// See comment in uvm_processor_mask_find_first_gpu().
UVM_ASSERT_MSG(parent_gpu, "parent_id %u\n", uvm_parent_id_value(parent_id));
return parent_gpu;
}
static uvm_parent_gpu_t *uvm_parent_processor_mask_find_next_gpu(const uvm_parent_processor_mask_t *mask,
uvm_parent_gpu_t *parent_gpu)
{
uvm_parent_gpu_id_t parent_id;
UVM_ASSERT(parent_gpu);
parent_id = uvm_parent_processor_mask_find_next_gpu_id(mask, uvm_parent_id_next(parent_gpu->id));
if (UVM_PARENT_ID_IS_INVALID(parent_id))
return NULL;
parent_gpu = uvm_parent_gpu_get(parent_id);
// See comment in uvm_processor_mask_find_first_gpu().
UVM_ASSERT_MSG(parent_gpu, "parent_id %u\n", uvm_parent_id_value(parent_id));
return parent_gpu;
}
// Helper to iterate over all parent GPUs in the input mask
#define for_each_parent_gpu_in_mask(parent_gpu, mask) \
for ((parent_gpu) = uvm_parent_processor_mask_find_first_gpu((mask)); \
(parent_gpu); \
(parent_gpu) = uvm_parent_processor_mask_find_next_gpu((mask), (parent_gpu)))
// Helper to iterate over all GPUs retained by the UVM driver
// (across all va spaces).
@@ -326,7 +374,7 @@ static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
gpu != NULL; \
gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
gpu = uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -384,7 +432,7 @@ static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent
(parent_gpu) = uvm_global_find_next_parent_gpu((parent_gpu)))
// LOCKING: Must hold the global_lock
#define for_each_gpu_in_parent(parent_gpu, gpu) \
#define for_each_gpu_in_parent(gpu, parent_gpu) \
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), NULL);}); \
(gpu) != NULL; \
@@ -403,6 +451,10 @@ void uvm_global_gpu_release(const uvm_processor_mask_t *mask);
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);
// Check for nvlink errors for all GPUs in a mask
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus);
// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);
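The new for_each_parent_gpu_in_mask iterator added in this header follows the same shape as the existing for_each_gpu_in_mask. A small usage sketch, assuming the caller has already populated the mask under the appropriate locks:
// Hypothetical walk over a parent-GPU mask; not part of the driver.
static void example_visit_parent_gpus(const uvm_parent_processor_mask_t *mask)
{
    uvm_parent_gpu_t *parent_gpu;
    // The iterator asserts that every set bit maps to a valid parent GPU,
    // so parent_gpu is never NULL inside the body.
    for_each_parent_gpu_in_mask(parent_gpu, mask)
        UVM_DBG_PRINT("visiting %s\n", uvm_parent_gpu_name(parent_gpu));
}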

File diff suppressed because it is too large

View File

@@ -49,9 +49,13 @@
#include <linux/mmu_notifier.h>
#include "uvm_conf_computing.h"
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
#define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
UVM_GPU_NAME_LENGTH + UVM_GPU_UUID_TEXT_BUFFER_LENGTH)
#define UVM_PARENT_GPU_UUID_PREFIX "GPU-"
#define UVM_GPU_UUID_PREFIX "GI-"
// UVM_UUID_STRING_LENGTH already includes NULL, don't double-count it with
// sizeof()
#define UVM_PARENT_GPU_UUID_STRING_LENGTH (sizeof(UVM_PARENT_GPU_UUID_PREFIX) - 1 + UVM_UUID_STRING_LENGTH)
#define UVM_GPU_UUID_STRING_LENGTH (sizeof(UVM_GPU_UUID_PREFIX) - 1 + UVM_UUID_STRING_LENGTH)
#define UVM_GPU_MAGIC_VALUE 0xc001d00d12341993ULL
@@ -93,6 +97,11 @@ struct uvm_service_block_context_struct
// been serviced
uvm_processor_mask_t resident_processors;
// A mask of GPUs that need to be checked for NVLINK errors before the
// handler returns, but after the VA space lock has been unlocked
// to avoid RM/UVM VA space lock deadlocks.
uvm_processor_mask_t gpus_to_check_for_nvlink_errors;
// VA block region that contains all the pages affected by the operation
uvm_va_block_region_t region;
@@ -184,29 +193,49 @@ struct uvm_service_block_context_struct
typedef struct
{
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
// VMA. Used for batching ATS faults in a vma. This is unused for access
// counter service requests.
uvm_page_mask_t read_fault_mask;
union
{
struct
{
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used for batching ATS faults in a vma.
uvm_page_mask_t prefetch_only_fault_mask;
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA. Used for batching ATS faults in a vma. This is unused for access
// counter service requests.
uvm_page_mask_t write_fault_mask;
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used for batching ATS faults in a vma.
uvm_page_mask_t read_fault_mask;
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used to return ATS fault status. This is unused for access
// counter service requests.
uvm_page_mask_t faults_serviced_mask;
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used for batching ATS faults in a vma.
uvm_page_mask_t write_fault_mask;
// Mask of successfully serviced read faults on pages in write_fault_mask.
// This is unused for access counter service requests.
uvm_page_mask_t reads_serviced_mask;
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. This is a logical or of read_fault_mask and
// write_fault_mask and prefetch_only_fault_mask.
uvm_page_mask_t accessed_mask;
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
// VMA. This is used as input for access counter service requests and output
// of fault service requests.
uvm_page_mask_t accessed_mask;
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE
// aligned region of a SAM VMA. Used to return ATS fault status.
uvm_page_mask_t faults_serviced_mask;
// Mask of successfully serviced read faults on pages in
// write_fault_mask.
uvm_page_mask_t reads_serviced_mask;
} faults;
struct
{
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA.
uvm_page_mask_t accessed_mask;
// Mask of successfully migrated pages in a UVM_VA_BLOCK_SIZE
// aligned region of a SAM VMA.
uvm_page_mask_t migrated_mask;
} access_counters;
};
// Client type of the service requestor.
uvm_fault_client_type_t client_type;
@@ -249,7 +278,6 @@ typedef struct
// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
} prefetch_state;
} uvm_ats_fault_context_t;
struct uvm_fault_service_batch_context_struct
@@ -633,9 +661,10 @@ struct uvm_gpu_struct
NvProcessorUuid uuid;
// Nice printable name in the format:
// ID: 999: GPU-<parent_uuid> UVM-GI-<gi_uuid>.
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
char name[9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
// ID: 999: GPU-<parent_uuid> GI-<gi_uuid>
// UVM_PARENT_GPU_UUID_STRING_LENGTH includes a NULL character whose slot is
// used for a space instead.
char name[sizeof("ID: 999: ") - 1 + UVM_PARENT_GPU_UUID_STRING_LENGTH - 1 + 1 + UVM_GPU_UUID_STRING_LENGTH];
// Refcount of the gpu, i.e. how many times it has been retained. This is
// roughly a count of how many times it has been registered with a VA space,
@@ -680,8 +709,14 @@ struct uvm_gpu_struct
// True if the platform supports HW coherence and the GPU's memory
// is exposed as a NUMA node to the kernel.
bool enabled;
unsigned int node_id;
int node_id;
} numa;
// Physical address of the start of statically mapped fb memory in BAR1
NvU64 static_bar1_start;
// Size of statically mapped fb memory in BAR1.
NvU64 static_bar1_size;
} mem_info;
struct
@@ -706,9 +741,6 @@ struct uvm_gpu_struct
struct
{
// Mask of peer_gpus set
//
// We can use a regular processor id because P2P is not allowed between
// partitioned GPUs when SMC is enabled
uvm_processor_mask_t peer_gpu_mask;
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
@@ -847,6 +879,38 @@ struct uvm_gpu_struct
NvBool *error_notifier;
} ecc;
// NVLINK STO recovery handling
// In order to trap STO errors as soon as possible the driver has the hw
// interrupt register mapped directly. If an STO interrupt is ever noticed
// to be pending, then the UVM driver needs to:
//
// 1) ask RM to service interrupts, and then
// 2) inspect the NVLINK error notifier state.
//
// Notably, checking for channel errors is not enough, because STO errors
// can be pending, even after a channel has become idle.
//
// See more details in uvm_gpu_check_nvlink_error().
struct
{
// Does the GPU have NVLINK STO recovery enabled?
bool enabled;
// Artificially injected error for testing
atomic_t injected_error;
// Direct mapping of the 32-bit part of the hw interrupt tree that has
// the NVLINK error bits.
volatile NvU32 *hw_interrupt_tree_location;
// Mask to get the NVLINK error interrupt bits from the 32-bits above.
NvU32 mask;
// Set to true by RM when a fatal NVLINK error is encountered (requires
// asking RM to service pending interrupts to be current).
NvBool *error_notifier;
} nvlink_status;
struct
{
NvU32 swizz_id;
@@ -859,16 +923,19 @@ struct uvm_gpu_struct
struct
{
// "gpus/UVM-GPU-${physical-UUID}/${sub_processor_index}/"
struct proc_dir_entry *dir;
// "gpus/${gpu_id}" -> "UVM-GPU-${physical-UUID}/${sub_processor_index}"
struct proc_dir_entry *dir_symlink;
// The GPU instance UUID symlink if SMC is enabled.
// The GPU instance UUID symlink.
// "gpus/UVM-GI-${GI-UUID}" ->
// "UVM-GPU-${physical-UUID}/${sub_processor_index}"
struct proc_dir_entry *gpu_instance_uuid_symlink;
// "gpus/UVM-GPU-${physical-UUID}/${sub_processor_index}/info"
struct proc_dir_entry *info_file;
struct proc_dir_entry *dir_peers;
} procfs;
// Placeholder for per-GPU performance heuristics information
@@ -876,6 +943,13 @@ struct uvm_gpu_struct
// Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
bool uvm_test_force_upper_pushbuffer_segment;
// Have we initialised device p2p pages.
bool device_p2p_initialised;
// Used to protect allocation of p2p_mem and assignment of the page
// zone_device_data fields.
uvm_mutex_t device_p2p_lock;
};
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
@@ -905,7 +979,7 @@ struct uvm_parent_gpu_struct
NvProcessorUuid uuid;
// Nice printable name including the uvm gpu id, ascii name from RM and uuid
char name[UVM_GPU_NICE_NAME_BUFFER_LENGTH];
char name[sizeof("ID 999: : ") - 1 + UVM_GPU_NAME_LENGTH + UVM_PARENT_GPU_UUID_STRING_LENGTH];
// GPU information and provided by RM (architecture, implementation,
// hardware classes, etc.).
@@ -967,6 +1041,8 @@ struct uvm_parent_gpu_struct
// Whether CE supports physical addressing mode for writes to vidmem
bool ce_phys_vidmem_write_supported;
// Addressing mode(s) supported for CE transfers between this GPU and its
// peers: none, physical only, physical and virtual, etc.
uvm_gpu_peer_copy_mode_t peer_copy_mode;
// Virtualization mode of the GPU.
@@ -1056,6 +1132,15 @@ struct uvm_parent_gpu_struct
// Indicates whether the GPU can map sysmem with pages larger than 4k
bool can_map_sysmem_with_large_pages;
struct
{
// If true, the granularity of key rotation is a single channel. If
// false, the key replacement affects all channels on the engine. The
// supported granularity is dependent on the number of key slots
// available in HW.
bool per_channel_key_rotation;
} conf_computing;
// VA base and size of the RM managed part of the internal UVM VA space.
//
// The internal UVM VA is shared with RM by RM controlling some of the top
@@ -1068,6 +1153,11 @@ struct uvm_parent_gpu_struct
NvU64 rm_va_base;
NvU64 rm_va_size;
// Base and size of the GPU VA space used for peer identity mappings,
// it is used only if peer_copy_mode is UVM_GPU_PEER_COPY_MODE_VIRTUAL.
NvU64 peer_va_base;
NvU64 peer_va_size;
// Base and size of the GPU VA used for uvm_mem_t allocations mapped in the
// internal address_space_tree.
NvU64 uvm_mem_va_base;
@@ -1087,11 +1177,17 @@ struct uvm_parent_gpu_struct
struct
{
// "gpus/UVM-GPU-${physical-UUID}/"
struct proc_dir_entry *dir;
// "gpus/UVM-GPU-${physical-UUID}/fault_stats"
struct proc_dir_entry *fault_stats_file;
// "gpus/UVM-GPU-${physical-UUID}/access_counters"
struct proc_dir_entry *access_counters_file;
// "gpus/UVM-GPU-${physical-UUID}/peers/"
struct proc_dir_entry *dir_peers;
} procfs;
// Interrupt handling state and locks
@@ -1220,6 +1316,22 @@ struct uvm_parent_gpu_struct
unsigned long smmu_prod;
unsigned long smmu_cons;
} smmu_war;
struct
{
// Is EGM support enabled on this GPU.
bool enabled;
// Local EGM peer ID. This ID is used to route EGM memory accesses to
// the local CPU socket.
NvU8 local_peer_id;
// EGM base address of the EGM carveout for remote EGM accesses.
// The base address is used when computing PTE PA address values for
// accesses to the local CPU socket's EGM memory from other peer
// GPUs.
NvU64 base_address;
} egm;
};
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
@@ -1239,42 +1351,71 @@ static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
return gpu->parent->rm_device;
}
struct uvm_gpu_peer_struct
typedef struct
{
// ref_count also controls state maintained in each GPU instance
// (uvm_gpu_t). See init_peer_access().
NvU64 ref_count;
} uvm_gpu_peer_t;
typedef struct
{
// The fields in this global structure can only be inspected under one of
// the following conditions:
//
// - The VA space lock is held for either read or write, both GPUs are
// registered in the VA space, and the corresponding bit in the
// - The VA space lock is held for either read or write, both parent GPUs
// are registered in the VA space, and the corresponding bit in the
// va_space.enabled_peers bitmap is set.
//
// - The global lock is held.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be SMC peers and were both retained.
// - While the global lock was held in the past, the two parent GPUs were
// both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be NVLINK peers and were both retained.
// - While the global lock was held in the past, the two parent GPUs were
// detected to be NVLINK peers and were both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was called.
// - While the global lock was held in the past, the two parent GPUs were
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
// called.
//
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
// GPU must be read from the original GPU's peer_gpus table. The fields
// will not change while the lock is held, but they may no longer be valid
// because the other GPU might be in teardown.
// Peer Id associated with this device w.r.t. a peer GPU.
// This field is used to determine when this struct has been initialized
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
// PCIe peers are initialized when retain_pcie_peers_from_uuids() is called.
NvU64 ref_count;
// Saved values from UvmGpuP2PCapsParams to be used after GPU instance
// creation. This should be per GPU instance since LCEs are associated with
// GPU instances, not parent GPUs, but for now MIG is not supported for
// NVLINK peers so RM associates this state with the parent GPUs. This will
// need to be revisited if that NVLINK MIG peer support is added.
NvU8 optimalNvlinkWriteCEs[2];
// Peer Id associated with this device with respect to a peer parent GPU.
// Note: peerId (A -> B) != peerId (B -> A)
// peer_id[0] from min(gpu_id_1, gpu_id_2) -> max(gpu_id_1, gpu_id_2)
// peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
NvU8 peer_ids[2];
// The link type between the peer GPUs, currently either PCIe or NVLINK.
// This field is used to determine when this peer struct has been
// initialized (link_type != UVM_GPU_LINK_INVALID). NVLink peers are
// initialized at GPU registration time. PCIe peers are initialized when
// the refcount below goes from 0 to 1.
// EGM peer Id associated with this device w.r.t. a peer GPU.
// Note: egmPeerId (A -> B) != egmPeerId (B -> A)
// egm_peer_id[0] from min(gpu_id_1, gpu_id_2) -> max(gpu_id_1, gpu_id_2)
// egm_peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
//
// Unlike VIDMEM peers, EGM peers are not symmetric. This means that if
// one of the GPUs is EGM-enabled, it does not automatically mean that
// the other is also EGM-enabled. Therefore, EGM peer Ids are only
// valid if the peer GPU is EGM-enabled, i.e. egm_peer_id[0] is valid
// iff max(gpu_id_1, gpu_id_2) is EGM-enabled.
NvU8 egm_peer_ids[2];
// The link type between the peer parent GPUs, currently either PCIe or
// NVLINK.
uvm_gpu_link_type_t link_type;
// Maximum unidirectional bandwidth between the peers in megabytes per
@@ -1282,10 +1423,6 @@ struct uvm_gpu_peer_struct
// See UvmGpuP2PCapsParams.
NvU32 total_link_line_rate_mbyte_per_s;
// For PCIe, the number of times that this has been retained by a VA space.
// For NVLINK this will always be 1.
NvU64 ref_count;
// This handle gets populated when enable_peer_access successfully creates
// an NV50_P2P object. disable_peer_access resets the same on the object
// deletion.
@@ -1299,9 +1436,13 @@ struct uvm_gpu_peer_struct
// GPU-A <-> GPU-B link is bidirectional, pairs[x][0] is always the
// local GPU, while pairs[x][1] is the remote GPU. The table shall be
// filled like so: [[GPU-A, GPU-B], [GPU-B, GPU-A]].
uvm_gpu_t *pairs[2][2];
uvm_parent_gpu_t *pairs[2][2];
} procfs;
};
// Peer-to-peer state for MIG instance pairs between two different parent
// GPUs.
uvm_gpu_peer_t gpu_peers[UVM_MAX_UNIQUE_SUB_PROCESSOR_PAIRS];
} uvm_parent_gpu_peer_t;
// Initialize global gpu state
NV_STATUS uvm_gpu_init(void);
@@ -1315,7 +1456,9 @@ void uvm_gpu_exit_va_space(uvm_va_space_t *va_space);
static unsigned int uvm_gpu_numa_node(uvm_gpu_t *gpu)
{
UVM_ASSERT(gpu->mem_info.numa.enabled);
if (!gpu->mem_info.numa.enabled)
UVM_ASSERT(gpu->mem_info.numa.node_id == NUMA_NO_NODE);
return gpu->mem_info.numa.node_id;
}
@@ -1324,6 +1467,7 @@ static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struc
unsigned long sys_addr = page_to_pfn(page) << PAGE_SHIFT;
unsigned long gpu_offset = sys_addr - gpu->parent->system_bus.memory_window_start;
UVM_ASSERT(gpu->mem_info.numa.enabled);
UVM_ASSERT(page_to_nid(page) == uvm_gpu_numa_node(gpu));
UVM_ASSERT(sys_addr >= gpu->parent->system_bus.memory_window_start);
UVM_ASSERT(sys_addr + PAGE_SIZE - 1 <= gpu->parent->system_bus.memory_window_end);
@@ -1380,12 +1524,12 @@ static NvU64 uvm_gpu_retained_count(uvm_gpu_t *gpu)
return atomic64_read(&gpu->retained_count);
}
// Decrease the refcount on the parent GPU object, and actually delete the object
// if the refcount hits zero.
// Decrease the refcount on the parent GPU object, and actually delete the
// object if the refcount hits zero.
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *gpu);
// Calculates peer table index using GPU ids.
NvU32 uvm_gpu_peer_table_index(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);
// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
NvU32 uvm_gpu_pair_index(const uvm_gpu_id_t id0, const uvm_gpu_id_t id1);
// Either retains an existing PCIe peer entry or creates a new one. In both
// cases the two GPUs are also each retained.
@@ -1396,35 +1540,45 @@ NV_STATUS uvm_gpu_retain_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
// LOCKING: requires the global lock to be held
void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
uvm_gpu_link_type_t uvm_parent_gpu_peer_link_type(uvm_parent_gpu_t *parent_gpu0, uvm_parent_gpu_t *parent_gpu1);
// Get the aperture for local_gpu to use to map memory resident on remote_gpu.
// They must not be the same gpu.
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);
// Returns the physical address for use by accessing_gpu of a vidmem allocation
// on the peer owning_gpu. This address can be used for making PTEs on
// accessing_gpu, but not for copying between the two GPUs. For that, use
// uvm_gpu_peer_copy_address.
uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu);
// Returns the physical or virtual address for use by accessing_gpu to copy to/
// from a vidmem allocation on the peer owning_gpu. This may be different from
// uvm_gpu_peer_phys_address to handle CE limitations in addressing peer
// physical memory directly.
uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu);
// Return the reference count for the P2P state between the given GPUs.
// The two GPUs must have different parents.
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
// Get the P2P capabilities between the gpus with the given indexes
uvm_gpu_peer_t *uvm_gpu_index_peer_caps(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);
// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
// NUMA node that remote_gpu is attached to.
// Note that local_gpu can be equal to remote_gpu when memory is resident in
// CPU NUMA node local to local_gpu. In this case, the local EGM peer ID will
// be used.
uvm_aperture_t uvm_gpu_egm_peer_aperture(uvm_parent_gpu_t *local_gpu, uvm_parent_gpu_t *remote_gpu);
// Get the P2P capabilities between the given gpus
static uvm_gpu_peer_t *uvm_gpu_peer_caps(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
static bool uvm_gpus_are_smc_peers(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
{
return uvm_gpu_index_peer_caps(gpu0->id, gpu1->id);
}
UVM_ASSERT(gpu0 != gpu1);
static bool uvm_gpus_are_nvswitch_connected(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
{
if (gpu0->parent->nvswitch_info.is_nvswitch_connected && gpu1->parent->nvswitch_info.is_nvswitch_connected) {
UVM_ASSERT(uvm_gpu_peer_caps(gpu0, gpu1)->link_type >= UVM_GPU_LINK_NVLINK_2);
return true;
}
return false;
}
static bool uvm_gpus_are_smc_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
return gpu0->parent == gpu1->parent;
}
@@ -1460,8 +1614,8 @@ static uvm_gpu_address_t uvm_parent_gpu_address_virtual_from_sysmem_phys(uvm_par
return uvm_gpu_address_virtual(parent_gpu->flat_sysmem_va_base + pa);
}
// Given a GPU or CPU physical address (not peer), retrieve an address suitable
// for CE access.
// Given a GPU, CPU, or EGM PEER physical address (not VIDMEM peer), retrieve an
// address suitable for CE access.
static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phys_addr)
{
UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);
@@ -1483,6 +1637,12 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
return &gpu->peer_mappings[uvm_id_gpu_index(peer_id)];
}
// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent memory
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);
// Check for ECC errors
//
// Notably this check cannot be performed where it's not safe to call into RM.
@@ -1495,6 +1655,23 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);
// and it's required to call uvm_gpu_check_ecc_error() to be sure.
NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
// Check for NVLINK errors
//
// Inject NVLINK error
NV_STATUS uvm_gpu_inject_nvlink_error(uvm_gpu_t *gpu, UVM_TEST_NVLINK_ERROR_TYPE error_type);
NV_STATUS uvm_gpu_get_injected_nvlink_error(uvm_gpu_t *gpu);
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
// Check for NVLINK errors without calling into RM
//
// Calling into RM is problematic in many places, this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
// Map size bytes of contiguous sysmem on the GPU for physical access
//
// size has to be aligned to PAGE_SIZE.
@@ -1595,9 +1772,6 @@ static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *pare
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
// Debug print of GPU properties
void uvm_gpu_print(uvm_gpu_t *gpu);
// Add the given instance pointer -> user_channel mapping to this GPU. The
// bottom half GPU page fault handler uses this to look up the VA space for GPU
// faults.
@@ -1637,4 +1811,7 @@ typedef enum
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
} uvm_gpu_buffer_flush_mode_t;
// PCIe BAR containing static framebuffer memory mappings for PCIe P2P
int uvm_device_p2p_static_bar(uvm_gpu_t *gpu);
#endif // __UVM_GPU_H__
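The NVLINK error helpers declared above mirror the existing ECC pair: a cheap check that avoids RM, and a full check for contexts where calling into RM is safe. A short sketch of the intended two-step pattern (the wrapper name is illustrative):
// Run the no-RM probe first; only escalate to the RM-calling check when the
// probe reports that there might be an error.
static NV_STATUS example_check_nvlink(uvm_gpu_t *gpu)
{
    NV_STATUS status = uvm_gpu_check_nvlink_error_no_rm(gpu);
    if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
        status = uvm_gpu_check_nvlink_error(gpu);
    return status;
}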

View File

@@ -43,8 +43,9 @@
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x1
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED 0x2
#define UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR 0x1
#define UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR 0x2
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED 0x4
// Each page in a tracked physical range may belong to a different VA Block. We
// preallocate an array of reverse map translations. However, access counter
@@ -600,7 +601,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = NV_ERR_INVALID_DEVICE;
status = NV_OK;
}
else {
UvmGpuAccessCntrConfig default_config =
@@ -684,7 +685,10 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
while (get != put) {
// Wait until valid bit is set
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
if (uvm_global_get_status() != NV_OK)
goto done;
}
parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
++get;
@@ -692,6 +696,7 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
get = 0;
}
done:
write_get(parent_gpu, get);
}
@@ -830,12 +835,18 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
(fetch_mode == NOTIFICATION_FETCH_MODE_ALL || notification_index < access_counters->max_batch_size)) {
uvm_access_counter_buffer_entry_t *current_entry = &notification_cache[notification_index];
// We cannot just wait for the last entry (the one pointed by put) to become valid, we have to do it
// individually since entries can be written out of order
// We cannot just wait for the last entry (the one pointed by put) to
// become valid, we have to do it individually since entries can be
// written out of order
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
// We have some entry to work on. Let's do the rest later.
if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
goto done;
// There's no entry to work on and something has gone wrong. Ignore
// the rest.
if (uvm_global_get_status() != NV_OK)
goto done;
}
// Prevent later accesses being moved above the read of the valid bit
@@ -991,7 +1002,9 @@ static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *pare
uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
}
if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
UVM_ASSERT(!(flags & UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR));
if (flags & UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR)
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
return status;
@@ -999,9 +1012,11 @@ static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *pare
static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
uvm_gpu_t *gpu,
NvU64 base,
uvm_access_counter_buffer_entry_t **notification_start,
NvU32 num_entries,
NvU32 flags)
NvU32 flags,
uvm_page_mask_t *migrated_mask)
{
NV_STATUS status = NV_OK;
@@ -1016,8 +1031,39 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
}
}
if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
if (flags & UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR) {
NvU32 i;
UVM_ASSERT(base);
UVM_ASSERT(migrated_mask);
for (i = 0; i < num_entries; i++) {
NvU32 start_index = i;
NvU32 end_index;
for (end_index = i; end_index < num_entries; end_index++) {
NvU32 mask_index = (notification_start[end_index]->address.address - base) / PAGE_SIZE;
if (!uvm_page_mask_test(migrated_mask, mask_index))
break;
}
if (end_index > start_index) {
status = access_counter_clear_notifications(gpu,
&notification_start[start_index],
end_index - start_index);
if (status != NV_OK)
return status;
}
i = end_index;
}
}
else if (flags & UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) {
UVM_ASSERT(!base);
UVM_ASSERT(!migrated_mask);
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
}
return status;
}
@@ -1242,7 +1288,7 @@ static NV_STATUS service_phys_single_va_block(uvm_access_counter_service_batch_c
const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
gpu->id: UVM_ID_CPU;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
UVM_ASSERT(num_reverse_mappings > 0);
@@ -1304,7 +1350,7 @@ static NV_STATUS service_phys_single_va_block(uvm_access_counter_service_batch_c
}
if (status == NV_OK)
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
*out_flags |= UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
}
done:
@@ -1329,7 +1375,7 @@ static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context
NV_STATUS status = NV_OK;
size_t index;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
for (index = 0; index < num_reverse_mappings; ++index) {
NvU32 out_flags_local = 0;
@@ -1341,7 +1387,7 @@ static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context
if (status != NV_OK)
break;
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) == 0);
*out_flags |= out_flags_local;
}
@@ -1473,7 +1519,7 @@ static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_cont
resident_gpu = uvm_gpu_get(current_entry->physical_info.resident_id);
UVM_ASSERT(resident_gpu != NULL);
if (gpu != resident_gpu && uvm_gpus_are_nvswitch_connected(gpu, resident_gpu)) {
if (gpu != resident_gpu && uvm_parent_gpus_are_nvswitch_connected(gpu->parent, resident_gpu->parent)) {
UVM_ASSERT(address >= resident_gpu->parent->nvswitch_info.fabric_memory_window_start);
address -= resident_gpu->parent->nvswitch_info.fabric_memory_window_start;
}
@@ -1499,7 +1545,7 @@ static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_cont
&out_flags_local);
total_reverse_mappings += num_reverse_mappings;
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) == 0);
flags |= out_flags_local;
if (status != NV_OK)
@@ -1610,7 +1656,7 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
return;
if (UVM_ID_IS_GPU(resident_id))
resident_gpu = uvm_va_space_get_gpu(gpu_va_space->va_space, resident_id);
resident_gpu = uvm_gpu_get(resident_id);
if (uvm_va_block_get_physical_size(va_block, resident_id, page_index) != granularity) {
uvm_page_mask_set(accessed_pages, page_index);
@@ -1692,9 +1738,15 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
uvm_mutex_unlock(&va_block->lock);
if (status == NV_OK)
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
flags |= UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
flags_status = notify_tools_and_process_flags(va_space, gpu, &notifications[index], *out_index - index, flags);
flags_status = notify_tools_and_process_flags(va_space,
gpu,
0,
&notifications[index],
*out_index - index,
flags,
NULL);
if ((status == NV_OK) && (flags_status != NV_OK))
status = flags_status;
@@ -1713,7 +1765,6 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
NvU64 base;
NvU64 end;
NvU64 address;
NvU32 flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
NV_STATUS status = NV_OK;
NV_STATUS flags_status;
struct vm_area_struct *vma = NULL;
@@ -1733,7 +1784,13 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
if (!vma) {
// Clear the notification entry to continue receiving access counter
// notifications when a new VMA is allocated in this range.
status = notify_tools_and_process_flags(va_space, gpu, &notifications[index], 1, flags);
status = notify_tools_and_process_flags(va_space,
gpu,
0,
&notifications[index],
1,
UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR,
NULL);
*out_index = index + 1;
return status;
}
@@ -1741,7 +1798,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
base = UVM_VA_BLOCK_ALIGN_DOWN(address);
end = min(base + UVM_VA_BLOCK_SIZE, (NvU64)vma->vm_end);
uvm_page_mask_zero(&ats_context->accessed_mask);
uvm_page_mask_zero(&ats_context->access_counters.accessed_mask);
for (i = index; i < batch_context->virt.num_notifications; i++) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
@@ -1750,7 +1807,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address >= end)
break;
uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
uvm_page_mask_set(&ats_context->access_counters.accessed_mask, (address - base) / PAGE_SIZE);
}
*out_index = i;
@@ -1762,10 +1819,15 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
// location is set
// If no pages were actually migrated, don't clear the access counters.
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
flags_status = notify_tools_and_process_flags(va_space, gpu, &notifications[index], *out_index - index, flags);
flags_status = notify_tools_and_process_flags(va_space,
gpu,
base,
&notifications[index],
*out_index - index,
UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR,
&ats_context->access_counters.migrated_mask);
if ((status == NV_OK) && (flags_status != NV_OK))
status = flags_status;
@@ -1799,25 +1861,32 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
// Avoid clearing the entry by default.
NvU32 flags = 0;
uvm_va_block_t *va_block = NULL;
uvm_va_range_managed_t *managed_range = uvm_va_range_to_managed_or_null(va_range);
if (va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
size_t index = uvm_va_range_block_index(va_range, address);
if (managed_range) {
size_t index = uvm_va_range_block_index(managed_range, address);
va_block = uvm_va_range_block(va_range, index);
va_block = uvm_va_range_block(managed_range, index);
// If the va_range is a managed range, the notification belongs to a
// recently freed va_range if va_block is NULL. If va_block is not
// NULL, service_virt_notifications_in_block will process flags.
// Clear the notification entry to continue receiving notifications
// when a new va_range is allocated in that region.
flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
flags = UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
}
if (va_block) {
status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
}
else {
status = notify_tools_and_process_flags(va_space, gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
status = notify_tools_and_process_flags(va_space,
gpu_va_space->gpu,
0,
batch_context->virt.notifications,
1,
flags,
NULL);
*out_index = index + 1;
}
}
@@ -1839,7 +1908,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
// - If the va_block isn't HMM, the notification belongs to a recently
// freed va_range. Clear the notification entry to continue receiving
// notifications when a new va_range is allocated in this region.
flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_CLEAR;
flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
(status == NV_ERR_INVALID_ADDRESS) ||
@@ -1849,9 +1918,11 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
// in the batch.
status = notify_tools_and_process_flags(va_space,
gpu_va_space->gpu,
0,
batch_context->virt.notifications,
1,
flags);
flags,
NULL);
*out_index = index + 1;
}
@@ -1917,9 +1988,11 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
else {
status = notify_tools_and_process_flags(va_space,
current_entry->gpu,
0,
&batch_context->virt.notifications[i],
1,
0);
0,
NULL);
i++;
}
}
@@ -1979,6 +2052,64 @@ void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu)
}
}
NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
{
uvm_gpu_t *gpu;
uvm_parent_gpu_t *parent_gpu = NULL;
NV_STATUS status = NV_OK;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_processor_mask_t *retained_gpus;
retained_gpus = uvm_processor_mask_cache_alloc();
if (!retained_gpus)
return NV_ERR_NO_MEMORY;
uvm_processor_mask_zero(retained_gpus);
uvm_va_space_down_read(va_space);
for_each_va_space_gpu(gpu, va_space) {
if (gpu->parent == parent_gpu)
continue;
uvm_gpu_retain(gpu);
uvm_processor_mask_set(retained_gpus, gpu->id);
parent_gpu = gpu->parent;
}
uvm_va_space_up_read(va_space);
for_each_gpu_in_mask(gpu, retained_gpus) {
if (!gpu->parent->access_counters_supported)
continue;
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
// Access counters not enabled. Nothing to clear
if (gpu->parent->isr.access_counters.handling_ref_count) {
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
status = access_counter_clear_all(gpu);
if (status == NV_OK)
status = uvm_tracker_wait(&access_counters->clear_tracker);
}
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
if (status != NV_OK)
break;
}
for_each_gpu_in_mask(gpu, retained_gpus)
uvm_gpu_release(gpu);
uvm_processor_mask_cache_free(retained_gpus);
return status;
}
static const NvU32 g_uvm_access_counters_threshold_max = (1 << 15) - 1;
static NV_STATUS access_counters_config_from_test_params(const UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params,

View File

@@ -612,40 +612,35 @@ static void access_counters_isr_bottom_half_entry(void *args)
UVM_ENTRY_VOID(access_counters_isr_bottom_half(args));
}
// When Confidential Computing is enabled, UVM does not (indirectly) trigger
// the replayable fault interrupt by updating GET. This is because, in this
// configuration, GET is a dummy register used to inform GSP-RM (the owner
// of the HW replayable fault buffer) of the latest entry consumed by the
// UVM driver. The real GET register is owned by GSP-RM.
//
// The retriggering of a replayable faults bottom half happens then
// manually, by scheduling a bottom half for later if there is any pending
// work in the fault buffer accessible by UVM. The retriggering addresses
// two problematic scenarios caused by GET updates not setting any
// interrupt:
//
// (1) UVM didn't process all the entries up to cached PUT
//
// (2) UVM did process all the entries up to cached PUT, but GSP-RM
// added new entries such that cached PUT is out-of-date
//
// In both cases, re-enablement of interrupts would have caused the
// replayable fault to be triggered in a non-CC setup, because the updated
// value of GET is different from PUT. But this is not the case in Confidential
// Computing, so a bottom half needs to be manually scheduled in order to
// ensure that all faults are serviced.
//
// While in the typical case the retriggering happens within a replayable
// fault bottom half, it can also happen within a non-interrupt path such as
// uvm_gpu_fault_buffer_flush.
static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu)
{
bool retrigger = false;
// When Confidential Computing is enabled, UVM does not (indirectly) trigger
// the replayable fault interrupt by updating GET. This is because, in this
// configuration, GET is a dummy register used to inform GSP-RM (the owner
// of the HW replayable fault buffer) of the latest entry consumed by the
// UVM driver. The real GET register is owned by GSP-RM.
//
// The retriggering of a replayable faults bottom half then happens
// manually, by scheduling a bottom half for later if there is any pending
// work in the fault buffer accessible by UVM. The retriggering addresses
// two problematic scenarios caused by GET updates not setting any
// interrupt:
//
// (1) UVM didn't process all the entries up to cached PUT
//
// (2) UVM did process all the entries up to cached PUT, but GSP-RM
// added new entries such that cached PUT is out-of-date
//
// In both cases, re-enablement of interrupts would have caused the
// replayable fault to be triggered in a non-CC setup, because the updated
// value of GET is different from PUT. But this is not the case in Confidential
// Computing, so a bottom half needs to be manually scheduled in order to
// ensure that all faults are serviced.
//
// While in the typical case the retriggering happens within a replayable
// fault bottom half, it can also happen within a non-interrupt path such as
// uvm_gpu_fault_buffer_flush.
if (g_uvm_global.conf_computing_enabled)
retrigger = true;
if (!retrigger)
if (!g_uvm_global.conf_computing_enabled)
return;
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

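With the local flag removed, replayable_faults_retrigger_bottom_half() reduces to an early return outside Confidential Computing followed by a check for pending work under the interrupts lock. The sketch below shows that overall shape only; read_hw_put() and schedule_bottom_half() are hypothetical helpers, and the real driver reschedules through UVM's ISR bookkeeping rather than anything this simple.

/* Sketch only; all names below are stand-ins for the driver's state. */
#include <stdbool.h>
#include <stdio.h>

struct fault_buffer_state {
    unsigned cached_get;
    unsigned cached_put;
    bool conf_computing_enabled;
};

static unsigned read_hw_put(const struct fault_buffer_state *fb)
{
    /* Stand-in for re-reading PUT from the shared buffer, which picks up
     * entries GSP-RM appended after cached_put was last refreshed (case 2). */
    return fb->cached_put;
}

static void schedule_bottom_half(void)
{
    /* In the driver this re-queues the replayable-fault bottom half. */
    printf("bottom half rescheduled\n");
}

static void retrigger_bottom_half(struct fault_buffer_state *fb)
{
    /* Outside Confidential Computing, writing GET re-arms the interrupt,
     * so no manual retrigger is needed. */
    if (!fb->conf_computing_enabled)
        return;

    fb->cached_put = read_hw_put(fb);

    /* Cases (1) and (2): anything between GET and PUT is still pending. */
    if (fb->cached_get != fb->cached_put)
        schedule_bottom_half();
}

int main(void)
{
    struct fault_buffer_state fb = { .cached_get = 3, .cached_put = 5,
                                     .conf_computing_enabled = true };
    retrigger_bottom_half(&fb);
    return 0;
}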
View File

@@ -579,8 +579,10 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_access_type_t fault_access_type = fault_entry->fault_access_type;
uvm_ats_fault_context_t *ats_context = &non_replayable_faults->ats_context;
uvm_page_mask_zero(&ats_context->read_fault_mask);
uvm_page_mask_zero(&ats_context->write_fault_mask);
uvm_page_mask_zero(&ats_context->faults.prefetch_only_fault_mask);
uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
uvm_page_mask_zero(&ats_context->faults.accessed_mask);
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;
@@ -597,14 +599,17 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
}
else {
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(fault_address);
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
uvm_page_mask_t *accessed_mask = &ats_context->faults.accessed_mask;
uvm_page_index_t page_index = (fault_address - base) / PAGE_SIZE;
uvm_page_mask_t *fault_mask = (fault_access_type >= UVM_FAULT_ACCESS_TYPE_WRITE) ?
&ats_context->write_fault_mask :
&ats_context->read_fault_mask;
&ats_context->faults.write_fault_mask :
&ats_context->faults.read_fault_mask;
uvm_page_mask_set(fault_mask, page_index);
uvm_page_mask_set(accessed_mask, page_index);
status = uvm_ats_service_faults(gpu_va_space, vma, base, ats_context);
if (status == NV_OK) {
// Invalidate ATS TLB entries if needed

View File

@@ -636,6 +636,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
status = hw_fault_buffer_flush_locked(parent_gpu, HW_FAULT_BUFFER_FLUSH_MODE_DISCARD);
if (status != NV_OK)
return status;
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
}
@@ -644,7 +645,15 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
while (get != put) {
// Wait until valid bit is set
UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
// Channels might be idle (e.g. in teardown) so check for errors
// actively. In that case the gpu pointer is valid.
status = gpu ? uvm_channel_manager_check_errors(gpu->channel_manager) : uvm_global_get_status();
if (status != NV_OK) {
write_get(parent_gpu, get);
return status;
}
}
fault_buffer_skip_replayable_entry(parent_gpu, get);
++get;
@@ -890,6 +899,10 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
// We have some entry to work on. Let's do the rest later.
if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
goto done;
status = uvm_global_get_status();
if (status != NV_OK)
goto done;
}
// Prevent later accesses being moved above the read of the valid bit
@@ -1410,7 +1423,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
&end);
}
else {
policy = uvm_va_range_get_policy(va_block->va_range);
policy = &va_block->managed_range->policy;
end = va_block->end;
}
@@ -1689,11 +1702,11 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
NvU32 i;
NV_STATUS status = NV_OK;
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
const uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
const uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
const uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
const uvm_page_mask_t *reads_serviced_mask = &ats_context->faults.reads_serviced_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
uvm_page_mask_t *accessed_mask = &ats_context->faults.accessed_mask;
UVM_ASSERT(vma);
@@ -1703,7 +1716,7 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
// Remove prefetched pages from the serviced mask since fault servicing
// Remove SW prefetched pages from the serviced mask since fault servicing
// failures belonging to prefetch pages need to be ignored.
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);
@@ -1763,8 +1776,9 @@ static void start_new_sub_batch(NvU64 *sub_batch_base,
NvU32 fault_index,
uvm_ats_fault_context_t *ats_context)
{
uvm_page_mask_zero(&ats_context->read_fault_mask);
uvm_page_mask_zero(&ats_context->write_fault_mask);
uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
uvm_page_mask_zero(&ats_context->faults.prefetch_only_fault_mask);
*sub_batch_fault_index = fault_index;
*sub_batch_base = UVM_VA_BLOCK_ALIGN_DOWN(address);
@@ -1784,8 +1798,9 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_buffer_entry_t *previous_entry = NULL;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
uvm_gpu_t *gpu = gpu_va_space->gpu;
bool replay_per_va_block =
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
@@ -1817,7 +1832,9 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
// End of sub-batch. Service faults gathered so far.
if (fault_address >= (sub_batch_base + UVM_VA_BLOCK_SIZE)) {
UVM_ASSERT(!uvm_page_mask_empty(read_fault_mask) || !uvm_page_mask_empty(write_fault_mask));
UVM_ASSERT(!uvm_page_mask_empty(read_fault_mask) ||
!uvm_page_mask_empty(write_fault_mask) ||
!uvm_page_mask_empty(prefetch_only_fault_mask));
status = service_fault_batch_ats_sub_vma(gpu_va_space,
vma,
@@ -1834,8 +1851,14 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
page_index = (fault_address - sub_batch_base) / PAGE_SIZE;
if ((access_type <= UVM_FAULT_ACCESS_TYPE_READ) ||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ))
// Do not check for coalesced access type. If there are multiple different
// accesses to an address, we can disregard the prefetch one.
if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
(uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
uvm_page_mask_set(prefetch_only_fault_mask, page_index);
if ((access_type == UVM_FAULT_ACCESS_TYPE_READ) ||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ))
uvm_page_mask_set(read_fault_mask, page_index);
if (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE)
@@ -1849,7 +1872,10 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
(previous_entry->va_space == current_entry->va_space));
// Service the last sub-batch.
if ((status == NV_OK) && (!uvm_page_mask_empty(read_fault_mask) || !uvm_page_mask_empty(write_fault_mask))) {
if ((status == NV_OK) &&
(!uvm_page_mask_empty(read_fault_mask) ||
!uvm_page_mask_empty(write_fault_mask) ||
!uvm_page_mask_empty(prefetch_only_fault_mask))) {
status = service_fault_batch_ats_sub_vma(gpu_va_space,
vma,
sub_batch_base,

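The mask updates above implement a small per-page classification rule: a fault lands in the prefetch-only mask only when prefetch is the strongest access type recorded for that address, in the read mask when a read access is present, and in the write mask for write or atomic accesses. The following compact sketch restates that rule with hypothetical types; the real code operates on uvm_page_mask_t bitmaps and the UVM fault access type enum rather than the simplified enum used here.

/* Sketch only: per-page classification into the three ATS fault masks. */
#include <stdbool.h>
#include <stdio.h>

typedef enum {
    ACCESS_PREFETCH = 0,   /* weakest */
    ACCESS_READ,
    ACCESS_WRITE,
    ACCESS_ATOMIC,         /* strongest */
    ACCESS_COUNT,
} access_type_t;

#define ACCESS_BIT(t) (1u << (t))

typedef struct {
    bool prefetch_only;
    bool read;
    bool write;
} fault_masks_t;

static access_type_t highest_type(unsigned coalesced_mask)
{
    for (int t = ACCESS_COUNT - 1; t > 0; t--) {
        if (coalesced_mask & ACCESS_BIT(t))
            return (access_type_t)t;
    }
    return ACCESS_PREFETCH;
}

static void classify_fault(access_type_t access_type, unsigned coalesced_mask,
                           fault_masks_t *m)
{
    /* Prefetch-only: nothing stronger than a prefetch was coalesced for the
     * address, so the entry can be treated purely as a prefetch hint. */
    if (access_type == ACCESS_PREFETCH &&
        highest_type(coalesced_mask) == ACCESS_PREFETCH)
        m->prefetch_only = true;

    /* Read faults, or coalesced entries that include a read. */
    if (access_type == ACCESS_READ || (coalesced_mask & ACCESS_BIT(ACCESS_READ)))
        m->read = true;

    /* Writes and atomics need write servicing. */
    if (access_type >= ACCESS_WRITE)
        m->write = true;
}

int main(void)
{
    fault_masks_t m = { 0 };
    classify_fault(ACCESS_PREFETCH, ACCESS_BIT(ACCESS_PREFETCH), &m);
    printf("prefetch_only=%d read=%d write=%d\n", m.prefetch_only, m.read, m.write);
    return 0;
}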
View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -30,13 +30,34 @@
#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
#define UVM_SEMAPHORE_COUNT_PER_PAGE (PAGE_SIZE / UVM_SEMAPHORE_SIZE)
#define UVM_SEMAPHORE_COUNT_PER_PAGE (UVM_SEMAPHORE_PAGE_SIZE / UVM_SEMAPHORE_SIZE)
// The top nibble of the canary base is intentionally 0. The rest of the value
// is arbitrary. See the comments below on make_canary.
#define UVM_SEMAPHORE_CANARY_BASE 0x0badc0de
#define UVM_SEMAPHORE_CANARY_MASK 0xf0000000
// In Confidential Computing, the representation of the semaphore payload
// requires additional storage (fields), because it is encrypted.
//
// The payload fields are written by the GPU, and read by the CPU.
typedef struct
{
// The actual (encrypted) payload value.
NvU32 encrypted_payload;
// Plaintext number used to version a {encrypted_payload, auth_tag} pair.
// The notifier ensures that the CPU can decrypt a valid snapshot of those
// encryption materials.
uvm_gpu_semaphore_notifier_t notifier;
// Padding used to enforce 16-byte alignment of the authentication tag.
NvU64 unused;
// Authentication tag associated with the encrypted payload.
NvU8 auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
} uvm_gpu_encrypted_semaphore_payload_t;
struct uvm_gpu_semaphore_pool_struct
{
// The GPU owning the pool
@@ -61,14 +82,9 @@ struct uvm_gpu_semaphore_pool_page_struct
uvm_rm_mem_t *memory;
struct {
// Unprotected sysmem storing encrypted value of semaphores
// Unprotected sysmem storing encrypted value of semaphores, in addition
// to other encryption-related data.
uvm_rm_mem_t *encrypted_payload_memory;
// Unprotected sysmem storing encryption auth tags
uvm_rm_mem_t *auth_tag_memory;
// Unprotected sysmem storing plain text notifier values
uvm_rm_mem_t *notifier_memory;
} conf_computing;
// Pool the page is part of
@@ -131,7 +147,6 @@ static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
// A pool allocated in the CPR of vidmem cannot be read/written from the
// CPU.
return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
return UVM_IS_DEBUG();
}
// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
@@ -146,68 +161,49 @@ static void pool_page_free_buffers(uvm_gpu_semaphore_pool_page_t *page)
uvm_rm_mem_free(page->memory);
page->memory = NULL;
if (gpu_semaphore_pool_is_secure(page->pool)) {
uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
uvm_rm_mem_free(page->conf_computing.auth_tag_memory);
uvm_rm_mem_free(page->conf_computing.notifier_memory);
page->conf_computing.encrypted_payload_memory = NULL;
page->conf_computing.auth_tag_memory = NULL;
page->conf_computing.notifier_memory = NULL;
}
else {
if (!gpu_semaphore_pool_is_secure(page->pool))
UVM_ASSERT(!page->conf_computing.encrypted_payload_memory);
UVM_ASSERT(!page->conf_computing.auth_tag_memory);
UVM_ASSERT(!page->conf_computing.notifier_memory);
}
uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
page->conf_computing.encrypted_payload_memory = NULL;
}
static NV_STATUS pool_page_alloc_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
NV_STATUS status;
uvm_gpu_semaphore_pool_t *pool = page->pool;
uvm_gpu_t *gpu = pool->gpu;
uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
size_t align = 0;
bool map_all = true;
align = gpu_semaphore_pool_is_secure(pool) ? UVM_CONF_COMPUTING_BUF_ALIGNMENT : 0;
map_all = gpu_semaphore_pool_is_secure(pool) ? false : true;
if (map_all)
status = uvm_rm_mem_alloc_and_map_all(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
else
status = uvm_rm_mem_alloc(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
if (status != NV_OK)
goto error;
size_t memory_size = UVM_SEMAPHORE_PAGE_SIZE;
if (!gpu_semaphore_pool_is_secure(pool))
return NV_OK;
return uvm_rm_mem_alloc_and_map_all(gpu, memory_type, memory_size, 0, &page->memory);
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
status = uvm_rm_mem_alloc(gpu, memory_type, memory_size, UVM_CONF_COMPUTING_BUF_ALIGNMENT, &page->memory);
if (status != NV_OK)
goto error;
// TODO: Bug 4607874: This check can be removed once a more general solution
// to prevent reordering of CE writes is in place.
//
// The sysmem allocation backing the page's encrypted payload memory must be
// 32-byte aligned (UVM_CONF_COMPUTING_BUF_ALIGNMENT). If each individual
// encrypted payload is 32 bytes, then it never spans more than a single,
// naturally aligned, segment of 32 bytes. This is required to prevent
// reordering issues that result in failures when decrypting the semaphore's
// payload on the CPU.
BUILD_BUG_ON(sizeof(uvm_gpu_encrypted_semaphore_payload_t) != UVM_CONF_COMPUTING_BUF_ALIGNMENT);
BUILD_BUG_ON(offsetof(uvm_gpu_encrypted_semaphore_payload_t, auth_tag) != UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_SEMAPHORE_PAGE_SIZE,
UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(uvm_gpu_encrypted_semaphore_payload_t),
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&page->conf_computing.encrypted_payload_memory);
if (status != NV_OK)
goto error;
BUILD_BUG_ON(UVM_CONF_COMPUTING_AUTH_TAG_SIZE % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&page->conf_computing.auth_tag_memory);
if (status != NV_OK)
goto error;
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(NvU32),
0,
&page->conf_computing.notifier_memory);
if (status != NV_OK)
goto error;
return NV_OK;
error:
pool_page_free_buffers(page);
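The BUILD_BUG_ON pair above pins down the layout of uvm_gpu_encrypted_semaphore_payload_t: a 4-byte encrypted payload, a 4-byte notifier, 8 bytes of padding, and the authentication tag at offset 16, for 32 bytes total, so each element occupies exactly one naturally aligned 32-byte segment. A standalone sketch of the same checks is below, assuming a 16-byte (AES-GCM style) auth tag and the 32-byte buffer alignment those asserts imply; the constants are assumptions that mirror the UVM macros, not the macros themselves.

/* Layout sketch only; constants are assumptions mirroring the asserts above. */
#include <stddef.h>
#include <stdint.h>

#define AUTH_TAG_SIZE      16  /* assumed UVM_CONF_COMPUTING_AUTH_TAG_SIZE      */
#define AUTH_TAG_ALIGNMENT 16  /* assumed UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT */
#define BUF_ALIGNMENT      32  /* assumed UVM_CONF_COMPUTING_BUF_ALIGNMENT      */

typedef struct {
    uint32_t encrypted_payload;       /* offset  0 */
    uint32_t notifier;                /* offset  4 */
    uint64_t unused;                  /* offset  8: pads auth_tag up to 16 */
    uint8_t  auth_tag[AUTH_TAG_SIZE]; /* offset 16 */
} payload_layout_t;

/* One element per 32-byte segment, so an encrypted payload never straddles
 * two segments and the CPU decrypts a consistent line. */
_Static_assert(sizeof(payload_layout_t) == BUF_ALIGNMENT,
               "element must fill exactly one aligned 32-byte segment");
_Static_assert(offsetof(payload_layout_t, auth_tag) == AUTH_TAG_ALIGNMENT,
               "auth tag must start at a 16-byte boundary");

int main(void) { return 0; }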
@@ -492,49 +488,64 @@ NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore)
return (NvU32*)(base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}
static uvm_gpu_encrypted_semaphore_payload_t *encrypted_semaphore_payload(uvm_gpu_semaphore_t *semaphore)
{
uvm_gpu_encrypted_semaphore_payload_t *encrypted_semaphore;
encrypted_semaphore = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);
return encrypted_semaphore + semaphore->index;
}
static NvU64 encrypted_semaphore_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
uvm_gpu_semaphore_pool_page_t *page = semaphore->page;
NvU64 gpu_va_base = uvm_rm_mem_get_gpu_uvm_va(page->conf_computing.encrypted_payload_memory, page->pool->gpu);
return gpu_va_base + semaphore->index * sizeof(uvm_gpu_encrypted_semaphore_payload_t);
}
NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
char *encrypted_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);
return (NvU32*)(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
return &encrypted_semaphore_payload(semaphore)->encrypted_payload;
}
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
NvU64 encrypted_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.encrypted_payload_memory,
semaphore->page->pool->gpu);
size_t offset = offsetof(uvm_gpu_encrypted_semaphore_payload_t, encrypted_payload);
NvU64 gpu_va = encrypted_semaphore_payload_gpu_va(semaphore) + offset;
return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
UVM_ASSERT(IS_ALIGNED(gpu_va, UVM_CONF_COMPUTING_BUF_ALIGNMENT));
return uvm_gpu_address_virtual_unprotected(gpu_va);
}
NvU32 *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
char *notifier_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);
return (NvU32*)(notifier_base_va + semaphore->index * sizeof(NvU32));
return &encrypted_semaphore_payload(semaphore)->notifier;
}
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
semaphore->page->pool->gpu);
size_t offset = offsetof(uvm_gpu_encrypted_semaphore_payload_t, notifier);
NvU64 gpu_va = encrypted_semaphore_payload_gpu_va(semaphore) + offset;
return uvm_gpu_address_virtual_unprotected(notifier_base_va + semaphore->index * sizeof(NvU32));
return uvm_gpu_address_virtual_unprotected(gpu_va);
}
void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
char *auth_tag_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.auth_tag_memory);
return (void*)(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
return encrypted_semaphore_payload(semaphore)->auth_tag;
}
uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
NvU64 auth_tag_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.auth_tag_memory,
semaphore->page->pool->gpu);
size_t offset = offsetof(uvm_gpu_encrypted_semaphore_payload_t, auth_tag);
NvU64 gpu_va = encrypted_semaphore_payload_gpu_va(semaphore) + offset;
return uvm_gpu_address_virtual_unprotected(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
UVM_ASSERT(IS_ALIGNED(gpu_va, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
return uvm_gpu_address_virtual_unprotected(gpu_va);
}
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
@@ -593,7 +604,7 @@ static bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
NV_STATUS status;
uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;
uvm_lock_order_t order;
memset(tracking_sem, 0, sizeof(*tracking_sem));
@@ -605,6 +616,8 @@ NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_g
if (g_uvm_global.conf_computing_enabled)
order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;
else
order = UVM_LOCK_ORDER_LEAF;
if (tracking_semaphore_uses_mutex(tracking_sem))
uvm_mutex_init(&tracking_sem->m_lock, order);
@@ -622,22 +635,11 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}
static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
static void gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
// No new value, or the GPU is currently writing the new encrypted material
// and no change in value would still result in corrupted data.
return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
}
static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
UvmCslIv local_iv;
NvU32 local_payload;
NvU32 new_sem_value;
NvU32 gpu_notifier;
NvU32 last_observed_notifier;
NvU32 new_gpu_notifier = 0;
NvU32 iv_index = 0;
uvm_gpu_semaphore_notifier_t gpu_notifier;
uvm_gpu_semaphore_notifier_t new_gpu_notifier = 0;
// A channel can have multiple entries pending and the tracking semaphore
// update of each entry can race with this function. Since the semaphore
@@ -646,62 +648,72 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
unsigned tries_left = channel->num_gpfifo_entries;
NV_STATUS status = NV_OK;
NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
NvU32 *gpu_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);
uvm_gpu_semaphore_notifier_t *semaphore_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(uvm_channel_is_ce(channel));
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
UVM_ASSERT(last_observed_notifier <= gpu_notifier);
if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
return;
do {
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
gpu_notifier = READ_ONCE(*semaphore_notifier_cpu_addr);
UVM_ASSERT(gpu_notifier >= semaphore->conf_computing.last_observed_notifier);
// Odd notifier value means there's an update in progress.
if (gpu_notifier % 2)
continue;
// There's no change since last time
if (gpu_notifier == semaphore->conf_computing.last_observed_notifier)
return;
// Make sure no memory accesses happen before we read the notifier
smp_mb__after_atomic();
iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
memcpy(local_auth_tag, uvm_gpu_semaphore_get_auth_tag_cpu_va(semaphore), sizeof(local_auth_tag));
local_payload = UVM_READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
local_payload = READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
// Make sure the second read of notifier happens after
// all memory accesses.
smp_mb__before_atomic();
new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
new_gpu_notifier = READ_ONCE(*semaphore_notifier_cpu_addr);
tries_left--;
} while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));
if (!tries_left) {
status = NV_ERR_INVALID_STATE;
goto error;
}
else {
NvU32 key_version;
const NvU32 iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
NvU32 new_semaphore_value;
UVM_ASSERT(gpu_notifier == new_gpu_notifier);
UVM_ASSERT(gpu_notifier % 2 == 0);
// CPU decryption is guaranteed to use the same key version as the
// associated GPU encryption, because if there was any key rotation in
// between, then key rotation waited for all channels to complete before
// proceeding. The wait implies that the semaphore value matches the
// last one encrypted on the GPU, so this CPU decryption should happen
// before the key is rotated.
key_version = uvm_channel_pool_key_version(channel->pool);
if (gpu_notifier == new_gpu_notifier) {
status = uvm_conf_computing_cpu_decrypt(channel,
&new_sem_value,
&new_semaphore_value,
&local_payload,
&local_iv,
sizeof(new_sem_value),
&semaphore->conf_computing.ivs[iv_index],
key_version,
sizeof(new_semaphore_value),
&local_auth_tag);
if (status != NV_OK)
goto error;
uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
}
uvm_gpu_semaphore_set_payload(semaphore, new_semaphore_value);
WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
return;
return;
}
error:
// Decryption failure is a fatal error, as is running out of tries.
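The do/while above is a seqlock-style reader: the notifier acts as a sequence counter that the GPU bumps to an odd value before rewriting the {encrypted_payload, auth_tag} pair and back to an even value afterwards, and the CPU retries until it observes the same even value on both sides of its reads. The sketch below captures that read protocol with C11 atomics in place of the kernel's READ_ONCE/barrier pairing; the names are hypothetical, and a production reader would also need READ_ONCE-style accesses for the data fields themselves.

/* Seqlock-style snapshot reader; sketch only. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct {
    _Atomic uint32_t notifier; /* even: stable, odd: writer mid-update */
    uint32_t payload;          /* stands in for the encrypted payload  */
    uint8_t auth_tag[16];
} snapshot_t;

/* Returns true and fills *payload/tag with a consistent pair, false if no
 * stable snapshot was observed within max_tries attempts. */
static bool read_snapshot(snapshot_t *s, unsigned max_tries,
                          uint32_t *payload, uint8_t tag[16])
{
    while (max_tries--) {
        uint32_t before = atomic_load_explicit(&s->notifier, memory_order_acquire);

        if (before & 1)
            continue;                       /* writer in progress, retry   */

        *payload = s->payload;              /* snapshot the protected data */
        memcpy(tag, s->auth_tag, sizeof(s->auth_tag));

        /* Keep the data reads above ordered before the re-read below,
         * mirroring smp_mb__before_atomic() in the driver code. */
        atomic_thread_fence(memory_order_acquire);

        if (atomic_load_explicit(&s->notifier, memory_order_relaxed) == before)
            return true;                    /* nothing changed underneath  */
    }
    return false;
}

int main(void)
{
    snapshot_t s = { .notifier = 2, .payload = 42 };
    uint32_t payload;
    uint8_t tag[16];

    if (read_snapshot(&s, 4, &payload, tag))
        printf("payload = %u\n", payload);
    return 0;
}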
@@ -728,7 +740,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
// mechanism to all semaphore
uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
}
new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);

View File

@@ -29,6 +29,8 @@
#include "uvm_rm_mem.h"
#include "uvm_linux.h"
typedef NvU32 uvm_gpu_semaphore_notifier_t;
// A GPU semaphore is a memory location accessible by the GPUs and the CPU
// that's used for synchronization among them.
// The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
@@ -52,8 +54,8 @@ struct uvm_gpu_semaphore_struct
UvmCslIv *ivs;
NvU32 cached_payload;
NvU32 last_pushed_notifier;
NvU32 last_observed_notifier;
uvm_gpu_semaphore_notifier_t last_pushed_notifier;
uvm_gpu_semaphore_notifier_t last_observed_notifier;
} conf_computing;
};
@@ -105,11 +107,12 @@ NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool);
// Allocate a semaphore from the pool.
//
// The semaphore will be mapped on all GPUs currently registered with the UVM
// driver, and on all new GPUs which will be registered in the future.
// Unless the Confidential Computing feature is enabled and the pool is a
// secure pool. In this case, it is only mapped to the GPU that holds the
// allocation.
// driver, and on all new GPUs which will be registered in the future. The only
// exception (in Confidential Computing) is semaphores allocated from a secure
// pool, which are only mapped on the GPU that holds the allocation.
//
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
// to the proxy address space.
//
@@ -154,7 +157,7 @@ NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore);
NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore);
NvU32 *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore);
void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore);

Some files were not shown because too many files have changed in this diff.