mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-29 12:39:46 +00:00
Compare commits
10 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 23e9e76214 | |
| | ca09591fbd | |
| | 346011d45c | |
| | 7a6a5a1f9a | |
| | 7d3cbfe254 | |
| | 9940d2229a | |
| | 5e52edb203 | |
| | 2cca8b3fd5 | |
| | caa2dd11a0 | |
| | e45d91de02 | |
CHANGELOG.md (222 changed lines)

@@ -1,222 +0,0 @@
# Changelog

## Release 555 Entries

### [555.42.02] 2024-05-21

## Release 550 Entries

### [550.78] 2024-04-25

### [550.76] 2024-04-17

### [550.67] 2024-03-19

### [550.54.15] 2024-03-18

### [550.54.14] 2024-02-23

#### Added

- Added vGPU Host and vGPU Guest support. For vGPU Host, please refer to the README.vgpu packaged in the vGPU Host Package for more details.

### [550.40.07] 2024-01-24

#### Fixed

- Set INSTALL_MOD_DIR only if it's not defined, [#570](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/570) by @keelung-yang

## Release 545 Entries

### [545.29.06] 2023-11-22

#### Fixed

- The brightness control of NVIDIA seems to be broken, [#573](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/573)

### [545.29.02] 2023-10-31

### [545.23.06] 2023-10-17

#### Fixed

- Fix always-false conditional, [#493](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/493) by @meme8383

#### Added

- Added beta-quality support for GeForce and Workstation GPUs. Please see the "Open Linux Kernel Modules" chapter in the NVIDIA GPU driver end user README for details.

## Release 535 Entries

### [535.129.03] 2023-10-31

### [535.113.01] 2023-09-21

#### Fixed

- Fixed building main against current centos stream 8 fails, [#550](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/550) by @airlied

### [535.104.05] 2023-08-22

### [535.98] 2023-08-08

### [535.86.10] 2023-07-31

### [535.86.05] 2023-07-18

### [535.54.03] 2023-06-14

### [535.43.02] 2023-05-30

#### Fixed

- Fixed console restore with traditional VGA consoles.

#### Added

- Added support for Run Time D3 (RTD3) on Ampere and later GPUs.
- Added support for G-Sync on desktop GPUs.

## Release 530 Entries

### [530.41.03] 2023-03-23

### [530.30.02] 2023-02-28

#### Changed

- GSP firmware is now distributed as `gsp_tu10x.bin` and `gsp_ga10x.bin` to better reflect the GPU architectures supported by each firmware file in this release.
- The .run installer will continue to install firmware to /lib/firmware/nvidia/<version> and the nvidia.ko kernel module will load the appropriate firmware for each GPU at runtime.

#### Fixed

- Add support for resizable BAR on Linux when NVreg_EnableResizableBar=1 module param is set. [#3](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/3) by @sjkelly

#### Added

- Support for power management features like Suspend, Hibernate and Resume.

## Release 525 Entries

### [525.147.05] 2023-10-31

#### Fixed

- Fix nvidia_p2p_get_pages(): Fix double-free in register-callback error path, [#557](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/557) by @BrendanCunningham

### [525.125.06] 2023-06-26

### [525.116.04] 2023-05-09

### [525.116.03] 2023-04-25

### [525.105.17] 2023-03-30

### [525.89.02] 2023-02-08

### [525.85.12] 2023-01-30

### [525.85.05] 2023-01-19

#### Fixed

- Fix build problems with Clang 15.0, [#377](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/377) by @ptr1337

### [525.78.01] 2023-01-05

### [525.60.13] 2022-12-05

### [525.60.11] 2022-11-28

#### Fixed

- Fixed nvenc compatibility with usermode clients [#104](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/104)

### [525.53] 2022-11-10

#### Changed

- GSP firmware is now distributed as multiple firmware files: this release has `gsp_tu10x.bin` and `gsp_ad10x.bin` replacing `gsp.bin` from previous releases.
- Each file is named after a GPU architecture and supports GPUs from one or more architectures. This allows GSP firmware to better leverage each architecture's capabilities.
- The .run installer will continue to install firmware to `/lib/firmware/nvidia/<version>` and the `nvidia.ko` kernel module will load the appropriate firmware for each GPU at runtime.

#### Fixed

- Add support for IBT (indirect branch tracking) on supported platforms, [#256](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/256) by @rnd-ash
- Return EINVAL when [failing to] allocating memory, [#280](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/280) by @YusufKhan-gamedev
- Fix various typos in nvidia/src/kernel, [#16](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/16) by @alexisgeoffrey
- Added support for rotation in X11, Quadro Sync, Stereo, and YUV 4:2:0 on Turing.

## Release 520 Entries

### [520.61.07] 2022-10-20

### [520.56.06] 2022-10-12

#### Added

- Introduce support for GeForce RTX 4090 GPUs.

### [520.61.05] 2022-10-10

#### Added

- Introduce support for NVIDIA H100 GPUs.

#### Fixed

- Fix/Improve Makefile, [#308](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/308/) by @izenynn
- Make nvLogBase2 more efficient, [#177](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/177/) by @DMaroo
- nv-pci: fixed always true expression, [#195](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/195/) by @ValZapod

## Release 515 Entries

### [515.76] 2022-09-20

#### Fixed

- Improved compatibility with new Linux kernel releases
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates

### [515.65.07] 2022-10-19

### [515.65.01] 2022-08-02

#### Fixed

- Collection of minor fixes to issues, [#6](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/61) by @Joshua-Ashton
- Remove unnecessary use of acpi_bus_get_device().

### [515.57] 2022-06-28

#### Fixed

- Backtick is deprecated, [#273](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/273) by @arch-user-france1

### [515.48.07] 2022-05-31

#### Added

- List of compatible GPUs in README.md.

#### Fixed

- Fix various README capitalizations, [#8](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/8) by @27lx
- Automatically tag bug report issues, [#15](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/15) by @thebeanogamer
- Improve conftest.sh Script, [#37](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/37) by @Nitepone
- Update HTTP link to HTTPS, [#101](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/101) by @alcaparra
- moved array sanity check to before the array access, [#117](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/117) by @RealAstolfo
- Fixed some typos, [#122](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/122) by @FEDOyt
- Fixed capitalization, [#123](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/123) by @keroeslux
- Fix typos in NVDEC Engine Descriptor, [#126](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/126) from @TrickyDmitriy
- Extranous apostrohpes in a makefile script [sic], [#14](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/14) by @kiroma
- HDMI no audio @ 4K above 60Hz, [#75](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/75) by @adolfotregosa
- dp_configcaps.cpp:405: array index sanity check in wrong place?, [#110](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/110) by @dcb314
- NVRM kgspInitRm_IMPL: missing NVDEC0 engine, cannot initialize GSP-RM, [#116](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/116) by @kfazz
- ERROR: modpost: "backlight_device_register" [...nvidia-modeset.ko] undefined, [#135](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/135) by @sndirsch
- aarch64 build fails, [#151](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/151) by @frezbo

### [515.43.04] 2022-05-11

- Initial release.
README.md (16 changed lines)

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 555.42.02.
version 550.163.01.

## How to Build

@@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
555.42.02 driver release. This can be achieved by installing
550.163.01 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

@@ -74,7 +74,7 @@ kernel.

The NVIDIA open kernel modules support the same range of Linux kernel
versions that are supported with the proprietary NVIDIA kernel modules.
This is currently Linux kernel 4.15 or newer.
This is currently Linux kernel 3.10 or newer.

## How to Contribute

@@ -188,7 +188,7 @@ encountered specific to them.

For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/555.42.02/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.163.01/README/kernel_open.html

For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.

@@ -754,9 +754,12 @@ Subsystem Device ID.

| NVIDIA H800 | 2324 10DE 17A8 |
| NVIDIA H20 | 2329 10DE 198B |
| NVIDIA H20 | 2329 10DE 198C |
| NVIDIA H20-3e | 232C 10DE 2063 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H200 | 2335 10DE 18BE |
| NVIDIA H200 | 2335 10DE 18BF |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| NVIDIA GH200 120GB | 2342 10DE 16EB |

@@ -856,7 +859,6 @@ Subsystem Device ID.

| NVIDIA RTX A500 Embedded GPU | 25FB |
| NVIDIA GeForce RTX 4090 | 2684 |
| NVIDIA GeForce RTX 4090 D | 2685 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2689 |
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |

@@ -874,6 +876,7 @@ Subsystem Device ID.

| NVIDIA L40S | 26B9 10DE 1851 |
| NVIDIA L40S | 26B9 10DE 18CF |
| NVIDIA L20 | 26BA 10DE 1957 |
| NVIDIA L20 | 26BA 10DE 1990 |
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |

@@ -910,11 +913,13 @@ Subsystem Device ID.

| NVIDIA GeForce RTX 4060 Ti | 2805 |
| NVIDIA GeForce RTX 4060 | 2808 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 2822 |
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
| NVIDIA GeForce RTX 4060 | 2882 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28A3 |
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
| NVIDIA RTX 2000E Ada Generation | 28B0 103C 1871 |

@@ -928,4 +933,5 @@ Subsystem Device ID.

| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28E3 |
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
@@ -57,6 +57,20 @@ ifeq ($(NV_UNDEF_BEHAVIOR_SANITIZER),1)
UBSAN_SANITIZE := y
endif

#
# Command to create a symbolic link, explicitly resolving the symlink target
# to an absolute path to abstract away the difference between Linux < 6.13,
# where the CWD is the Linux kernel source tree for Kbuild extmod builds, and
# Linux >= 6.13, where the CWD is the external module source tree.
#
# This is used to create the nv*-kernel.o -> nv*-kernel.o_binary symlinks for
# kernel modules which use precompiled binary object files.
#

quiet_cmd_symlink = SYMLINK $@
cmd_symlink = ln -sf $(abspath $<) $@

$(foreach _module, $(NV_KERNEL_MODULES), \
$(eval include $(src)/$(_module)/$(_module).Kbuild))

@@ -72,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"555.42.02\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.163.01\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

@@ -118,7 +132,7 @@ ifeq ($(ARCH),x86_64)
endif

ifeq ($(ARCH),powerpc)
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align -mno-altivec
endif

EXTRA_CFLAGS += -DNV_UVM_ENABLE
@@ -52,6 +52,22 @@ else
endif
endif

# If CC hasn't been set explicitly, check the value of CONFIG_CC_VERSION_TEXT.
# Look for the compiler specified there, and use it by default, if found.
ifeq ($(origin CC),default)
cc_version_text=$(firstword $(shell . $(KERNEL_OUTPUT)/.config; \
echo "$$CONFIG_CC_VERSION_TEXT"))

ifneq ($(cc_version_text),)
ifeq ($(shell command -v $(cc_version_text)),)
$(warning WARNING: Unable to locate the compiler $(cc_version_text) \
from CONFIG_CC_VERSION_TEXT in the kernel configuration.)
else
CC=$(cc_version_text)
endif
endif
endif

CC ?= cc
LD ?= ld
OBJDUMP ?= objdump

@@ -65,6 +81,16 @@ else
)
endif

KERNEL_ARCH = $(ARCH)

ifneq ($(filter $(ARCH),i386 x86_64),)
KERNEL_ARCH = x86
else
ifeq ($(filter $(ARCH),arm64 powerpc),)
$(error Unsupported architecture $(ARCH))
endif
endif

NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem)
NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \
$(NV_KERNEL_MODULES))

@@ -106,8 +132,9 @@ else
# module symbols on which the Linux kernel's module resolution is dependent
# and hence must be used whenever present.

LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
$(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \
LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
$(KERNEL_SOURCES)/arch/$(KERNEL_ARCH)/kernel/module.lds \
$(KERNEL_OUTPUT)/arch/$(KERNEL_ARCH)/module.lds \
$(KERNEL_OUTPUT)/scripts/module.lds
NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s))
@@ -58,10 +58,14 @@
#include <linux/version.h>
#include <linux/utsname.h>

#if LINUX_VERSION_CODE == KERNEL_VERSION(4, 4, 0)
// Version 4.4 is allowed, temporarily, although not officially supported.
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
#error "This driver does not support kernels older than Linux 4.15!"
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
#error "This driver does not support kernels older than 2.6.32!"
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 7, 0)
# define KERNEL_2_6
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
# define KERNEL_3
#else
#error "This driver does not support development kernels!"
#endif

#if defined (CONFIG_SMP) && !defined (__SMP__)

@@ -470,7 +474,9 @@ static inline void *nv_vmalloc(unsigned long size)
void *ptr = __vmalloc(size, GFP_KERNEL);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
return ptr;
}

@@ -488,7 +494,9 @@ static inline void *nv_ioremap(NvU64 phys, NvU64 size)
void *ptr = ioremap(phys, size);
#endif
if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
return ptr;
}

@@ -524,8 +532,9 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
#endif

if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
return ptr;
}

@@ -541,8 +550,9 @@ static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
#endif

if (ptr)
{
NV_MEMDBG_ADD(ptr, size);
}
return ptr;
}

@@ -671,7 +681,9 @@ static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
/* All memory cached in PPC64LE; can't honor 'cached' input. */
ptr = vmap(pages, page_count, VM_MAP, prot);
if (ptr)
{
NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE);
}
return (NvUPtr)ptr;
}

@@ -832,16 +844,16 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
#define NV_PRINT_AT(nv_debug_level,at) \
{ \
nv_printf(nv_debug_level, \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, " \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, flags = 0x%08x, " \
"page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \
at->num_pages, NV_ATOMIC_READ(at->usage_count), \
at->page_table); \
at->flags, at->page_table); \
}

#define NV_PRINT_VMA(nv_debug_level,vma) \
{ \
nv_printf(nv_debug_level, \
"NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08lx bytes @ 0x%016llx, 0x%p, 0x%p\n", \
"NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08x bytes @ 0x%016llx, 0x%p, 0x%p\n", \
__FUNCTION__, __LINE__, vma->vm_start, vma->vm_end, NV_VMA_SIZE(vma), \
NV_VMA_OFFSET(vma), NV_VMA_PRIVATE(vma), NV_VMA_FILE(vma)); \
}

@@ -1074,8 +1086,6 @@ static inline void nv_kmem_ctor_dummy(void *arg)
kmem_cache_destroy(kmem_cache); \
}

#define NV_KMEM_CACHE_ALLOC_ATOMIC(kmem_cache) \
kmem_cache_alloc(kmem_cache, GFP_ATOMIC)
#define NV_KMEM_CACHE_ALLOC(kmem_cache) \
kmem_cache_alloc(kmem_cache, GFP_KERNEL)
#define NV_KMEM_CACHE_FREE(ptr, kmem_cache) \

@@ -1102,23 +1112,6 @@ static inline void *nv_kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
#endif
}

static inline int nv_kmem_cache_alloc_stack_atomic(nvidia_stack_t **stack)
{
nvidia_stack_t *sp = NULL;
#if defined(NVCPU_X86_64)
if (rm_is_altstack_in_use())
{
sp = NV_KMEM_CACHE_ALLOC_ATOMIC(nvidia_stack_t_cache);
if (sp == NULL)
return -ENOMEM;
sp->size = sizeof(sp->stack);
sp->top = sp->stack + sp->size;
}
#endif
*stack = sp;
return 0;
}

static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
{
nvidia_stack_t *sp = NULL;
@@ -29,17 +29,17 @@
typedef int vm_fault_t;
#endif

/*
 * pin_user_pages()
 *
/* pin_user_pages
 * Presence of pin_user_pages() also implies the presence of unpin-user_page().
 * Both were added in the v5.6.
 * Both were added in the v5.6-rc1
 *
 * pin_user_pages() was added by commit eddb1c228f79
 * ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
 * pin_user_pages() was added by commit eddb1c228f7951d399240
 * ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
 *
 * Removed vmas parameter from pin_user_pages() by commit 40896a02751
 * ("mm/gup: remove vmas parameter from pin_user_pages()")
 * in linux-next, expected in v6.5-rc1 (2023-05-17)
 *
 * Removed vmas parameter from pin_user_pages() by commit 4c630f307455
 * ("mm/gup: remove vmas parameter from pin_user_pages()") in v6.5.
 */

#include <linux/mm.h>

@@ -63,28 +63,25 @@ typedef int vm_fault_t;
#define NV_UNPIN_USER_PAGE put_page
#endif // NV_PIN_USER_PAGES_PRESENT
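As a hedged illustration of how the `NV_UNPIN_USER_PAGE` fallback above is typically consumed (the helper and variable names here are hypothetical, not taken from this tree):

```c
/* Hypothetical caller: release pages previously pinned through the
 * NV_PIN_USER_PAGES* wrappers. On kernels without pin_user_pages(),
 * NV_UNPIN_USER_PAGE expands to put_page() as defined above. */
static void nv_release_pinned_pages(struct page **pages, long pinned)
{
    long i;

    for (i = 0; i < pinned; i++)
        NV_UNPIN_USER_PAGE(pages[i]);
}
```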
/*
 * get_user_pages()
/* get_user_pages
 *
 * The 8-argument version of get_user_pages() was deprecated by commit
 * cde70140fed8 ("mm/gup: Overload get_user_pages() functions") in v4.6-rc1.
 * The 8-argument version of get_user_pages was deprecated by commit
 * (2016 Feb 12: cde70140fed8429acf7a14e2e2cbd3e329036653) for the non-remote case
 * (calling get_user_pages with current and current->mm).
 *
 * Completely moved to the 6 argument version of get_user_pages() by
 * commit c12d2da56d0e ("mm/gup: Remove the macro overload API migration
 * helpers from the get_user*() APIs") in v4.6-rc4.
 * Completely moved to the 6 argument version of get_user_pages -
 * 2016 Apr 4: c12d2da56d0e07d230968ee2305aaa86b93a6832
 *
 * write and force parameters were replaced with gup_flags by
 * commit 768ae309a961 ("mm: replace get_user_pages() write/force parameters
 * with gup_flags") in v4.9.
 * write and force parameters were replaced with gup_flags by -
 * 2016 Oct 12: 768ae309a96103ed02eb1e111e838c87854d8b51
 *
 * A 7-argument version of get_user_pages was introduced into linux-4.4.y by
 * commit 8e50b8b07f462 ("mm: replace get_user_pages() write/force parameters
 * with gup_flags") which cherry-picked the replacement of the write and
 * force parameters with gup_flags.
 * commit 8e50b8b07f462ab4b91bc1491b1c91bd75e4ad40 which cherry-picked the
 * replacement of the write and force parameters with gup_flags
 *
 * Removed vmas parameter from get_user_pages() by commit 54d020692b34
 * ("mm/gup: remove unused vmas parameter from get_user_pages()") in v6.5.
 * Removed vmas parameter from get_user_pages() by commit 7bbf9c8c99
 * ("mm/gup: remove unused vmas parameter from get_user_pages()")
 * in linux-next, expected in v6.5-rc1 (2023-05-17)
 *
 */

@@ -115,19 +112,18 @@ typedef int vm_fault_t;
}
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS

/*
 * pin_user_pages_remote()
/* pin_user_pages_remote
 *
 * pin_user_pages_remote() was added by commit eddb1c228f79
 * ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
 * pin_user_pages_remote() was added by commit eddb1c228f7951d399240
 * ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
 *
 * pin_user_pages_remote() removed 'tsk' parameter by commit
 * 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
 * in v5.9.
 * 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
 * in v5.9-rc1 (2020-08-11). *
 *
 * Removed unused vmas parameter from pin_user_pages_remote() by commit
 * 0b295316b3a9 ("mm/gup: remove unused vmas parameter from
 * pin_user_pages_remote()") in v6.5.
 * 83bcc2e132 ("mm/gup: remove unused vmas parameter from pin_user_pages_remote()")
 * in linux-next, expected in v6.5-rc1 (2023-05-14)
 *
 */
@@ -147,7 +143,7 @@ typedef int vm_fault_t;

/*
 * get_user_pages_remote() was added by commit 1e9877902dc7
 * ("mm/gup: Introduce get_user_pages_remote()") in v4.6.
 * ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
 *
 * Note that get_user_pages_remote() requires the caller to hold a reference on
 * the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.

@@ -157,17 +153,19 @@ typedef int vm_fault_t;
 *
 * get_user_pages_remote() write/force parameters were replaced
 * with gup_flags by commit 9beae1ea8930 ("mm: replace get_user_pages_remote()
 * write/force parameters with gup_flags") in v4.9.
 * write/force parameters with gup_flags") in v4.9 (2016-10-13).
 *
 * get_user_pages_remote() added 'locked' parameter by commit 5b56d49fc31d
 * ("mm: add locked parameter to get_user_pages_remote()") in v4.10.
 * ("mm: add locked parameter to get_user_pages_remote()") in
 * v4.10 (2016-12-14).
 *
 * get_user_pages_remote() removed 'tsk' parameter by
 * commit 64019a2e467a ("mm/gup: remove task_struct pointer for
 * all gup code") in v5.9.
 * all gup code") in v5.9-rc1 (2020-08-11).
 *
 * Removed vmas parameter from get_user_pages_remote() by commit ca5e863233e8
 * ("mm/gup: remove vmas parameter from get_user_pages_remote()") in v6.5.
 * Removed vmas parameter from get_user_pages_remote() by commit a4bde14d549
 * ("mm/gup: remove vmas parameter from get_user_pages_remote()")
 * in linux-next, expected in v6.5-rc1 (2023-05-14)
 *
 */
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a

@@ -609,15 +609,6 @@ typedef enum
NV_POWER_STATE_RUNNING
} nv_power_state_t;

typedef struct
{
const char *vidmem_power_status;
const char *dynamic_power_status;
const char *gc6_support;
const char *gcoff_support;
const char *s0ix_status;
} nv_power_info_t;

#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)

#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)

@@ -787,7 +778,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);

void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );

NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);

@@ -831,7 +822,6 @@ void NV_API_CALL nv_acpi_methods_init (NvU32 *);
void NV_API_CALL nv_acpi_methods_uninit (void);

NV_STATUS NV_API_CALL nv_acpi_method (NvU32, NvU32, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
NV_STATUS NV_API_CALL nv_acpi_d3cold_dsm_for_upstream_port (nv_state_t *, NvU8 *, NvU32, NvU32, NvU32 *);
NV_STATUS NV_API_CALL nv_acpi_dsm_method (nv_state_t *, NvU8 *, NvU32, NvBool, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
NV_STATUS NV_API_CALL nv_acpi_ddc_method (nv_state_t *, void *, NvU32 *, NvBool);
NV_STATUS NV_API_CALL nv_acpi_dod_method (nv_state_t *, NvU32 *, NvU32 *);

@@ -1000,10 +990,10 @@ NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU6
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);
NV_STATUS NV_API_CALL rm_p2p_get_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
NV_STATUS NV_API_CALL rm_p2p_get_gpu_info (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *, void **);
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *);
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *);
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);

@@ -1037,7 +1027,9 @@ void NV_API_CALL rm_enable_dynamic_power_management(nvidia_stack_t *, nv_s
NV_STATUS NV_API_CALL rm_ref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
void NV_API_CALL rm_unref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
NV_STATUS NV_API_CALL rm_transition_dynamic_power(nvidia_stack_t *, nv_state_t *, NvBool, NvBool *);
void NV_API_CALL rm_get_power_info(nvidia_stack_t *, nv_state_t *, nv_power_info_t *);
const char* NV_API_CALL rm_get_vidmem_power_status(nvidia_stack_t *, nv_state_t *);
const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *, nv_state_t *);
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);

void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);

@@ -1053,7 +1045,7 @@ NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, con
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);

NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
@@ -592,6 +592,13 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
Error codes:
NV_ERR_GENERIC
NV_ERR_NO_MEMORY
NV_ERR_INVALID_STATE
NV_ERR_NOT_SUPPORTED
NV_ERR_NOT_READY
NV_ERR_INVALID_LOCK_STATE
NV_ERR_INVALID_STATE
NV_ERR_NVSWITCH_FABRIC_NOT_READY
NV_ERR_NVSWITCH_FABRIC_FAILURE
*/
NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
UvmGpuCaps *caps);

@@ -1462,29 +1469,6 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
char *methodStream,
NvU32 methodStreamSize);

/*******************************************************************************
nvUvmInterfaceKeyRotationChannelDisable

This function notifies RM that the given channels are idle.

This function is called after RM has notified UVM that keys need to be rotated.
When called RM will disable the channels, rotate their keys, and then re-enable
the channels.

Locking: This function acquires an API and GPU lock.
Memory : This function dynamically allocates memory.

Arguments:
channelList[IN] - An array of channel handles whose channels are idle.
channelListCount[IN] - Number of channels in channelList. Its value must be
greater than 0.

Error codes:
NV_ERR_INVALID_ARGUMENT - channelList is NULL or channeListCount is 0.
*/
NV_STATUS nvUvmInterfaceKeyRotationChannelDisable(uvmGpuChannelHandle channelList[],
NvU32 channeListCount);

/*******************************************************************************
Cryptography Services Library (CSL) Interface
*/

@@ -1528,15 +1512,21 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);

/*******************************************************************************
nvUvmInterfaceCslUpdateContext
nvUvmInterfaceCslRotateKey

Updates contexts after a key rotation event and can only be called once per
key rotation event. Following a key rotation event, and before
nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
previous key can be decrypted with nvUvmInterfaceCslDecrypt.
Disables channels and rotates keys.

Locking: This function acquires an API lock.
Memory : This function does not dynamically allocate memory.
This function disables channels and rotates associated keys. The channels
associated with the given CSL contexts must be idled before this function is
called. To trigger key rotation all allocated channels for a given key must
be present in the list. If the function returns successfully then the CSL
contexts have been updated with the new key.

Locking: This function attempts to acquire the GPU lock. In case of failure
to acquire the return code is NV_ERR_STATE_IN_USE. The caller must
guarantee that no CSL function, including this one, is invoked
concurrently with the CSL contexts in contextList.
Memory : This function dynamically allocates memory.

Arguments:
contextList[IN/OUT] - An array of pointers to CSL contexts.

@@ -1544,9 +1534,13 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
must be greater than 0.
Error codes:
NV_ERR_INVALID_ARGUMENT - contextList is NULL or contextListCount is 0.
NV_ERR_STATE_IN_USE - Unable to acquire lock / resource. Caller
can retry at a later time.
NV_ERR_GENERIC - A failure other than _STATE_IN_USE occurred
when attempting to acquire a lock.
*/
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *contextList[],
NvU32 contextListCount);
NV_STATUS nvUvmInterfaceCslRotateKey(UvmCslContext *contextList[],
NvU32 contextListCount);
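Based only on the locking and error-code notes above, a caller would be expected to retry when the GPU lock cannot be acquired; the loop below is an illustrative sketch, not code taken from the UVM driver:

```c
/* Sketch: rotate keys for a set of already-idled channels, retrying while
 * the GPU lock is contended (NV_ERR_STATE_IN_USE), as documented above. */
NV_STATUS status;

do
{
    status = nvUvmInterfaceCslRotateKey(contextList, contextListCount);
} while (status == NV_ERR_STATE_IN_USE);
```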
/*******************************************************************************
nvUvmInterfaceCslRotateIv

@@ -1554,17 +1548,13 @@ NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *contextList[],
Rotates the IV for a given channel and operation.

This function will rotate the IV on both the CPU and the GPU.
Outstanding messages that have been encrypted by the GPU should first be
decrypted before calling this function with operation equal to
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
encrypted by the CPU should first be decrypted before calling this function
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
the channel must be idle before calling this function. This function can be
called regardless of the value of the IV's message counter.
For a given operation the channel must be idle before calling this function.
This function can be called regardless of the value of the IV's message counter.

Locking: This function attempts to acquire the GPU lock.
In case of failure to acquire the return code
is NV_ERR_STATE_IN_USE.
Locking: This function attempts to acquire the GPU lock. In case of failure to
acquire the return code is NV_ERR_STATE_IN_USE. The caller must guarantee
that no CSL function, including this one, is invoked concurrently with
the same CSL context.
Memory : This function does not dynamically allocate memory.

Arguments:

@@ -1598,8 +1588,8 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
However, it is optional. If it is NULL, the next IV in line will be used.

Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.

Arguments:

@@ -1635,9 +1625,14 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
maximized when the input and output buffers are 16-byte aligned. This is
natural alignment for AES block.

During a key rotation event the previous key is stored in the CSL context.
This allows data encrypted by the GPU to be decrypted with the previous key.
The keyRotationId parameter identifies which key is used. The first key rotation
ID has a value of 0 that increments by one for each key rotation event.

Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.

Arguments:

@@ -1647,6 +1642,8 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
internal counter is used.
keyRotationId[IN] - Specifies the key that is used for decryption.
A value of NV_U32_MAX specifies the current key.
inputBuffer[IN] - Address of ciphertext input buffer.
outputBuffer[OUT] - Address of plaintext output buffer.
addAuthData[IN] - Address of the plaintext additional authenticated data used to

@@ -1667,6 +1664,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU32 keyRotationId,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,

@@ -1681,8 +1679,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
undefined behavior.

Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.

Arguments:

@@ -1710,8 +1708,8 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,

Locking: This function does not acquire an API or GPU lock.
Memory : This function does not dynamically allocate memory.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.

Arguments:
uvmCslContext[IN/OUT] - The CSL context.

@@ -1736,8 +1734,8 @@ NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
the returned IV can be used in nvUvmInterfaceCslDecrypt.

Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.

Arguments:

@@ -1759,13 +1757,13 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslIv *iv);

/*******************************************************************************
nvUvmInterfaceCslLogExternalEncryption
nvUvmInterfaceCslLogEncryption

Checks and logs information about non-CSL encryptions, such as those that
originate from the GPU.
Checks and logs information about encryptions associated with the given
CSL context.

For contexts associated with channels, this function does not modify elements of
the UvmCslContext and must be called for each external encryption invocation.
the UvmCslContext, and must be called for every CPU/GPU encryption.

For the context associated with fault buffers, bufferSize can encompass multiple
encryption invocations, and the UvmCslContext will be updated following a key

@@ -1775,19 +1773,25 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,

Locking: This function does not acquire an API or GPU lock.
Memory : This function does not dynamically allocate memory.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.

Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[OUT] - The size of the buffer(s) encrypted by the
operation[IN] - If the CSL context is associated with a fault
buffer, this argument is ignored. If it is
associated with a channel, it must be either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
bufferSize[IN] - The size of the buffer(s) encrypted by the
external entity in units of bytes.

Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
NV_ERR_INSUFFICIENT_RESOURCES - The encryption would cause a counter
to overflow.
*/
NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
NvU32 bufferSize);
NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU32 bufferSize);

#endif // _NV_UVM_INTERFACE_H_
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a

@@ -39,12 +39,12 @@
// are multiple BIG page sizes in RM. These defines are used as flags to "0"
// should be OK when user is not sure which pagesize allocation it wants
//
#define UVM_PAGE_SIZE_DEFAULT 0x0ULL
#define UVM_PAGE_SIZE_4K 0x1000ULL
#define UVM_PAGE_SIZE_64K 0x10000ULL
#define UVM_PAGE_SIZE_128K 0x20000ULL
#define UVM_PAGE_SIZE_2M 0x200000ULL
#define UVM_PAGE_SIZE_512M 0x20000000ULL
#define UVM_PAGE_SIZE_DEFAULT 0x0
#define UVM_PAGE_SIZE_4K 0x1000
#define UVM_PAGE_SIZE_64K 0x10000
#define UVM_PAGE_SIZE_128K 0x20000
#define UVM_PAGE_SIZE_2M 0x200000
#define UVM_PAGE_SIZE_512M 0x20000000

//
// When modifying flags, make sure they are compatible with the mirrored

@@ -595,16 +595,16 @@ typedef struct UvmGpuClientInfo_tag

typedef enum
{
UVM_GPU_CONF_COMPUTE_MODE_NONE,
UVM_GPU_CONF_COMPUTE_MODE_APM,
UVM_GPU_CONF_COMPUTE_MODE_HCC,
UVM_GPU_CONF_COMPUTE_MODE_COUNT
UVM_GPU_CONF_COMPUTE_MODE_NONE = 0,
UVM_GPU_CONF_COMPUTE_MODE_HCC = 2
} UvmGpuConfComputeMode;

typedef struct UvmGpuConfComputeCaps_tag
{
// Out: GPU's confidential compute mode
UvmGpuConfComputeMode mode;
// Is key rotation enabled for UVM keys
NvBool bKeyRotationEnabled;
} UvmGpuConfComputeCaps;

#define UVM_GPU_NAME_LENGTH 0x40
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 1993-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a

@@ -494,23 +494,6 @@ do \
//
#define NV_TWO_N_MINUS_ONE(n) (((1ULL<<(n/2))<<((n+1)/2))-1)

//
// Create a 64b bitmask with n bits set
// This is the same as ((1ULL<<n) - 1), but it doesn't overflow for n=64
//
// ...
// n=-1, 0x0000000000000000
// n=0, 0x0000000000000000
// n=1, 0x0000000000000001
// ...
// n=63, 0x7FFFFFFFFFFFFFFF
// n=64, 0xFFFFFFFFFFFFFFFF
// n=65, 0xFFFFFFFFFFFFFFFF
// n=66, 0xFFFFFFFFFFFFFFFF
// ...
//
#define NV_BITMASK64(n) ((n<1) ? 0ULL : (NV_U64_MAX>>((n>64) ? 0 : (64-n))))
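A few worked expansions of `NV_BITMASK64`, derived directly from the definition above (illustrative only; assumes the NvU64 and NV_U64_MAX definitions from this header):

```c
NvU64 lowTwenty = NV_BITMASK64(20); /* NV_U64_MAX >> 44 == 0x00000000000FFFFF */
NvU64 allBits   = NV_BITMASK64(64); /* NV_U64_MAX >> 0  == 0xFFFFFFFFFFFFFFFF */
NvU64 noBits    = NV_BITMASK64(0);  /* n < 1, so 0x0; no undefined 64-bit shift */
```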
#define DRF_READ_1WORD_BS(d,r,f,v) \
((DRF_EXTENT_MW(NV##d##r##f)<8)?DRF_READ_1BYTE_BS(NV##d##r##f,(v)): \
((DRF_EXTENT_MW(NV##d##r##f)<16)?DRF_READ_2BYTE_BS(NV##d##r##f,(v)): \

@@ -591,13 +574,6 @@ nvMaskPos32(const NvU32 mask, const NvU32 bitIdx)
n32 = BIT_IDX_32(LOWESTBIT(n32));\
}

// Destructive operation on n64
#define LOWESTBITIDX_64(n64) \
{ \
n64 = BIT_IDX_64(LOWESTBIT(n64));\
}

// Destructive operation on n32
#define HIGHESTBITIDX_32(n32) \
{ \

@@ -718,6 +694,42 @@ nvPrevPow2_U64(const NvU64 x )
} \
}

//
// Bug 4851259: Newly added functions must be hidden from certain HS-signed
// ucode compilers to avoid signature mismatch.
//
#ifndef NVDEC_1_0
/*!
 * Returns the position of nth set bit in the given mask.
 *
 * Returns -1 if mask has fewer than n bits set.
 *
 * n is 0 indexed and has valid values 0..31 inclusive, so "zeroth" set bit is
 * the first set LSB.
 *
 * Example, if mask = 0x000000F0u and n = 1, the return value will be 5.
 * Example, if mask = 0x000000F0u and n = 4, the return value will be -1.
 */
static NV_FORCEINLINE NvS32
nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
{
NvU32 seenSetBitsCount = 0;
NvS32 index;
FOR_EACH_INDEX_IN_MASK(32, index, mask)
{
if (seenSetBitsCount == n)
{
return index;
}
++seenSetBitsCount;
}
FOR_EACH_INDEX_IN_MASK_END;

return -1;
}
#endif // NVDEC_1_0

//
// Size to use when declaring variable-sized arrays
//

@@ -942,11 +954,6 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
//
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
//
// Clear the bit at pos (b) for U64 which is < 128.
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
//
#define NV_BIT_CLEAR_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) &= ~NVBIT64(b); else (hi) &= ~NVBIT64( b & 0x3F ); }

// Get the number of elements the specified fixed-size array
#define NV_ARRAY_ELEMENTS(x) ((sizeof(x)/sizeof((x)[0])))
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a

@@ -152,7 +152,8 @@ NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manag
NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED, 0x0000007B, "Semaphore Surface value already >= requested wait value")
NV_STATUS_CODE(NV_ERR_QUEUE_TASK_SLOT_NOT_AVAILABLE, 0x0000007C, "PMU RPC error due to no queue slot available for this event")
NV_STATUS_CODE(NV_ERR_KEY_ROTATION_IN_PROGRESS, 0x0000007D, "Operation not allowed as key rotation is in progress")
NV_STATUS_CODE(NV_ERR_TEST_ONLY_CODE_NOT_ENABLED, 0x0000007E, "Test-only code path not enabled")
NV_STATUS_CODE(NV_ERR_NVSWITCH_FABRIC_NOT_READY, 0x00000081, "Nvswitch Fabric Status or Fabric Probe is not yet complete, caller needs to retry")
NV_STATUS_CODE(NV_ERR_NVSWITCH_FABRIC_FAILURE, 0x00000082, "Nvswitch Fabric Probe failed")

// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")

@@ -152,12 +152,6 @@ typedef signed short NvS16; /* -32768 to 32767 */
(((NvU32)(c) & 0xff) << 8) | \
(((NvU32)(d) & 0xff))))

// Macro to build an NvU64 from two DWORDS, listed from msb to lsb
#define NvU64_BUILD(a, b) \
((NvU64)( \
(((NvU64)(a) & ~0U) << 32) | \
(((NvU64)(b) & ~0U))))
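For reference, a worked example of the `NvU64_BUILD` macro above, using hypothetical values:

```c
/* High DWORD first, low DWORD second, per the comment above: */
NvU64 combined = NvU64_BUILD(0x00000001u, 0x80000000u); /* == 0x0000000180000000ULL */
```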
#if NVTYPES_USE_STDINT
typedef uint32_t NvV32; /* "void": enumerated or multiple fields */
typedef uint32_t NvU32; /* 0 to 4294967295 */

@@ -218,6 +218,8 @@ extern NvU32 os_page_size;
extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_sev_snp_enabled;
extern NvBool os_cc_snp_vtom_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
extern NvBool os_imex_channel_is_supported;

@@ -101,17 +101,16 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);

NV_STATUS NV_API_CALL rm_gpu_ops_key_rotation_channel_disable(nvidia_stack_t *, nvgpuChannelHandle_t [], NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, UvmCslContext *[], NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_key(nvidia_stack_t *, UvmCslContext *[], NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU32, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU32);

#endif

@@ -2523,6 +2523,22 @@ compile_test() {
fi
;;

file_operations_fop_unsigned_offset_present)
#
# Determine if the FOP_UNSIGNED_OFFSET define is present.
#
# Added by commit 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to
# fop_flags") in v6.12.
#
CODE="
#include <linux/fs.h>
int conftest_file_operations_fop_unsigned_offset_present(void) {
return FOP_UNSIGNED_OFFSET;
}"

compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT" "" "types"
;;
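A hedged sketch of how driver code might consume the generated `NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT` define; the `nv_example_fops` initializer below is hypothetical and not taken from this tree:

```c
#include <linux/fs.h>
#include <linux/module.h>

/* Hypothetical file_operations table guarded by the conftest result above. */
static struct file_operations nv_example_fops = {
    .owner = THIS_MODULE,
#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
    /* On Linux 6.12+ the unsigned-offset behavior is requested via fop_flags. */
    .fop_flags = FOP_UNSIGNED_OFFSET,
#endif
};
```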
|
||||
|
||||
mm_context_t)
|
||||
#
|
||||
# Determine if the 'mm_context_t' data type is present
|
||||
@@ -5102,6 +5118,42 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_CC_PLATFORM_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
cc_attr_guest_sev_snp)
|
||||
#
|
||||
# Determine if 'CC_ATTR_GUEST_SEV_SNP' is present.
|
||||
#
|
||||
# Added by commit aa5a461171f9 ("x86/mm: Extend cc_attr to
|
||||
# include AMD SEV-SNP") in v5.19.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
|
||||
#include <linux/cc_platform.h>
|
||||
#endif
|
||||
|
||||
enum cc_attr cc_attributes = CC_ATTR_GUEST_SEV_SNP;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CC_ATTR_SEV_SNP" "" "types"
|
||||
;;
|
||||
|
||||
hv_get_isolation_type)
|
||||
#
|
||||
# Determine if 'hv_get_isolation_type()' is present.
|
||||
# Added by commit faff44069ff5 ("x86/hyperv: Add Write/Read MSR
|
||||
# registers via ghcb page") in v5.16.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_ASM_MSHYPERV_H_PRESENT)
|
||||
#include <asm/mshyperv.h>
|
||||
#endif
|
||||
void conftest_hv_get_isolation_type(void) {
|
||||
int i;
|
||||
hv_get_isolation_type(i);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_HV_GET_ISOLATION_TYPE" "" "functions"
|
||||
;;

drm_prime_pages_to_sg_has_drm_device_arg)
#
# Determine if drm_prime_pages_to_sg() has 'dev' argument.
@@ -5252,23 +5304,60 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT" "" "generic"
;;

unsafe_follow_pfn)
follow_pfn)
#
# Determine if unsafe_follow_pfn() is present.
# Determine if follow_pfn() is present.
#
# unsafe_follow_pfn() was added by commit 69bacee7f9ad
# ("mm: Add unsafe_follow_pfn") in v5.13-rc1.
#
# Note: this commit never made it to the linux kernel, so
# unsafe_follow_pfn() never existed.
# follow_pfn() was added by commit 3b6748e2dd69
# ("mm: introduce follow_pfn()") in v2.6.31-rc1, and removed
# by commit 233eb0bf3b94 ("mm: remove follow_pfn") in
# linux-next.
#
CODE="
#include <linux/mm.h>
void conftest_unsafe_follow_pfn(void) {
unsafe_follow_pfn();
void conftest_follow_pfn(void) {
follow_pfn();
}"

compile_check_conftest "$CODE" "NV_UNSAFE_FOLLOW_PFN_PRESENT" "" "functions"
compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
;;

follow_pte_arg_vma)
#
# Determine if the first argument of follow_pte is
# mm_struct or vm_area_struct.
#
# The first argument was changed from mm_struct to vm_area_struct by
# commit 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()")
#
CODE="
#include <linux/mm.h>

typeof(follow_pte) conftest_follow_pte_has_vma_arg;
int conftest_follow_pte_has_vma_arg(struct vm_area_struct *vma,
unsigned long address,
pte_t **ptep,
spinlock_t **ptl) {
return 0;
}"

compile_check_conftest "$CODE" "NV_FOLLOW_PTE_ARG1_VMA" "" "types"
;;
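
For illustration, a caller would typically consume the resulting NV_FOLLOW_PTE_ARG1_VMA define with a wrapper like the hedged sketch below (the wrapper name is hypothetical; only the two follow_pte() prototypes come from the commit cited above):

```c
#include <linux/mm.h>
#include "conftest.h"   /* generated by conftest.sh */

/* Hypothetical wrapper around follow_pte(), spanning both prototypes. */
static int nv_follow_pte(struct vm_area_struct *vma, unsigned long address,
                         pte_t **ptep, spinlock_t **ptlp)
{
#if defined(NV_FOLLOW_PTE_ARG1_VMA)
    return follow_pte(vma, address, ptep, ptlp);        /* newer: takes the VMA */
#else
    return follow_pte(vma->vm_mm, address, ptep, ptlp); /* older: takes the mm */
#endif
}
```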

ptep_get)
#
# Determine if ptep_get() is present.
#
# ptep_get() was added by commit 481e980a7c19
# ("mm: Allow arches to provide ptep_get()")
#
CODE="
#include <linux/mm.h>
void conftest_ptep_get(void) {
ptep_get();
}"

compile_check_conftest "$CODE" "NV_PTEP_GET_PRESENT" "" "functions"
;;

drm_plane_atomic_check_has_atomic_state_arg)
@@ -5533,6 +5622,23 @@ compile_test() {
else
echo "#undef NV_OF_PROPERTY_READ_VARIABLE_U32_ARRAY_PRESENT" | append_conftest "functions"
fi
;;

module_import_ns_takes_string_literal)
#
# Determine if the MODULE_IMPORT_NS macro takes a string literal
# or constant.
#
# Commit cdd30ebb1b9f ("module: Convert symbol namespace to
# string literal") changed MODULE_IMPORT_NS to take a string
# literal in Linux kernel v6.13.
#
CODE="
#include <linux/module.h>

MODULE_IMPORT_NS(DMA_BUF);"

compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL" "" "functions"
;;

devm_of_platform_populate)
@@ -5556,8 +5662,7 @@ compile_test() {

of_dma_configure)
#
# Determine if of_dma_configure() function is present, and how
# many arguments it takes.
# Determine if of_dma_configure() function is present
#
# Added by commit 591c1ee465ce ("of: configure the platform
# device dma parameters") in v3.16. However, it was a static,
@@ -5567,69 +5672,17 @@ compile_test() {
# commit 1f5c69aa51f9 ("of: Move of_dma_configure() to device.c
# to help re-use") in v4.1.
#
# It subsequently began taking a third parameter with commit
# 3d6ce86ee794 ("drivers: remove force dma flag from buses")
# in v4.18.
#

echo "$CONFTEST_PREAMBLE
CODE="
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif

void conftest_of_dma_configure(void)
{
of_dma_configure();
}
" > conftest$$.c
"

$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c

if [ -f conftest$$.o ]; then
rm -f conftest$$.o

echo "#undef NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
echo "#undef NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT" | append_conftest "functions"
else
echo "#define NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"

echo "$CONFTEST_PREAMBLE
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif

void conftest_of_dma_configure(void) {
of_dma_configure(NULL, NULL, false);
}" > conftest$$.c

$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c

if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 3" | append_conftest "functions"
return
fi

echo "$CONFTEST_PREAMBLE
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif

void conftest_of_dma_configure(void) {
of_dma_configure(NULL, NULL);
}" > conftest$$.c

$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c

if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 2" | append_conftest "functions"
return
fi
fi
compile_check_conftest "$CODE" "NV_OF_DMA_CONFIGURE_PRESENT" "" "functions"
;;
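
A hedged sketch of the call-site side of this test, consuming NV_OF_DMA_CONFIGURE_PRESENT and NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT (the wrapper itself is illustrative, not from this change; the third argument is the force_dma flag added in v4.18):

```c
#include <linux/of_device.h>
#include "conftest.h"   /* generated by conftest.sh */

/* Hypothetical wrapper: configure DMA from DT with whichever arity exists. */
static void nv_of_dma_configure(struct device *dev, struct device_node *np)
{
#if defined(NV_OF_DMA_CONFIGURE_PRESENT)
#if NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT == 3
    of_dma_configure(dev, np, true);   /* v4.18+: force_dma flag */
#elif NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT == 2
    of_dma_configure(dev, np);         /* v4.1 .. v4.17 */
#endif
#endif
}
```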

icc_get)
@@ -6255,6 +6308,32 @@ compile_test() {
compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types"
;;

acpi_video_register_backlight)
#
# Determine if acpi_video_register_backlight() function is present
#
# acpi_video_register_backlight was added by commit 3dbc80a3e4c55c
# (ACPI: video: Make backlight class device registration a separate
# step (v2)) for v6.0 (2022-09-02).
# Note: the include directive for <linux/types.h> in this conftest is
# necessary in order to support kernels between commit 0b9f7d93ca61
# ("ACPI / i915: ignore firmware requests backlight change") for
# v3.16 (2014-07-07) and commit 3bd6bce369f5 ("ACPI / video: Port
# to new backlight interface selection API") for v4.2 (2015-07-16).
# Kernels within this range use the 'bool' type and the related
# 'false' value in <acpi/video.h> without first including the
# definitions of that type and value.
#
CODE="
#include <linux/types.h>
#include <acpi/video.h>
void conftest_acpi_video_register_backlight(void) {
acpi_video_register_backlight(0);
}"

compile_check_conftest "$CODE" "NV_ACPI_VIDEO_REGISTER_BACKLIGHT" "" "functions"
;;

acpi_video_backlight_use_native)
#
# Determine if acpi_video_backlight_use_native() function is present
@@ -6598,7 +6677,9 @@ compile_test() {
# Determine whether drm_fbdev_generic_setup is present.
#
# Added by commit 9060d7f49376 ("drm/fb-helper: Finish the
# generic fbdev emulation") in v4.19.
# generic fbdev emulation") in v4.19. Removed by commit
# aae4682e5d66 ("drm/fbdev-generic: Convert to fbdev-ttm")
# in v6.11.
#
CODE="
#include <drm/drm_fb_helper.h>
@@ -6610,6 +6691,105 @@ compile_test() {
}"

compile_check_conftest "$CODE" "NV_DRM_FBDEV_GENERIC_SETUP_PRESENT" "" "functions"
;;

drm_fbdev_ttm_setup)
#
# Determine whether drm_fbdev_ttm_setup is present.
#
# Added by commit aae4682e5d66 ("drm/fbdev-generic:
# Convert to fbdev-ttm") in v6.11. Removed by commit
# 1000634477d8 ("drm/fbdev-ttm: Convert to client-setup") in v6.13.
#
CODE="
#include <drm/drm_fb_helper.h>
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
#include <drm/drm_fbdev_ttm.h>
#endif
void conftest_drm_fbdev_ttm_setup(void) {
drm_fbdev_ttm_setup();
}"

compile_check_conftest "$CODE" "NV_DRM_FBDEV_TTM_SETUP_PRESENT" "" "functions"
;;

drm_client_setup)
#
# Determine whether drm_client_setup is present.
#
# Added by commit d07fdf922592 ("drm/fbdev-ttm: Convert to
# client-setup") in v6.13 in drm/drm_client_setup.h, but then moved
# to drm/clients/drm_client_setup.h by commit b86711c6d6e2
# ("drm/client: Move public client header to clients/ subdirectory")
# in linux-next.
#
CODE="
#include <drm/drm_fb_helper.h>
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/drm_client_setup.h>
#elif defined(NV_DRM_CLIENTS_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/clients/drm_client_setup.h>
#endif
void conftest_drm_client_setup(void) {
drm_client_setup();
}"

compile_check_conftest "$CODE" "NV_DRM_CLIENT_SETUP_PRESENT" "" "functions"
;;

drm_output_poll_changed)
#
# Determine whether drm_mode_config_funcs.output_poll_changed
# callback is present
#
# Removed by commit 446d0f4849b1 ("drm: Remove struct
# drm_mode_config_funcs.output_poll_changed") in v6.12. Hotplug
# event support is handled through the fbdev emulation interface
# going forward.
#
CODE="
#if defined(NV_DRM_DRM_MODE_CONFIG_H_PRESENT)
#include <drm/drm_mode_config.h>
#else
#include <drm/drm_crtc.h>
#endif
int conftest_drm_output_poll_changed_available(void) {
return offsetof(struct drm_mode_config_funcs, output_poll_changed);
}"

compile_check_conftest "$CODE" "NV_DRM_OUTPUT_POLL_CHANGED_PRESENT" "" "types"
;;

aperture_remove_conflicting_devices)
#
# Determine whether aperture_remove_conflicting_devices is present.
#
# Added by commit 7283f862bd991 ("drm: Implement DRM aperture
# helpers under video/") in v6.0
CODE="
#if defined(NV_LINUX_APERTURE_H_PRESENT)
#include <linux/aperture.h>
#endif
void conftest_aperture_remove_conflicting_devices(void) {
aperture_remove_conflicting_devices();
}"
compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT" "" "functions"
;;

aperture_remove_conflicting_pci_devices)
#
# Determine whether aperture_remove_conflicting_pci_devices is present.
#
# Added by commit 7283f862bd991 ("drm: Implement DRM aperture
# helpers under video/") in v6.0
CODE="
#if defined(NV_LINUX_APERTURE_H_PRESENT)
#include <linux/aperture.h>
#endif
void conftest_aperture_remove_conflicting_pci_devices(void) {
aperture_remove_conflicting_pci_devices();
}"
compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT" "" "functions"
;;

drm_aperture_remove_conflicting_pci_framebuffers)
@@ -6705,17 +6885,17 @@ compile_test() {
# This test is not complete and may return false positive.
#
CODE="
#include <crypto/akcipher.h>
#include <crypto/algapi.h>
#include <crypto/ecc_curve.h>
#include <crypto/ecdh.h>
#include <crypto/hash.h>
#include <crypto/internal/ecc.h>
#include <crypto/kpp.h>
#include <crypto/public_key.h>
#include <crypto/sm3.h>
#include <keys/asymmetric-type.h>
#include <linux/crypto.h>
void conftest_crypto(void) {
struct shash_desc sd;
struct crypto_shash cs;
@@ -6725,6 +6905,47 @@ compile_test() {
compile_check_conftest "$CODE" "NV_CRYPTO_PRESENT" "" "symbols"
;;

crypto_akcipher_verify)
#
# Determine whether the crypto_akcipher_verify API is still present.
# It was removed by commit 6b34562 ('crypto: akcipher - Drop sign/verify operations')
# in v6.13-rc1 (2024-10-04).
#
# This test is dependent on the crypto conftest to determine whether crypto should be
# enabled at all.
#
# The test merely checks for the presence of the API, as it assumes that if the API
# is no longer present, the new API to replace it (crypto_sig_verify) must be present.
# If the kernel version is too old to have crypto_akcipher_verify, it will fail the crypto
# conftest above and all crypto code will be compiled out.
#
CODE="
#include <crypto/akcipher.h>
#include <linux/crypto.h>
void conftest_crypto_akcipher_verify(void) {
(void)crypto_akcipher_verify;
}"

compile_check_conftest "$CODE" "NV_CRYPTO_AKCIPHER_VERIFY_PRESENT" "" "symbols"
;;
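
As a hedged illustration of the gating this enables (the backend macro below is a placeholder, not code from this change): keep the legacy akcipher header while crypto_akcipher_verify exists, and fall back to crypto/sig.h, which declares crypto_sig_verify(), once it is gone:

```c
#include "conftest.h"   /* generated by conftest.sh */

/*
 * Hypothetical backend selection, not code from this change: while the
 * legacy akcipher verify operation exists, keep using it; once it is
 * removed (v6.13), crypto/sig.h provides the replacement interface.
 */
#if defined(NV_CRYPTO_AKCIPHER_VERIFY_PRESENT)
#include <crypto/akcipher.h>
#define NV_PKEY_VERIFY_BACKEND "akcipher"
#else
#include <crypto/sig.h>
#define NV_PKEY_VERIFY_BACKEND "crypto_sig"
#endif
```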

ecc_digits_from_bytes)
#
# Determine whether ecc_digits_from_bytes is present.
# It was added in commit c6ab5c915da4 ('crypto: ecc - Prevent ecc_digits_from_bytes from
# reading too many bytes') in v6.10.
#
# This functionality is needed when crypto_akcipher_verify is not present.
#
CODE="
#include <crypto/internal/ecc.h>
void conftest_ecc_digits_from_bytes(void) {
(void)ecc_digits_from_bytes;
}"

compile_check_conftest "$CODE" "NV_ECC_DIGITS_FROM_BYTES_PRESENT" "" "symbols"
;;

mempolicy_has_unified_nodes)
#
# Determine if the 'mempolicy' structure has
@@ -6850,45 +7071,12 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_HAS_SUPPORTED_COLORSPACES_ARG" "" "types"
;;

drm_syncobj_features_present)
# Determine if DRIVER_SYNCOBJ and DRIVER_SYNCOBJ_TIMELINE DRM
# driver features are present. Timeline DRM synchronization objects
# may only be used if both of these are supported by the driver.
#
# DRIVER_SYNCOBJ_TIMELINE Added by commit 060cebb20cdb ("drm:
# introduce a capability flag for syncobj timeline support") in
# v5.2
#
# DRIVER_SYNCOBJ Added by commit e9083420bbac ("drm: introduce
# sync objects (v4)") in v4.12
CODE="
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
int features = DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE;"

compile_check_conftest "$CODE" "NV_DRM_SYNCOBJ_FEATURES_PRESENT" "" "types"
;;

stack_trace)
# Determine if functions stack_trace_{save,print} are present.
# Added by commit e9b98e162 ("stacktrace: Provide helpers for
# common stack trace operations") in v5.2.
CODE="
#include <linux/stacktrace.h>
void conftest_stack_trace(void) {
stack_trace_save();
stack_trace_print();
}"

compile_check_conftest "$CODE" "NV_STACK_TRACE_PRESENT" "" "functions"
;;

drm_unlocked_ioctl_flag_present)
# Determine if DRM_UNLOCKED IOCTL flag is present.
#
# DRM_UNLOCKED was removed by commit 2798ffcc1d6a ("drm: Remove
# locking for legacy ioctls and DRM_UNLOCKED") in v6.8.
# locking for legacy ioctls and DRM_UNLOCKED") in Linux
# next-20231208.
#
# DRM_UNLOCKED definition was moved from drmP.h to drm_ioctl.h by
# commit 2640981f3600 ("drm: document drm_ioctl.[hc]") in v4.12.
@@ -6904,6 +7092,74 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types"
;;

folio_test_swapcache)
#
# Determine if the folio_test_swapcache() function is present.
#
# folio_test_swapcache() was exported by commit d389a4a811551 ("mm:
# Add folio flag manipulation functions") in v5.16.
#
CODE="
#include <linux/page-flags.h>
void conftest_folio_test_swapcache(void) {
folio_test_swapcache();
}"

compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
;;
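
A hedged sketch of the portability wrapper this define enables (the helper name is hypothetical; pre-folio kernels keep the page-based PageSwapCache() test):

```c
#include <linux/page-flags.h>
#include "conftest.h"   /* generated by conftest.sh */

/* Hypothetical helper: is this page currently in the swap cache? */
static bool nv_page_in_swapcache(struct page *page)
{
#if defined(NV_FOLIO_TEST_SWAPCACHE_PRESENT)
    return folio_test_swapcache(page_folio(page));  /* v5.16+ folio API */
#else
    return PageSwapCache(page);                     /* legacy page flag */
#endif
}
```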

drm_driver_has_date)
#
# Determine if the 'drm_driver' structure has a 'date' field.
#
# Removed by commit cb2e1c2136f7 ("drm: remove driver date from
# struct drm_driver and all drivers") in linux-next, expected in
# v6.14.
#
CODE="
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif

#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif

int conftest_drm_driver_has_date(void) {
return offsetof(struct drm_driver, date);
}"

compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types"
;;

drm_connector_helper_funcs_mode_valid_has_const_mode_arg)
#
# Determine if the 'mode' pointer argument is const in
# drm_connector_helper_funcs::mode_valid.
#
# The 'mode' pointer argument in
# drm_connector_helper_funcs::mode_valid was made const by commit
# 26d6fd81916e ("drm/connector: make mode_valid take a const struct
# drm_display_mode") in linux-next, expected in v6.15.
#
CODE="
#if defined(NV_DRM_DRM_ATOMIC_HELPER_H_PRESENT)
#include <drm/drm_atomic_helper.h>
#endif

static int conftest_drm_connector_mode_valid(struct drm_connector *connector,
const struct drm_display_mode *mode) {
return 0;
}

const struct drm_connector_helper_funcs conftest_drm_connector_helper_funcs = {
.mode_valid = conftest_drm_connector_mode_valid,
};"

compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
;;

# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit. Please
# avoid specifying -rc kernels, and only use SHAs that actually exist

@@ -15,6 +15,8 @@ NV_HEADER_PRESENCE_TESTS = \
drm/drm_atomic_uapi.h \
drm/drm_drv.h \
drm/drm_fbdev_generic.h \
drm/drm_fbdev_ttm.h \
drm/drm_client_setup.h \
drm/drm_framebuffer.h \
drm/drm_connector.h \
drm/drm_probe_helper.h \
@@ -32,6 +34,7 @@ NV_HEADER_PRESENCE_TESTS = \
generated/autoconf.h \
generated/compile.h \
generated/utsrelease.h \
linux/aperture.h \
linux/efi.h \
linux/kconfig.h \
linux/platform/tegra/mc_utils.h \
@@ -52,7 +55,6 @@ NV_HEADER_PRESENCE_TESTS = \
linux/dma-resv.h \
soc/tegra/chip-id.h \
soc/tegra/fuse.h \
soc/tegra/fuse-helper.h \
soc/tegra/tegra_bpmp.h \
video/nv_internal.h \
linux/platform/tegra/dce/dce-client-ipc.h \
@@ -98,5 +100,7 @@ NV_HEADER_PRESENCE_TESTS = \
linux/sync_file.h \
linux/cc_platform.h \
asm/cpufeature.h \
linux/mpi.h
linux/mpi.h \
asm/mshyperv.h \
crypto/sig.h

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -176,7 +176,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
{

unsigned i, j;
const static unsigned attempts = 3;
static const unsigned attempts = 3;
struct task_struct *thread[3];

for (i = 0;; i++) {
@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;

// Get the NUMA node where the first page of the stack is resident. If

@@ -62,6 +62,17 @@
#undef NV_DRM_FENCE_AVAILABLE
#endif

#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
(defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))
// XXX remove dependency on DRM_TTM_HELPER by implementing nvidia-drm's own
// .fbdev_probe callback that uses NVKMS kapi
#if IS_ENABLED(CONFIG_DRM_TTM_HELPER)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_CLIENT_AVAILABLE
#endif
#endif

/*
* We can support color management if either drm_helper_crtc_enable_color_mgmt()
* or drm_crtc_enable_color_mgmt() exist.

@@ -314,7 +314,11 @@ static int nv_drm_connector_get_modes(struct drm_connector *connector)
}

static int nv_drm_connector_mode_valid(struct drm_connector *connector,
#if defined(NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG)
const struct drm_display_mode *mode)
#else
struct drm_display_mode *mode)
#endif
{
struct drm_device *dev = connector->dev;
struct nv_drm_device *nv_dev = to_nv_device(dev);

@@ -176,10 +176,12 @@ cursor_plane_req_config_update(struct drm_plane *plane,
return;
}

memset(req_config, 0, sizeof(*req_config));
req_config->surface = to_nv_framebuffer(plane_state->fb)->pSurface;
req_config->dstX = plane_state->crtc_x;
req_config->dstY = plane_state->crtc_y;
*req_config = (struct NvKmsKapiCursorRequestedConfig) {
.surface = to_nv_framebuffer(plane_state->fb)->pSurface,

.dstX = plane_state->crtc_x,
.dstY = plane_state->crtc_y,
};

#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE)
if (plane->blend_mode_property != NULL && plane->alpha_property != NULL) {
@@ -273,22 +275,24 @@ plane_req_config_update(struct drm_plane *plane,
return 0;
}

memset(req_config, 0, sizeof(*req_config));
*req_config = (struct NvKmsKapiLayerRequestedConfig) {
.config = {
.surface = to_nv_framebuffer(plane_state->fb)->pSurface,

req_config->config.surface = to_nv_framebuffer(plane_state->fb)->pSurface;
/* Source values are 16.16 fixed point */
.srcX = plane_state->src_x >> 16,
.srcY = plane_state->src_y >> 16,
.srcWidth = plane_state->src_w >> 16,
.srcHeight = plane_state->src_h >> 16,

/* Source values are 16.16 fixed point */
req_config->config.srcX = plane_state->src_x >> 16;
req_config->config.srcY = plane_state->src_y >> 16;
req_config->config.srcWidth = plane_state->src_w >> 16;
req_config->config.srcHeight = plane_state->src_h >> 16;
.dstX = plane_state->crtc_x,
.dstY = plane_state->crtc_y,
.dstWidth = plane_state->crtc_w,
.dstHeight = plane_state->crtc_h,

req_config->config.dstX = plane_state->crtc_x;
req_config->config.dstY = plane_state->crtc_y;
req_config->config.dstWidth = plane_state->crtc_w;
req_config->config.dstHeight = plane_state->crtc_h;

req_config->config.csc = old_config.csc;
.csc = old_config.csc
},
};

#if defined(NV_DRM_ROTATION_AVAILABLE)
/*
@@ -684,7 +688,9 @@ static int nv_drm_plane_atomic_set_property(
to_nv_drm_plane_state(state);

if (property == nv_dev->nv_out_fence_property) {
nv_drm_plane_state->fd_user_ptr = (void __user *)(uintptr_t)(val);
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
#endif
return 0;
} else if (property == nv_dev->nv_input_colorspace_property) {
nv_drm_plane_state->input_colorspace = val;
@@ -869,12 +875,14 @@ static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
* there is no change in new configuration yet with respect
* to older one!
*/
memset(new, 0, sizeof(*new));
new->modeSetConfig = old->modeSetConfig;
*new = (struct NvKmsKapiHeadRequestedConfig) {
.modeSetConfig = old->modeSetConfig,
};

for (i = 0; i < ARRAY_SIZE(old->layerRequestedConfig); i++) {
new->layerRequestedConfig[i].config =
old->layerRequestedConfig[i].config;
new->layerRequestedConfig[i] = (struct NvKmsKapiLayerRequestedConfig) {
.config = old->layerRequestedConfig[i].config,
};
}
}

@@ -1681,7 +1689,7 @@ int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
struct NvKmsKapiCrcs crc32;

if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return -ENOENT;
return -EOPNOTSUPP;
}

crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
@@ -1709,7 +1717,7 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
struct NvKmsKapiCrcs crc32;

if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return -ENOENT;
return -EOPNOTSUPP;
}

crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);

@@ -64,12 +64,25 @@
#include <drm/drm_ioctl.h>
#endif

#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_LINUX_APERTURE_H_PRESENT)
#include <linux/aperture.h>
#endif

#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
#include <drm/drm_aperture.h>
#endif

#if defined(NV_DRM_FBDEV_AVAILABLE)
#include <drm/drm_fb_helper.h>
#endif

#if defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/drm_client_setup.h>
#endif

#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
#include <drm/drm_fbdev_ttm.h>
#elif defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
#include <drm/drm_fbdev_generic.h>
#endif

@@ -124,6 +137,7 @@ static const char* nv_get_input_colorspace_name(

#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)

#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
static void nv_drm_output_poll_changed(struct drm_device *dev)
{
struct drm_connector *connector = NULL;
@@ -167,6 +181,7 @@ static void nv_drm_output_poll_changed(struct drm_device *dev)
nv_drm_connector_list_iter_end(&conn_iter);
#endif
}
#endif /* NV_DRM_OUTPUT_POLL_CHANGED_PRESENT */

static struct drm_framebuffer *nv_drm_framebuffer_create(
struct drm_device *dev,
@@ -204,7 +219,9 @@ static const struct drm_mode_config_funcs nv_mode_config_funcs = {
.atomic_check = nv_drm_atomic_check,
.atomic_commit = nv_drm_atomic_commit,

#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
.output_poll_changed = nv_drm_output_poll_changed,
#endif
};

static void nv_drm_event_callback(const struct NvKmsKapiEvent *event)
@@ -373,15 +390,19 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
len++;
}

if (nv_dev->supportsSyncpts) {
nv_dev->nv_out_fence_property =
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
"NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
if (nv_dev->nv_out_fence_property == NULL) {
return -ENOMEM;
}
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
if (!nv_dev->supportsSyncpts) {
return 0;
}

nv_dev->nv_out_fence_property =
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
"NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
if (nv_dev->nv_out_fence_property == NULL) {
return -ENOMEM;
}
#endif

nv_dev->nv_input_colorspace_property =
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
enum_list, len);
@@ -476,7 +497,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
return -ENODEV;
}

#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
/*
* If fbdev is enabled, take modeset ownership now before other DRM clients
* can take master (and thus NVKMS ownership).
@@ -604,7 +625,7 @@ static void __nv_drm_unload(struct drm_device *dev)

/* Release modeset ownership if fbdev is enabled */

#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
if (nv_dev->hasFramebufferConsole) {
drm_atomic_helper_shutdown(dev);
nvKms->releaseOwnership(nv_dev->pDevice);
@@ -802,14 +823,6 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
return 0;
}

static int nv_drm_get_drm_file_unique_id_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
struct drm_nvidia_get_drm_file_unique_id_params *params = data;
params->id = (u64)(filep->driver_priv);
return 0;
}

static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
@@ -863,13 +876,18 @@ static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
struct drm_file *filep)
{
struct drm_nvidia_get_dpy_id_for_connector_id_params *params = data;
struct drm_connector *connector;
struct nv_drm_connector *nv_connector;
int ret = 0;

if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return -EOPNOTSUPP;
}

// Importantly, drm_connector_lookup (with filep) will only return the
// connector if we are master, a lessee with the connector, or not master at
// all. It will return NULL if we are a lessee with other connectors.
struct drm_connector *connector =
nv_drm_connector_lookup(dev, filep, params->connectorId);
struct nv_drm_connector *nv_connector;
int ret = 0;
connector = nv_drm_connector_lookup(dev, filep, params->connectorId);

if (!connector) {
return -EINVAL;
@@ -902,6 +920,11 @@ static int nv_drm_get_connector_id_for_dpy_id_ioctl(struct drm_device *dev,
int ret = -EINVAL;
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
struct drm_connector_list_iter conn_iter;
#endif
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return -EOPNOTSUPP;
}
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
nv_drm_connector_list_iter_begin(dev, &conn_iter);
#endif

@@ -1114,6 +1137,10 @@ static int nv_drm_grant_permission_ioctl(struct drm_device *dev, void *data,
{
struct drm_nvidia_grant_permissions_params *params = data;

if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return -EOPNOTSUPP;
}

if (params->type == NV_DRM_PERMISSIONS_TYPE_MODESET) {
return nv_drm_grant_modeset_permission(dev, params, filep);
} else if (params->type == NV_DRM_PERMISSIONS_TYPE_SUB_OWNER) {
@@ -1279,6 +1306,10 @@ static int nv_drm_revoke_permission_ioctl(struct drm_device *dev, void *data,
{
struct drm_nvidia_revoke_permissions_params *params = data;

if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return -EOPNOTSUPP;
}

if (params->type == NV_DRM_PERMISSIONS_TYPE_MODESET) {
if (!params->dpyId) {
return -EINVAL;
@@ -1308,17 +1339,6 @@ static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */

static int nv_drm_open(struct drm_device *dev, struct drm_file *filep)
{
_Static_assert(sizeof(filep->driver_priv) >= sizeof(u64),
"filep->driver_priv can not hold an u64");
static atomic64_t id = ATOMIC_INIT(0);

filep->driver_priv = (void *)atomic64_inc_return(&id);

return 0;
}

#if defined(NV_DRM_MASTER_HAS_LEASES)
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
int lessee_id)
@@ -1544,6 +1564,10 @@ static const struct file_operations nv_drm_fops = {
.read = drm_read,

.llseek = noop_llseek,

#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
.fop_flags = FOP_UNSIGNED_OFFSET,
#endif
};

static const struct drm_ioctl_desc nv_drm_ioctls[] = {
@@ -1562,9 +1586,6 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DEV_INFO,
nv_drm_get_dev_info_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DRM_FILE_UNIQUE_ID,
nv_drm_get_drm_file_unique_id_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),

#if defined(NV_DRM_FENCE_AVAILABLE)
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
@@ -1647,9 +1668,6 @@ static struct drm_driver nv_drm_driver = {
.driver_features =
#if defined(NV_DRM_DRIVER_PRIME_FLAG_PRESENT)
DRIVER_PRIME |
#endif
#if defined(NV_DRM_SYNCOBJ_FEATURES_PRESENT)
DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE |
#endif
DRIVER_GEM | DRIVER_RENDER,

@@ -1661,14 +1679,14 @@ static struct drm_driver nv_drm_driver = {
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),

/*
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers
* for fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
* conversion") unexports drm_gem_prime_handle_to_fd() and
* drm_gem_prime_fd_to_handle().
*
* Prior Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME
* import/export for all drivers") made these helpers the default when
* .prime_handle_to_fd / .prime_fd_to_handle are unspecified, so it's fine
* to just skip specifying them if the helpers aren't present.
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
* all drivers") made these helpers the default when .prime_handle_to_fd /
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
* them if the helpers aren't present.
*/
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
@@ -1702,7 +1720,6 @@ static struct drm_driver nv_drm_driver = {
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
.postclose = nv_drm_postclose,
#endif
.open = nv_drm_open,

.fops = &nv_drm_fops,

@@ -1713,13 +1730,20 @@ static struct drm_driver nv_drm_driver = {
.name = "nvidia-drm",

.desc = "NVIDIA DRM driver",

#if defined(NV_DRM_DRIVER_HAS_DATE)
.date = "20160202",
#endif

#if defined(NV_DRM_DRIVER_HAS_DEVICE_LIST)
.device_list = LIST_HEAD_INIT(nv_drm_driver.device_list),
#elif defined(NV_DRM_DRIVER_HAS_LEGACY_DEV_LIST)
.legacy_dev_list = LIST_HEAD_INIT(nv_drm_driver.legacy_dev_list),
#endif
// XXX implement nvidia-drm's own .fbdev_probe callback that uses NVKMS kapi directly
#if defined(NV_DRM_FBDEV_AVAILABLE) && defined(DRM_FBDEV_TTM_DRIVER_OPS)
DRM_FBDEV_TTM_DRIVER_OPS,
#endif
};

@@ -1761,7 +1785,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
struct nv_drm_device *nv_dev = NULL;
struct drm_device *dev = NULL;
struct device *device = gpu_info->os_device_ptr;
bool bus_is_pci;

DRM_DEBUG(
"Registering device for NVIDIA GPU ID 0x%08x",
@@ -1795,7 +1818,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
dev->dev_private = nv_dev;
nv_dev->dev = dev;

bus_is_pci =
bool bus_is_pci =
#if defined(NV_LINUX)
device->bus == &pci_bus_type;
#elif defined(NV_BSD)
@@ -1815,22 +1838,34 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
goto failed_drm_register;
}

#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
if (nv_drm_fbdev_module_param &&
drm_core_check_feature(dev, DRIVER_MODESET)) {

if (bus_is_pci) {
struct pci_dev *pdev = to_pci_dev(device);

#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)

#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_HAS_DRIVER_ARG)
drm_aperture_remove_conflicting_pci_framebuffers(pdev, &nv_drm_driver);
#else
drm_aperture_remove_conflicting_pci_framebuffers(pdev, nv_drm_driver.name);
#endif

#elif defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT)
aperture_remove_conflicting_pci_devices(pdev, nv_drm_driver.name);
#endif
}
#if defined(NV_DRM_CLIENT_AVAILABLE)
drm_client_setup(dev, NULL);
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
drm_fbdev_ttm_setup(dev, 32);
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
drm_fbdev_generic_setup(dev, 32);
#endif
}
#endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */
#endif /* defined(NV_DRM_FBDEV_AVAILABLE) */

/* Add NVIDIA-DRM device into list */

@@ -1972,12 +2007,12 @@ void nv_drm_suspend_resume(NvBool suspend)

if (suspend) {
drm_kms_helper_poll_disable(dev);
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 1);
#endif
drm_mode_config_reset(dev);
} else {
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 0);
#endif
drm_kms_helper_poll_enable(dev);

@@ -293,12 +293,14 @@ __nv_drm_prime_fence_context_new(
* to check a return value.
*/

nv_prime_fence_context->base.ops = &nv_drm_prime_fence_context_ops;
nv_prime_fence_context->base.nv_dev = nv_dev;
nv_prime_fence_context->base.context = nv_dma_fence_context_alloc(1);
nv_prime_fence_context->base.fenceSemIndex = p->index;
nv_prime_fence_context->pSemSurface = pSemSurface;
nv_prime_fence_context->pLinearAddress = pLinearAddress;
*nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
.base.ops = &nv_drm_prime_fence_context_ops,
.base.nv_dev = nv_dev,
.base.context = nv_dma_fence_context_alloc(1),
.base.fenceSemIndex = p->index,
.pSemSurface = pSemSurface,
.pLinearAddress = pLinearAddress,
};

INIT_LIST_HEAD(&nv_prime_fence_context->pending);

@@ -463,10 +465,15 @@ int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_prime_fence_context_create_params *p = data;
struct nv_drm_prime_fence_context *nv_prime_fence_context =
__nv_drm_prime_fence_context_new(nv_dev, p);
struct nv_drm_prime_fence_context *nv_prime_fence_context;
int err;

if (nv_dev->pDevice == NULL) {
return -EOPNOTSUPP;
}

nv_prime_fence_context = __nv_drm_prime_fence_context_new(nv_dev, p);

if (!nv_prime_fence_context) {
goto done;
}
@@ -521,6 +528,11 @@ int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
struct nv_drm_fence_context *nv_fence_context;
nv_dma_fence_t *fence;

if (nv_dev->pDevice == NULL) {
ret = -EOPNOTSUPP;
goto done;
}

if (p->__pad != 0) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
goto done;
@@ -1259,16 +1271,18 @@ __nv_drm_semsurf_fence_ctx_new(
* to check a return value.
*/

ctx->base.ops = &nv_drm_semsurf_fence_ctx_ops;
ctx->base.nv_dev = nv_dev;
ctx->base.context = nv_dma_fence_context_alloc(1);
ctx->base.fenceSemIndex = p->index;
ctx->pSemSurface = pSemSurface;
ctx->pSemMapping.pVoid = semMapping;
ctx->pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping;
ctx->callback.local = NULL;
ctx->callback.nvKms = NULL;
ctx->current_wait_value = 0;
*ctx = (struct nv_drm_semsurf_fence_ctx) {
.base.ops = &nv_drm_semsurf_fence_ctx_ops,
.base.nv_dev = nv_dev,
.base.context = nv_dma_fence_context_alloc(1),
.base.fenceSemIndex = p->index,
.pSemSurface = pSemSurface,
.pSemMapping.pVoid = semMapping,
.pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping,
.callback.local = NULL,
.callback.nvKms = NULL,
.current_wait_value = 0,
};

spin_lock_init(&ctx->lock);
INIT_LIST_HEAD(&ctx->pending_fences);
@@ -1308,6 +1322,10 @@ int nv_drm_semsurf_fence_ctx_create_ioctl(struct drm_device *dev,
struct nv_drm_semsurf_fence_ctx *ctx;
int err;

if (nv_dev->pDevice == NULL) {
return -EOPNOTSUPP;
}

if (p->__pad != 0) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
return -EINVAL;
@@ -1469,6 +1487,11 @@ int nv_drm_semsurf_fence_create_ioctl(struct drm_device *dev,
int ret = -EINVAL;
int fd;

if (nv_dev->pDevice == NULL) {
ret = -EOPNOTSUPP;
goto done;
}

if (p->__pad != 0) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
goto done;
@@ -1631,6 +1654,10 @@ int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
unsigned long flags;
int ret = -EINVAL;

if (nv_dev->pDevice == NULL) {
return -EOPNOTSUPP;
}

if (p->pre_wait_value >= p->post_wait_value) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
@@ -1739,6 +1766,11 @@ int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
nv_dma_fence_t *fence;
int ret = -EINVAL;

if (nv_dev->pDevice == NULL) {
ret = -EOPNOTSUPP;
goto done;
}

nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);

if (!nv_gem) {

@@ -380,7 +380,7 @@ int nv_drm_gem_import_nvkms_memory_ioctl(struct drm_device *dev,
int ret;

if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
ret = -EINVAL;
ret = -EOPNOTSUPP;
goto failed;
}

@@ -430,7 +430,7 @@ int nv_drm_gem_export_nvkms_memory_ioctl(struct drm_device *dev,
int ret = 0;

if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
ret = -EINVAL;
ret = -EOPNOTSUPP;
goto done;
}

@@ -483,7 +483,7 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
int ret = 0;

if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
ret = -EINVAL;
ret = -EOPNOTSUPP;
goto failed;
}

@@ -551,12 +551,14 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
const struct nv_drm_device *nv_dev_src;
const struct nv_drm_gem_nvkms_memory *nv_nvkms_memory_src;
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory;
struct NvKmsKapiMemory *pMemory;

BUG_ON(nv_gem_src == NULL || nv_gem_src->ops != &nv_gem_nvkms_memory_ops);

nv_dev_src = to_nv_device(nv_gem_src->base.dev);
nv_nvkms_memory_src = to_nv_nvkms_memory_const(nv_gem_src);

if ((nv_nvkms_memory =
nv_drm_calloc(1, sizeof(*nv_nvkms_memory))) == NULL) {

@@ -319,7 +319,7 @@ int nv_drm_gem_identify_object_ioctl(struct drm_device *dev,
struct nv_drm_gem_object *nv_gem = NULL;

if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
return -EINVAL;
return -EOPNOTSUPP;
}

nv_dma_buf = nv_drm_gem_object_dma_buf_lookup(dev, filep, p->handle);

@@ -45,7 +45,8 @@

/*
* The inclusion of drm_framebuffer.h was removed from drm_crtc.h by commit
* 720cf96d8fec ("drm: Drop drm_framebuffer.h from drm_crtc.h") in v6.0.
* 720cf96d8fecde29b72e1101f8a567a0ce99594f ("drm: Drop drm_framebuffer.h from
* drm_crtc.h") in linux-next, expected in v5.19-rc7.
*
* We only need drm_framebuffer.h for drm_framebuffer_put(), and it is always
* present (v4.9+) when drm_framebuffer_{put,get}() is present (v4.12+), so it

@@ -613,8 +613,8 @@ static inline int nv_drm_format_num_planes(uint32_t format)
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */

/*
* DRM_UNLOCKED was removed with commit 2798ffcc1d6a ("drm: Remove locking for
* legacy ioctls and DRM_UNLOCKED") in v6.8, but it was previously made
* DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
* locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
* implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
* fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
* commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver

@@ -52,7 +52,6 @@
#define DRM_NVIDIA_SEMSURF_FENCE_CREATE 0x15
#define DRM_NVIDIA_SEMSURF_FENCE_WAIT 0x16
#define DRM_NVIDIA_SEMSURF_FENCE_ATTACH 0x17
#define DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID 0x18

#define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY), \
@@ -158,11 +157,6 @@
DRM_NVIDIA_SEMSURF_FENCE_ATTACH), \
struct drm_nvidia_semsurf_fence_attach_params)

#define DRM_IOCTL_NVIDIA_GET_DRM_FILE_UNIQUE_ID \
DRM_IOWR((DRM_COMMAND_BASE + \
DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID), \
struct drm_nvidia_get_drm_file_unique_id_params)

struct drm_nvidia_gem_import_nvkms_memory_params {
uint64_t mem_size; /* IN */

@@ -391,8 +385,4 @@ struct drm_nvidia_semsurf_fence_attach_params {
uint64_t wait_value; /* IN Semaphore value to reach before signal */
};

struct drm_nvidia_get_drm_file_unique_id_params {
uint64_t id; /* OUT Unique ID of the DRM file */
};

#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */

@@ -34,7 +34,7 @@ MODULE_PARM_DESC(
"Enable atomic kernel modesetting (1 = enable, 0 = disable (default))");
module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);

#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
MODULE_PARM_DESC(
fbdev,
"Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");

@@ -587,9 +587,6 @@ int nv_drm_atomic_commit(struct drm_device *dev,
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Flip event timeout on head %u", nv_crtc->head);
while (!list_empty(&nv_crtc->flip_list)) {
__nv_drm_handle_flip_event(nv_crtc);
}
}
}
}

@@ -59,14 +59,20 @@ typedef struct nv_timer nv_drm_timer;
#endif

#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
#endif

#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_TTM_AVAILABLE
#endif

struct page;

/* Set to true when the atomic modeset feature is enabled. */
extern bool nv_drm_modeset_module_param;
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#if defined(NV_DRM_FBDEV_AVAILABLE)
/* Set to true when the nvidia-drm driver should install a framebuffer device */
extern bool nv_drm_fbdev_module_param;
#endif

@@ -66,7 +66,11 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_pci_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_ttm_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
@@ -128,5 +132,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -176,7 +176,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
{

unsigned i, j;
const static unsigned attempts = 3;
static const unsigned attempts = 3;
struct task_struct *thread[3];

for (i = 0;; i++) {
@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;

// Get the NUMA node where the first page of the stack is resident. If

@@ -77,10 +77,10 @@ module_param_named(disable_hdmi_frl, disable_hdmi_frl, bool, 0400);
static bool disable_vrr_memclk_switch = false;
module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);

static bool hdmi_deepcolor = true;
static bool hdmi_deepcolor = false;
module_param_named(hdmi_deepcolor, hdmi_deepcolor, bool, 0400);

static bool vblank_sem_control = true;
static bool vblank_sem_control = false;
module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);

static bool opportunistic_display_sync = true;
@@ -139,20 +139,6 @@ NvBool nvkms_opportunistic_display_sync(void)
return opportunistic_display_sync;
}

NvBool nvkms_kernel_supports_syncpts(void)
{
/*
* Note this only checks that the kernel has the prerequisite
* support for syncpts; callers must also check that the hardware
* supports syncpts.
*/
#if (defined(CONFIG_TEGRA_GRHOST) || defined(NV_LINUX_HOST1X_NEXT_H_PRESENT))
return NV_TRUE;
#else
return NV_FALSE;
#endif
}

#define NVKMS_SYNCPT_STUBS_NEEDED

/*************************************************************************
@@ -1010,6 +996,11 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,

#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
if (!acpi_video_backlight_use_native()) {
#if defined(NV_ACPI_VIDEO_REGISTER_BACKLIGHT)
nvkms_log(NVKMS_LOG_LEVEL_INFO, NVKMS_LOG_PREFIX,
"ACPI reported no NVIDIA native backlight available; attempting to use ACPI backlight.");
acpi_video_register_backlight();
#endif
return NULL;
}
#endif
@@ -1084,7 +1075,7 @@ static void nvkms_kapi_event_kthread_q_callback(void *arg)
nvKmsKapiHandleEventQueueChange(device);
}

struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
static struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
struct NvKmsKapiDevice *device,
int *status)
{
@@ -1136,7 +1127,7 @@ failed:
return NULL;
}

void nvkms_close_pm_locked(struct nvkms_per_open *popen)
static void nvkms_close_pm_locked(struct nvkms_per_open *popen)
{
/*
* Don't use down_interruptible(): we need to free resources
@@ -1199,7 +1190,7 @@ static void nvkms_close_popen(struct nvkms_per_open *popen)
}
}

int nvkms_ioctl_common
static int nvkms_ioctl_common
(
struct nvkms_per_open *popen,
NvU32 cmd, NvU64 address, const size_t size
@@ -1248,26 +1239,6 @@ void nvkms_close_from_kapi(struct nvkms_per_open *popen)
nvkms_close_pm_unlocked(popen);
}

NvBool nvkms_ioctl_from_kapi_try_pmlock
(
struct nvkms_per_open *popen,
NvU32 cmd, void *params_address, const size_t param_size
)
{
NvBool ret;

if (nvkms_read_trylock_pm_lock()) {
return NV_FALSE;
}

ret = nvkms_ioctl_common(popen,
cmd,
(NvU64)(NvUPtr)params_address, param_size) == 0;
nvkms_read_unlock_pm_lock();

return ret;
}

NvBool nvkms_ioctl_from_kapi
(
struct nvkms_per_open *popen,

@@ -304,11 +304,6 @@ NvU32 nvkms_enumerate_gpus(nv_gpu_info_t *gpu_info);

NvBool nvkms_allow_write_combining(void);

/*!
* Check if OS supports syncpoints.
*/
NvBool nvkms_kernel_supports_syncpts(void);

/*!
* Checks whether the fd is associated with an nvidia character device.
*/
@@ -333,16 +328,6 @@ NvBool nvkms_ioctl_from_kapi
NvU32 cmd, void *params_address, const size_t params_size
);

/*!
* Like nvkms_ioctl_from_kapi, but return NV_FALSE instead of waiting if the
* power management read lock cannot be acquired.
*/
NvBool nvkms_ioctl_from_kapi_try_pmlock
(
struct nvkms_per_open *popen,
NvU32 cmd, void *params_address, const size_t params_size
);

/*!
* APIs for locking.
*/

@@ -40,9 +40,6 @@ NV_KERNEL_MODULE_TARGETS += $(NVIDIA_MODESET_KO)
NVIDIA_MODESET_BINARY_OBJECT := $(src)/nvidia-modeset/nv-modeset-kernel.o_binary
NVIDIA_MODESET_BINARY_OBJECT_O := nvidia-modeset/nv-modeset-kernel.o

quiet_cmd_symlink = SYMLINK $@
cmd_symlink = ln -sf $< $@

targets += $(NVIDIA_MODESET_BINARY_OBJECT_O)

$(obj)/$(NVIDIA_MODESET_BINARY_OBJECT_O): $(NVIDIA_MODESET_BINARY_OBJECT) FORCE
@@ -105,4 +102,4 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -81,7 +81,7 @@
#define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS)

// This exists in order to have a function to place a breakpoint on:
void on_nvq_assert(void)
static void on_nvq_assert(void)
{
(void)NULL;
}

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -176,7 +176,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
{

unsigned i, j;
const static unsigned attempts = 3;
static const unsigned attempts = 3;
struct task_struct *thread[3];

for (i = 0;; i++) {
@@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),

// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
if (i == (attempts - 1))
break;

// Get the NUMA node where the first page of the stack is resident. If

@@ -8,7 +8,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvstatus.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvCpuUuid.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nv-kthread-q.c

@@ -13,19 +13,6 @@ NVIDIA_UVM_OBJECTS =
include $(src)/nvidia-uvm/nvidia-uvm-sources.Kbuild
NVIDIA_UVM_OBJECTS += $(patsubst %.c,%.o,$(NVIDIA_UVM_SOURCES))

# Some linux kernel functions rely on being built with optimizations on and
# to work around this we put wrappers for them in a separate file that's built
# with optimizations on in debug builds and skipped in other builds.
# Notably gcc 4.4 supports per function optimization attributes that would be
# easier to use, but is too recent to rely on for now.
NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE := nvidia-uvm/uvm_debug_optimized.c
|
||||
NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT := $(patsubst %.c,%.o,$(NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE))
|
||||
|
||||
ifneq ($(UVM_BUILD_TYPE),debug)
|
||||
# Only build the wrappers on debug builds
|
||||
NVIDIA_UVM_OBJECTS := $(filter-out $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_OBJECTS))
|
||||
endif
|
||||
|
||||
obj-m += nvidia-uvm.o
|
||||
nvidia-uvm-y := $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
@@ -36,15 +23,14 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
|
||||
#
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
|
||||
else
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
# work:
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG
|
||||
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
|
||||
endif
|
||||
NVIDIA_UVM_CFLAGS += -O2
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -g
|
||||
endif
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
# work:
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG
|
||||
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
|
||||
endif
|
||||
|
||||
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_ENABLED
|
||||
@@ -56,11 +42,6 @@ NVIDIA_UVM_CFLAGS += -I$(src)/nvidia-uvm
|
||||
|
||||
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_OBJECTS), $(NVIDIA_UVM_CFLAGS))
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
# Force optimizations on for the wrappers
|
||||
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_CFLAGS) -O2)
|
||||
endif
|
||||
|
||||
#
|
||||
# Register the conftests needed by nvidia-uvm.ko
|
||||
#
|
||||
@@ -88,6 +69,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
|
||||
@@ -127,9 +127,9 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
|
||||
goto err;
|
||||
}
|
||||
|
||||
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING);
|
||||
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING);
|
||||
old_fd_type &= UVM_FD_TYPE_MASK;
|
||||
if (old_fd_type != UVM_FD_UNINITIALIZED) {
|
||||
status = NV_ERR_IN_USE;
|
||||
@@ -682,6 +682,9 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
|
||||
// Semaphore pool vmas do not have vma wrappers, but some functions will
|
||||
// assume vm_private_data is a wrapper.
|
||||
vma->vm_private_data = NULL;
|
||||
#if defined(VM_WIPEONFORK)
|
||||
nv_vm_flags_set(vma, VM_WIPEONFORK);
|
||||
#endif
|
||||
|
||||
if (is_fork) {
|
||||
// If we forked, leave the parent vma alone.
|
||||
@@ -914,8 +917,9 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
|
||||
// attempt to be made. This is safe because other threads will have only had
|
||||
// a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
|
||||
// case.
|
||||
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
|
||||
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING);
|
||||
old_fd_type &= UVM_FD_TYPE_MASK;
|
||||
if (old_fd_type == UVM_FD_UNINITIALIZED) {
|
||||
status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
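Both hunks above replace the nv_atomic_long_cmpxchg() wrapper with the kernel's atomic_long_cmpxchg() while keeping the same one-time initialization protocol on filp->private_data: exactly one thread moves the state from UNINITIALIZED to INITIALIZING and performs setup. A freestanding sketch of that protocol, with illustrative state values rather than UVM's:

```c
#include <linux/atomic.h>
#include <linux/errno.h>

#define MY_FD_UNINITIALIZED 0L  /* illustrative states, not UVM's */
#define MY_FD_INITIALIZING  1L

/* Exactly one caller wins the right to initialize; losers see either
 * MY_FD_INITIALIZING or the final published value and back off. */
static int my_claim_for_init(atomic_long_t *state)
{
    long old = atomic_long_cmpxchg(state, MY_FD_UNINITIALIZED,
                                   MY_FD_INITIALIZING);

    if (old != MY_FD_UNINITIALIZED)
        return -EBUSY;

    /* ... initialize, then atomically publish the final state ... */
    return 0;
}
```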
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2024 NVIDIA Corporation
Copyright (c) 2013-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -379,6 +379,17 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
// OS state required to register the GPU is malformed, or the partition
// identified by the user handles or its configuration changed.
//
// NV_ERR_NVSWITCH_FABRIC_NOT_READY:
// (On NvSwitch-connected system) Indicates that the fabric has not been
// configured yet. Caller must retry GPU registration.
//
// NV_ERR_NVSWITCH_FABRIC_FAILURE:
// (On NvSwitch-connected systems) Indicates that the NvLink fabric
// failed to be configured.
//
// NV_ERR_GPU_MEMORY_ONLINING_FAILURE:
// (On coherent systems) The GPU's memory onlining failed.
//
// NV_ERR_GENERIC:
// Unexpected error. We try hard to avoid returning this error code,
// because it is not very informative.
@@ -1448,7 +1459,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
// the CPU.
// the CPU. -1 indicates no preference, in which case the pages used
// can be on any of the available CPU NUMA nodes. If NUMA is disabled
// only 0 and -1 are allowed.
//
// Error codes:
// NV_ERR_INVALID_ADDRESS:
@@ -1462,6 +1475,11 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
// The VA range exceeds the largest virtual address supported by the
// destination processor.
//
// NV_ERR_INVALID_ARGUMENT:
// preferredCpuMemoryNode is not a valid CPU NUMA node or it corresponds
// to a NUMA node ID for a registered GPU. If NUMA is disabled, it
// indicates that preferredCpuMemoryNode was not either 0 or -1.
//
// NV_ERR_INVALID_DEVICE:
// destinationUuid does not represent a valid processor such as a CPU or
// a GPU with a GPU VA space registered for it. Or destinationUuid is a
@@ -1528,8 +1546,9 @@ NV_STATUS UvmMigrate(void *base,
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
// the CPU. This argument is ignored if the given virtual address range
// corresponds to managed memory.
// the CPU. -1 indicates no preference, in which case the pages used
// can be on any of the available CPU NUMA nodes. If NUMA is disabled
// only 0 and -1 are allowed.
//
// semaphoreAddress: (INPUT)
// Base address of the semaphore.
@@ -1586,8 +1605,8 @@ NV_STATUS UvmMigrateAsync(void *base,
//
// Migrates the backing of all virtual address ranges associated with the given
// range group to the specified destination processor. The behavior of this API
// is equivalent to calling UvmMigrate on each VA range associated with this
// range group.
// is equivalent to calling UvmMigrate with preferredCpuMemoryNode = -1 on each
// VA range associated with this range group.
//
// Any errors encountered during migration are returned immediately. No attempt
// is made to migrate the remaining unmigrated ranges and the ranges that are
@@ -2169,7 +2188,8 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
//
// If any page in the VA range has a preferred location, then the migration and
// mapping policies associated with this API take precedence over those related
// to the preferred location.
// to the preferred location. If the preferred location is a specific CPU NUMA
// node, that NUMA node will be used for a CPU-resident copy of the page.
//
// If any pages in this VA range have any processors present in their
// accessed-by list, the migration and mapping policies associated with this
@@ -2300,7 +2320,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// UvmPreventMigrationRangeGroups has not been called on the range group that
// those pages are associated with, then the migration and mapping policies
// associated with UvmEnableReadDuplication override the policies outlined
// above. Note that enabling read duplication on on any pages in this VA range
// above. Note that enabling read duplication on any pages in this VA range
// does not clear the state set by this API for those pages. It merely overrides
// the policies associated with this state until read duplication is disabled
// for those pages.
@@ -2333,7 +2353,8 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
// UUID of the CPU. -1 is a special value which indicates all CPU nodes
// allowed by the global and thread memory policies.
// allowed by the global and thread memory policies. If NUMA is disabled
// only 0 and -1 are allowed.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@@ -3463,7 +3484,8 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
//

#if UVM_API_REV_IS_AT_MOST(10)
// This is deprecated and replaced by sizeof(UvmToolsEventControlData).
// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
// sizeof(UvmToolsEventControlData_V2).
NvLength UvmToolsGetEventControlSize(void);

// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
@@ -3487,6 +3509,8 @@ NvLength UvmToolsGetNumberOfCounters(void);
// version: (INPUT)
// Requested version for events or counters.
// See UvmEventEntry_V1 and UvmEventEntry_V2.
// UvmToolsEventControlData_V2::version records the entry version that
// will be generated.
//
// event_buffer: (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
@@ -3499,7 +3523,8 @@ NvLength UvmToolsGetNumberOfCounters(void);
//
// event_control (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold UvmToolsEventControlData (although single page-size allocation
// hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1 or
// UvmToolsEventControlData_V2 (although single page-size allocation
// should be more than enough). Gets pinned until queue is destroyed.
//
// queue: (OUTPUT)
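Under the versioned scheme described above, the caller sizes event_control from the requested entry version. A sketch, under the assumption that the caller has a flag distinguishing V1 from V2 entries (the _V1/_V2 struct names come from the text above; the surrounding variable names are illustrative):

```c
/* Sketch: choose the control-buffer size for the requested entry version.
 * The buffer must additionally be page-aligned, per the text above. */
size_t control_size = requested_v1 ? sizeof(UvmToolsEventControlData_V1)
                                   : sizeof(UvmToolsEventControlData_V2);
```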
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -205,18 +205,17 @@ void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
CLEAR_FAULTED_B, HWVALUE(C076, CLEAR_FAULTED_B, INST_HI, instance_ptr_hi));
}

// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;

UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);

@@ -231,8 +230,8 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);

// PDE3 is the highest level on Pascal-Ampere, see the comment in
// uvm_pascal_mmu.c for details.
// PDE3 is the highest level on Pascal, see the comment in uvm_pascal_mmu.c
// for details.
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;

@@ -243,12 +242,7 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}

if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);

NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
@@ -261,18 +255,16 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));

// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
uvm_hal_tlb_invalidate_membar(push, membar);
}

// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
// Copy from Volta, this version sets TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
@@ -280,7 +272,6 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 va_lo;
NvU32 va_hi;
NvU64 end;
@@ -290,9 +281,9 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);

UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);

// The invalidation size must be a power-of-two number of pages containing
@@ -334,7 +325,7 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);

// PDE3 is the highest level on Pascal-Ampere, see the comment in
// PDE3 is the highest level on Pascal-Ampere , see the comment in
// uvm_pascal_mmu.c for details.
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
@@ -346,15 +337,10 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}

if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);

NV_PUSH_4U(C56F, MEM_OP_A, HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
sysmembar_value |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
@@ -366,23 +352,21 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));

// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
gpu->parent->host_hal->membar_gpu(push);
uvm_hal_tlb_invalidate_membar(push, membar);
}

// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 invalidate_gpc_value = 0;
NvU32 aperture_value = 0;
NvU32 pdb_lo = 0;
NvU32 pdb_hi = 0;
NvU32 page_table_level = 0;
uvm_membar_t membar;

UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
@@ -397,7 +381,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);

if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
// PDE3 is the highest level on Pascal-Ampere, see the comment in
// PDE3 is the highest level on Pascal, see the comment in
// uvm_pascal_mmu.c for details.
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde3, params->page_table_level) - 1;
}
@@ -409,11 +393,6 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}

if (params->membar == UvmInvalidateTlbMemBarSys)
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);

if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
else
@@ -424,9 +403,9 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,

NvU32 va_lo = va & HWMASK(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NvU32 va_hi = va >> HWSIZE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
@@ -439,7 +418,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
else {
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
@@ -453,7 +432,12 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}

// GPU membar still requires an explicit membar method.
if (params->membar == UvmInvalidateTlbMemBarLocal)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
if (params->membar == UvmInvalidateTlbMemBarSys)
membar = UVM_MEMBAR_SYS;
else if (params->membar == UvmInvalidateTlbMemBarLocal)
membar = UVM_MEMBAR_GPU;
else
membar = UVM_MEMBAR_NONE;

uvm_hal_tlb_invalidate_membar(push, membar);
}
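All three invalidate paths now end in uvm_hal_tlb_invalidate_membar(), whose body is not part of these hunks. Judging by the code it replaces, the helper plausibly centralizes something like the following; this is an assumption, including the membar_sys method name (membar_gpu is the method used by the removed code):

```c
/* Sketch of what the shared helper plausibly does: the MEM_OP methods
 * above now always encode SYSMEMBAR as DIS, so both the SYS and GPU
 * cases would need an explicit trailing membar method. */
static void sketch_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    if (membar == UVM_MEMBAR_SYS)
        gpu->parent->host_hal->membar_sys(push);
    else if (membar == UVM_MEMBAR_GPU)
        gpu->parent->host_hal->membar_gpu(push);
}
```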
@@ -51,7 +51,7 @@ uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id)
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
}

static NvU32 page_table_depth_ampere(NvU64 page_size)
static NvU32 page_table_depth_ampere(NvU32 page_size)
{
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
if (page_size == UVM_PAGE_SIZE_2M)
@@ -62,14 +62,14 @@ static NvU32 page_table_depth_ampere(NvU64 page_size)
return 4;
}

static NvU64 page_sizes_ampere(void)
static NvU32 page_sizes_ampere(void)
{
return UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}

static uvm_mmu_mode_hal_t ampere_mmu_mode_hal;

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size)
{
static bool initialized = false;

@@ -47,7 +47,7 @@
{ \
params_type params; \
BUILD_BUG_ON(sizeof(params) > UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(&params, (void __user*)arg, sizeof(params))) \
if (copy_from_user(&params, (void __user*)arg, sizeof(params))) \
return -EFAULT; \
\
params.rmStatus = uvm_global_get_status(); \
@@ -60,7 +60,7 @@
params.rmStatus = function_name(&params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, &params, sizeof(params))) \
if (copy_to_user((void __user*)arg, &params, sizeof(params))) \
return -EFAULT; \
\
return 0; \
@@ -84,7 +84,7 @@
if (!params) \
return -ENOMEM; \
BUILD_BUG_ON(sizeof(*params) <= UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
if (copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
uvm_kvfree(params); \
return -EFAULT; \
} \
@@ -99,7 +99,7 @@
params->rmStatus = function_name(params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, params, sizeof(*params))) \
if (copy_to_user((void __user*)arg, params, sizeof(*params))) \
ret = -EFAULT; \
\
uvm_kvfree(params); \
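Stripped of the nv_ prefix wrappers, these macros expand to the standard Linux ioctl parameter-marshalling pattern. A freestanding sketch of that pattern; the handler body and parameter struct are illustrative:

```c
#include <linux/fs.h>
#include <linux/uaccess.h>

struct my_params { int in; int out; };  /* illustrative */

static long my_unlocked_ioctl(struct file *filp, unsigned int cmd,
                              unsigned long arg)
{
    struct my_params params;

    /* Copy the parameter block in from user space. */
    if (copy_from_user(&params, (void __user *)arg, sizeof(params)))
        return -EFAULT;

    params.out = params.in * 2;  /* ... real handler runs here ... */

    /* Copy results (including any status field) back out. */
    if (copy_to_user((void __user *)arg, &params, sizeof(params)))
        return -EFAULT;

    return 0;
}
```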
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2021 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2021 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -29,9 +29,10 @@
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#include "uvm_lock.h"
#include "uvm_ats_sva.h"

#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
#include "uvm_ats_sva.h"

#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())

typedef struct
{

@@ -855,6 +855,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
const UvmCslIv *decrypt_iv,
NvU32 key_version,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
@@ -869,6 +870,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
dst_plain + i * copy_size,
src_cipher + i * copy_size,
decrypt_iv + i,
key_version,
copy_size,
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
}
@@ -879,6 +881,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,
const UvmCslIv *decrypt_iv,
NvU32 key_version,
uvm_mem_t *auth_tag_mem,
size_t size,
NvU32 copy_size)
@@ -896,6 +899,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
dst_plain + i * copy_size,
src_cipher + i * copy_size,
decrypt_iv + i,
key_version,
copy_size,
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
}
@@ -959,7 +963,7 @@ static void gpu_encrypt(uvm_push_t *push,
i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
dst_cipher);

uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);

if (i > 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
@@ -1020,6 +1024,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
UvmCslIv *decrypt_iv = NULL;
UvmCslIv *encrypt_iv = NULL;
NvU32 key_version;
uvm_tracker_t tracker;
size_t src_plain_size;

@@ -1089,6 +1094,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,

gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);

// There shouldn't be any key rotation between the end of the push and the
// CPU decryption(s), but it is more robust against test changes to force
// decryption to use the saved key.
key_version = uvm_channel_pool_key_version(push.channel->pool);

TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);

TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
@@ -1101,6 +1111,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
dst_plain,
dst_cipher,
decrypt_iv,
key_version,
auth_tag_mem,
size,
copy_size),
@@ -1111,6 +1122,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
dst_plain,
dst_cipher,
decrypt_iv,
key_version,
auth_tag_mem,
size,
copy_size),

File diff suppressed because it is too large
@@ -228,21 +228,65 @@ typedef struct
// variant is required when the thread holding the pool lock must sleep
// (ex: acquire another mutex) deeper in the call stack, either in UVM or
// RM.
union {
union
{
uvm_spinlock_t spinlock;
uvm_mutex_t mutex;
};

// Secure operations require that uvm_push_begin order matches
// uvm_push_end order, because the engine's state is used in its internal
// operation and each push may modify this state. push_locks is protected by
// the channel pool lock.
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
struct
{
// Secure operations require that uvm_push_begin order matches
// uvm_push_end order, because the engine's state is used in its
// internal operation and each push may modify this state.
// push_locks is protected by the channel pool lock.
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);

// Counting semaphore for available and unlocked channels, it must be
// acquired before submitting work to a channel when the Confidential
// Computing feature is enabled.
uvm_semaphore_t push_sem;
// Counting semaphore for available and unlocked channels, it must be
// acquired before submitting work to a channel when the Confidential
// Computing feature is enabled.
uvm_semaphore_t push_sem;

// Per channel buffers in unprotected sysmem.
uvm_rm_mem_t *pool_sysmem;

// Per channel buffers in protected vidmem.
uvm_rm_mem_t *pool_vidmem;

struct
{
// Current encryption key version, incremented upon key rotation.
// While there are separate keys for encryption and decryption, the
// two keys are rotated at once, so the versioning applies to both.
NvU32 version;

// Lock used to ensure mutual exclusion during key rotation.
uvm_mutex_t mutex;

// CSL contexts passed to RM for key rotation. This is usually an
// array containing the CSL contexts associated with the channels in
// the pool. In the case of the WLC pool, the array also includes
// CSL contexts associated with LCIC channels.
UvmCslContext **csl_contexts;

// Number of elements in the CSL context array.
unsigned num_csl_contexts;

// Number of bytes encrypted, or decrypted, on the engine associated
// with the pool since the last key rotation. Only used during
// testing, to force key rotations after a certain encryption size,
// see UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD.
//
// Encryptions on a LCIC pool are accounted for in the paired WLC
// pool.
//
// TODO: Bug 4612912: these accounting variables can be removed once
// RM exposes an API to set the key rotation lower threshold.
atomic64_t encrypted;
atomic64_t decrypted;
} key_rotation;

} conf_computing;
} uvm_channel_pool_t;

struct uvm_channel_struct
@@ -322,43 +366,14 @@ struct uvm_channel_struct
// work launches to match the order of push end-s that triggered them.
volatile NvU32 gpu_put;

// Static pushbuffer for channels with static schedule (WLC/LCIC)
uvm_rm_mem_t *static_pb_protected_vidmem;

// Static pushbuffer staging buffer for WLC
uvm_rm_mem_t *static_pb_unprotected_sysmem;
void *static_pb_unprotected_sysmem_cpu;
void *static_pb_unprotected_sysmem_auth_tag_cpu;

// The above static locations are required by the WLC (and LCIC)
// schedule. Protected sysmem location completes WLC's independence
// from the pushbuffer allocator.
// Protected sysmem location makes WLC independent from the pushbuffer
// allocator. Unprotected sysmem and protected vidmem counterparts
// are allocated from the channel pool (sysmem, vidmem).
void *static_pb_protected_sysmem;

// Static tracking semaphore notifier values
// Because of LCIC's fixed schedule, the secure semaphore release
// mechanism uses two additional static locations for incrementing the
// notifier values. See:
// . channel_semaphore_secure_release()
// . setup_lcic_schedule()
// . internal_channel_submit_work_wlc()
uvm_rm_mem_t *static_notifier_unprotected_sysmem;
NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;

// Explicit location for push launch tag used by WLC.
// Encryption auth tags have to be located in unprotected sysmem.
void *launch_auth_tag_cpu;
NvU64 launch_auth_tag_gpu_va;

// Used to decrypt the push back to protected sysmem.
// This happens when profilers register callbacks for migration data.
uvm_push_crypto_bundle_t *push_crypto_bundles;

// Accompanying authentication tags for the crypto bundles
uvm_rm_mem_t *push_crypto_bundle_auth_tags;
} conf_computing;

// RM channel information
@@ -418,7 +433,7 @@ struct uvm_channel_manager_struct
unsigned num_channel_pools;

// Mask containing the indexes of the usable Copy Engines. Each usable CE
// has at least one pool associated with it.
// has at least one pool of type UVM_CHANNEL_POOL_TYPE_CE associated with it
DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);

struct
@@ -451,6 +466,16 @@ struct uvm_channel_manager_struct
UVM_BUFFER_LOCATION gpput_loc;
UVM_BUFFER_LOCATION pushbuffer_loc;
} conf;

struct
{
// Flag indicating that the WLC/LCIC mechanism is ready/setup; should
// only be false during (de)initialization.
bool wlc_ready;

// True indicates that key rotation is enabled (UVM-wise).
bool key_rotation_enabled;
} conf_computing;
};

// Create a channel manager for the GPU
@@ -501,6 +526,14 @@ uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel);

uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel);

NvU64 uvm_channel_get_static_pb_protected_vidmem_gpu_va(uvm_channel_t *channel);

NvU64 uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(uvm_channel_t *channel);

char* uvm_channel_get_static_pb_unprotected_sysmem_cpu(uvm_channel_t *channel);

char *uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(uvm_channel_t *channel, unsigned tag_index);

static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
@@ -532,6 +565,17 @@ static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
return UVM_CHANNEL_TYPE_MEMOPS;
}

// Force key rotation in the engine associated with the given channel pool.
// Rotation may still not happen if RM cannot acquire the necessary locks (in
// which case the function returns NV_ERR_STATE_IN_USE).
//
// This function should be only invoked in pools in which key rotation is
// enabled.
NV_STATUS uvm_channel_pool_rotate_key(uvm_channel_pool_t *pool);

// Retrieve the current encryption key version associated with the channel pool.
NvU32 uvm_channel_pool_key_version(uvm_channel_pool_t *pool);

// Privileged channels support all the Host and engine methods, while
// non-privileged channels don't support privileged methods.
//
@@ -579,12 +623,9 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
// beginning.
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);

// Check if WLC/LCIC mechanism is ready/setup
// Should only return false during initialization
static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
{
return (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] != NULL) &&
(manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL);
return manager->conf_computing.wlc_ready;
}
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
// associated with access_channel.
@@ -796,11 +796,8 @@ done:
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_channel_pool_t *pool;
uvm_push_t *pushes;
uvm_gpu_t *gpu;
NvU32 i;
NvU32 num_pushes;
uvm_push_t *pushes = NULL;
uvm_gpu_t *gpu = NULL;

if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
@@ -810,9 +807,19 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
for_each_va_space_gpu(gpu, va_space) {
uvm_channel_type_t channel_type;

// Key rotation is disabled because this test relies on nested pushes,
// which is illegal. If any push other than the first one triggers key
// rotation, the test won't complete. This is because key rotation
// depends on waiting for ongoing pushes to end, which doesn't happen
// if those pushes are ended after the current one begins.
uvm_conf_computing_disable_key_rotation(gpu);

for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
TEST_CHECK_RET(pool != NULL);
NvU32 i;
NvU32 num_pushes;
uvm_channel_pool_t *pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];

TEST_CHECK_GOTO(pool != NULL, error);

// Skip LCIC channels as those can't accept any pushes
if (uvm_channel_pool_is_lcic(pool))
@@ -824,7 +831,7 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);

pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
TEST_CHECK_RET(pushes != NULL);
TEST_CHECK_GOTO(pushes != NULL, error);

for (i = 0; i < num_pushes; i++) {
uvm_push_t *push = &pushes[i];
@@ -841,12 +848,18 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)

uvm_kvfree(pushes);
}

uvm_conf_computing_enable_key_rotation(gpu);
}

uvm_thread_context_lock_enable_tracking();

return status;

error:
if (gpu != NULL)
uvm_conf_computing_enable_key_rotation(gpu);

uvm_thread_context_lock_enable_tracking();
uvm_kvfree(pushes);

@@ -948,6 +961,318 @@ release:
return NV_OK;
}

static NV_STATUS force_key_rotations(uvm_channel_pool_t *pool, unsigned num_rotations)
{
unsigned num_tries;
unsigned max_num_tries = 20;
unsigned num_rotations_completed = 0;

if (num_rotations == 0)
return NV_OK;

// The number of accepted rotations is kept low, so failed rotation
// invocations due to RM not acquiring the necessary locks (which imply a
// sleep in the test) do not balloon the test execution time.
UVM_ASSERT(num_rotations <= 10);

for (num_tries = 0; (num_tries < max_num_tries) && (num_rotations_completed < num_rotations); num_tries++) {
// Force key rotation, irrespective of encryption usage.
NV_STATUS status = uvm_channel_pool_rotate_key(pool);

// Key rotation may not be able to complete due to RM failing to acquire
// the necessary locks. Detect the situation, sleep for a bit, and then
// try again.
//
// The maximum time spent sleeping in a single rotation call is
// (max_num_tries * max_sleep_us)
if (status == NV_ERR_STATE_IN_USE) {
NvU32 min_sleep_us = 1000;
NvU32 max_sleep_us = 10000;

usleep_range(min_sleep_us, max_sleep_us);
continue;
}

TEST_NV_CHECK_RET(status);

num_rotations_completed++;
}

// If not a single key rotation occurred, the dependent tests still pass,
// but there is not much value to them. Instead, return an error so the
// maximum number of tries, or the maximum sleep time, are adjusted to
// ensure that at least one rotation completes.
if (num_rotations_completed > 0)
return NV_OK;
else
return NV_ERR_STATE_IN_USE;
}
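For scale: with max_num_tries = 20 and max_sleep_us = 10000, a single force_key_rotations() call sleeps for at most 20 × 10 ms = 200 ms before giving up with NV_ERR_STATE_IN_USE.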
static NV_STATUS force_key_rotation(uvm_channel_pool_t *pool)
{
return force_key_rotations(pool, 1);
}

// Test key rotation in all pools. This is useful because key rotation may not
// happen otherwise on certain engines during UVM test execution. For example,
// if the MEMOPS channel type is mapped to a CE not shared with any other
// channel type, then the only encryption taking place in the engine is due to
// semaphore releases (4 bytes each). This small encryption size makes it
// unlikely to exceed even small rotation thresholds.
static NV_STATUS test_channel_key_rotation_basic(uvm_gpu_t *gpu)
{
uvm_channel_pool_t *pool;

uvm_for_each_pool(pool, gpu->channel_manager) {
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
continue;

TEST_NV_CHECK_RET(force_key_rotation(pool));
}

return NV_OK;
}

// Interleave GPU encryptions and decryptions, and their CPU counterparts, with
// key rotations.
static NV_STATUS test_channel_key_rotation_interleave(uvm_gpu_t *gpu)
{
int i;
uvm_channel_pool_t *gpu_to_cpu_pool;
uvm_channel_pool_t *cpu_to_gpu_pool;
NV_STATUS status = NV_OK;
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
void *initial_plain_cpu = NULL;
void *final_plain_cpu = NULL;
uvm_mem_t *plain_gpu = NULL;
uvm_gpu_address_t plain_gpu_address;

cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));

gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));

initial_plain_cpu = uvm_kvmalloc_zero(size);
if (initial_plain_cpu == NULL) {
status = NV_ERR_NO_MEMORY;
goto out;
}

final_plain_cpu = uvm_kvmalloc_zero(size);
if (final_plain_cpu == NULL) {
status = NV_ERR_NO_MEMORY;
goto out;
}

TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);

memset(initial_plain_cpu, 1, size);

for (i = 0; i < 5; i++) {
TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);

TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
plain_gpu_address,
initial_plain_cpu,
size,
NULL,
"CPU > GPU"),
out);

TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);

TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
final_plain_cpu,
plain_gpu_address,
size,
NULL,
"GPU > CPU"),
out);

TEST_CHECK_GOTO(!memcmp(initial_plain_cpu, final_plain_cpu, size), out);

memset(final_plain_cpu, 0, size);
}

out:
uvm_mem_free(plain_gpu);
uvm_kvfree(final_plain_cpu);
uvm_kvfree(initial_plain_cpu);

return status;
}

static NV_STATUS memset_vidmem(uvm_mem_t *mem, NvU8 val)
{
uvm_push_t push;
uvm_gpu_address_t gpu_address;
uvm_gpu_t *gpu = mem->backing_gpu;

UVM_ASSERT(uvm_mem_is_vidmem(mem));

TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));

gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
gpu->parent->ce_hal->memset_1(&push, gpu_address, val, mem->size);

TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));

return NV_OK;
}
// Custom version of uvm_conf_computing_util_memcopy_gpu_to_cpu that allows
// testing to insert key rotations in between the push end, and the CPU
// decryption
static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
void *dst_plain,
uvm_gpu_address_t src_gpu_address,
size_t size,
unsigned num_rotations_to_insert)
{
NV_STATUS status;
uvm_push_t push;
uvm_conf_computing_dma_buffer_t *dma_buffer;
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
void *src_cipher, *auth_tag;
uvm_channel_t *channel;

UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
if (status != NV_OK)
return status;

status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Small GPU > CPU encryption");
if (status != NV_OK)
goto out;

channel = push.channel;
uvm_conf_computing_log_gpu_encryption(channel, size, dma_buffer->decrypt_iv);
dma_buffer->key_version[0] = uvm_channel_pool_key_version(channel->pool);

dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

status = uvm_push_end_and_wait(&push);
if (status != NV_OK)
goto out;

TEST_NV_CHECK_GOTO(force_key_rotations(channel->pool, num_rotations_to_insert), out);

// If num_rotations_to_insert is not zero, the current encryption key will
// be different from the one used during CE encryption.

src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
status = uvm_conf_computing_cpu_decrypt(channel,
dst_plain,
src_cipher,
dma_buffer->decrypt_iv,
dma_buffer->key_version[0],
size,
auth_tag);

out:
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
return status;
}
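Note how the helper snapshots uvm_channel_pool_key_version() into dma_buffer->key_version[0] before any forced rotations: the later uvm_conf_computing_cpu_decrypt() call then authenticates against the key version that actually encrypted the buffer, even though the pool's current key has since moved on.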
static NV_STATUS test_channel_key_rotation_cpu_decryption(uvm_gpu_t *gpu,
unsigned num_repetitions,
unsigned num_rotations_to_insert)
{
unsigned i;
uvm_channel_pool_t *gpu_to_cpu_pool;
NV_STATUS status = NV_OK;
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
NvU8 *plain_cpu = NULL;
uvm_mem_t *plain_gpu = NULL;
uvm_gpu_address_t plain_gpu_address;

if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
return NV_OK;

gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));

plain_cpu = (NvU8 *) uvm_kvmalloc_zero(size);
if (plain_cpu == NULL) {
status = NV_ERR_NO_MEMORY;
goto out;
}

TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
TEST_NV_CHECK_GOTO(memset_vidmem(plain_gpu, 1), out);

plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);

for (i = 0; i < num_repetitions; i++) {
unsigned j;

TEST_NV_CHECK_GOTO(encrypted_memcopy_gpu_to_cpu(gpu,
plain_cpu,
plain_gpu_address,
size,
num_rotations_to_insert),
out);

for (j = 0; j < size; j++)
TEST_CHECK_GOTO(plain_cpu[j] == 1, out);

memset(plain_cpu, 0, size);
}

out:
uvm_mem_free(plain_gpu);
uvm_kvfree(plain_cpu);

return status;
}

// Test that CPU decryptions can use old keys, i.e. previous versions of the
// keys that are no longer the current key, due to key rotation. Given that
// SEC2 does not expose encryption capabilities, the "decrypt-after-rotation"
// problem is exclusive to CE encryptions.
static NV_STATUS test_channel_key_rotation_decrypt_after_key_rotation(uvm_gpu_t *gpu)
{
// Instruct encrypted_memcopy_gpu_to_cpu to insert several key rotations
// between the GPU encryption, and the associated CPU decryption.
unsigned num_rotations_to_insert = 8;

TEST_NV_CHECK_RET(test_channel_key_rotation_cpu_decryption(gpu, 1, num_rotations_to_insert));

return NV_OK;
}

static NV_STATUS test_channel_key_rotation(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    for_each_va_space_gpu(gpu, va_space) {
        if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
            break;

        TEST_NV_CHECK_RET(test_channel_key_rotation_basic(gpu));

        TEST_NV_CHECK_RET(test_channel_key_rotation_interleave(gpu));

        TEST_NV_CHECK_RET(test_channel_key_rotation_decrypt_after_key_rotation(gpu));
    }

    return NV_OK;
}

NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;
@@ -1203,6 +1528,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
    if (status != NV_OK)
        goto done;

    status = test_channel_key_rotation(va_space);
    if (status != NV_OK)
        goto done;

    // The following tests have side effects, they reset the GPU's
    // channel_manager.
    status = test_channel_pushbuffer_extension_base(va_space);
@@ -1338,6 +1667,126 @@ done:
    return status;
}

static NV_STATUS channel_stress_key_rotation_cpu_encryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    int i;
    uvm_channel_pool_t *cpu_to_gpu_pool;
    NV_STATUS status = NV_OK;
    size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
    void *initial_plain_cpu = NULL;
    uvm_mem_t *plain_gpu = NULL;
    uvm_gpu_address_t plain_gpu_address;

    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU);

    cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
    TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));

    initial_plain_cpu = uvm_kvmalloc_zero(size);
    if (initial_plain_cpu == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
    plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);

    memset(initial_plain_cpu, 1, size);

    for (i = 0; i < params->iterations; i++) {
        TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
                                                                      plain_gpu_address,
                                                                      initial_plain_cpu,
                                                                      size,
                                                                      NULL,
                                                                      "CPU > GPU"),
                           out);
    }

out:
    uvm_mem_free(plain_gpu);
    uvm_kvfree(initial_plain_cpu);

    return status;
}

static NV_STATUS channel_stress_key_rotation_cpu_decryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    unsigned num_rotations_to_insert = 0;

    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU);

    return test_channel_key_rotation_cpu_decryption(gpu, params->iterations, num_rotations_to_insert);
}

static NV_STATUS channel_stress_key_rotation_rotate(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    NvU32 i;

    UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE);

    for (i = 0; i < params->iterations; ++i) {
        NV_STATUS status;
        uvm_channel_pool_t *pool;
        uvm_channel_type_t type;

        if ((i % 3) == 0)
            type = UVM_CHANNEL_TYPE_CPU_TO_GPU;
        else if ((i % 3) == 1)
            type = UVM_CHANNEL_TYPE_GPU_TO_CPU;
        else
            type = UVM_CHANNEL_TYPE_WLC;

        pool = gpu->channel_manager->pool_to_use.default_for_type[type];

        if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
            return NV_ERR_INVALID_STATE;

        status = force_key_rotation(pool);
        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

// The objective of this test is documented in the user-level function
static NV_STATUS uvm_test_channel_stress_key_rotation(uvm_va_space_t *va_space, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
    uvm_test_rng_t rng;
    uvm_gpu_t *gpu;
    NV_STATUS status = NV_OK;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    uvm_test_rng_init(&rng, params->seed);

    uvm_va_space_down_read(va_space);

    // Key rotation should be enabled, or disabled, in all GPUs. Pick a random
    // one.
    gpu = random_va_space_gpu(&rng, va_space);

    if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
        goto out;

    if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU)
        status = channel_stress_key_rotation_cpu_encryption(gpu, params);
    else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU)
        status = channel_stress_key_rotation_cpu_decryption(gpu, params);
    else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE)
        status = channel_stress_key_rotation_rotate(gpu, params);
    else
        status = NV_ERR_INVALID_PARAMETER;

out:
    uvm_va_space_up_read(va_space);

    return status;
}

NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
@@ -1349,6 +1798,8 @@ NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct
        return uvm_test_channel_stress_update_channels(va_space, params);
    case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
        return uvm_test_channel_noop_push(va_space, params);
    case UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION:
        return uvm_test_channel_stress_key_rotation(va_space, params);
    default:
        return NV_ERR_INVALID_PARAMETER;
    }

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2013-2023 NVIDIA Corporation
    Copyright (c) 2013-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -423,7 +423,9 @@ static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *
    UVM_ASSERT(first);
    UVM_ASSERT(outer);

    if (uvm_platform_uses_canonical_form_address()) {
    // Maxwell GPUs (num_va_bits == 40b) do not support canonical form address
    // even when plugged into platforms using it.
    if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) {
        *first = 1ULL << (num_va_bits - 1);
        *outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
    }
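
    // Worked example for the expressions above (a sketch, not in the original
    // source): on a canonical-form platform with num_va_bits == 48, the
    // unaddressable hole is
    //     *first = 1ULL << 47                        = 0x0000800000000000
    //     *outer = sign-extended (1ULL << 63) >> 16  = 0xffff800000000000
    // i.e. the non-canonical gap between the lower and upper halves of the VA
    // space. With the added "num_va_bits > 40" check, Maxwell's 40-bit VA
    // space skips this branch entirely.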

@@ -33,6 +33,15 @@
#include "nv_uvm_interface.h"
#include "uvm_va_block.h"

// Amount of encrypted data on a given engine that triggers key rotation. This
// is a UVM internal threshold, different from that of RM, and used only during
// testing.
//
// Key rotation is triggered when the total encryption size, or the total
// decryption size (whichever comes first), reaches this lower threshold on the
// engine.
#define UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD (UVM_SIZE_1MB * 8)
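
// A condensed sketch of how this threshold is consumed (hypothetical helper;
// the full logic lives in conf_computing_is_key_rotation_pending_use_stats()
// further below): rotation is reported pending once either per-pool counter
// exceeds the threshold.
static bool key_rotation_threshold_crossed_sketch(uvm_channel_pool_t *pool)
{
    return (atomic64_read(&pool->conf_computing.key_rotation.encrypted) >
            UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD) ||
           (atomic64_read(&pool->conf_computing.key_rotation.decrypted) >
            UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD);
}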

// The maximum number of secure operations per push is:
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
@@ -352,6 +361,19 @@ error:
    return status;
}

// The production key rotation defaults are such that key rotations rarely
// happen. During UVM testing, more frequent rotations are triggered by relying
// on internal encryption usage accounting. When key rotations are triggered by
// UVM, the driver does not rely on channel key rotation notifiers.
//
// TODO: Bug 4612912: UVM should be able to programmatically set the rotation
// lower threshold. This function, and all the metadata associated with it
// (per-pool encryption accounting, for example), can be removed at that point.
static bool key_rotation_is_notifier_driven(void)
{
    return !uvm_enable_builtin_tests;
}

NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
{
    NV_STATUS status;
@@ -394,17 +416,35 @@ void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
    conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
}

void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv)
{
    NV_STATUS status;
    uvm_channel_pool_t *pool;

    if (uvm_channel_is_lcic(channel))
        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
    else
        pool = channel->pool;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, size);

        // Informing RM of an encryption/decryption should not fail
        UVM_ASSERT(status == NV_OK);

        if (!key_rotation_is_notifier_driven())
            atomic64_add(size, &pool->conf_computing.key_rotation.encrypted);
    }

    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
@@ -428,27 +468,46 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
                                    void *auth_tag_buffer)
{
    NV_STATUS status;
    uvm_channel_pool_t *pool;

    UVM_ASSERT(size);

    if (uvm_channel_is_lcic(channel))
        pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
    else
        pool = channel->pool;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
                                      size,
                                      (NvU8 const *) src_plain,
                                      encrypt_iv,
                                      (NvU8 *) dst_cipher,
                                      (NvU8 *) auth_tag_buffer);
    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // IV rotation is done preemptively as needed, so the above
    // call cannot return failure.
    UVM_ASSERT(status == NV_OK);

    if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
        status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, size);

        // Informing RM of an encryption/decryption should not fail
        UVM_ASSERT(status == NV_OK);

        if (!key_rotation_is_notifier_driven())
            atomic64_add(size, &pool->conf_computing.key_rotation.decrypted);
    }

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}

NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                         void *dst_plain,
                                         const void *src_cipher,
                                         const UvmCslIv *src_iv,
                                         NvU32 key_version,
                                         size_t size,
                                         const void *auth_tag_buffer)
{
@@ -469,10 +528,19 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                      size,
                                      (const NvU8 *) src_cipher,
                                      src_iv,
                                      key_version,
                                      (NvU8 *) dst_plain,
                                      NULL,
                                      0,
                                      (const NvU8 *) auth_tag_buffer);

    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, channel %s, GPU %s\n",
                      nvstatusToString(status),
                      channel->name,
                      uvm_gpu_name(uvm_channel_get_gpu(channel)));
    }

    uvm_mutex_unlock(&channel->csl.ctx_lock);

    return status;
@@ -485,6 +553,8 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
                                           NvU8 valid)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;

    // There is no dedicated lock for the CSL context associated with replayable
    // faults. The mutual exclusion required by the RM CSL API is enforced by
@@ -494,36 +564,48 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = nvUvmInterfaceCslDecrypt(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
                                      parent_gpu->fault_buffer_hal->entry_size(parent_gpu),
    status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);

    // Informing RM of an encryption/decryption should not fail
    UVM_ASSERT(status == NV_OK);

    status = nvUvmInterfaceCslDecrypt(csl_context,
                                      fault_entry_size,
                                      (const NvU8 *) src_cipher,
                                      NULL,
                                      NV_U32_MAX,
                                      (NvU8 *) dst_plain,
                                      &valid,
                                      sizeof(valid),
                                      (const NvU8 *) auth_tag_buffer);

    if (status != NV_OK)
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_parent_gpu_name(parent_gpu));

    }

    return status;
}

void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment)
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;

    // See comment in uvm_conf_computing_fault_decrypt
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = nvUvmInterfaceCslIncrementIv(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
                                          UVM_CSL_OPERATION_DECRYPT,
                                          increment,
                                          NULL);
    status = nvUvmInterfaceCslLogEncryption(csl_context, UVM_CSL_OPERATION_DECRYPT, fault_entry_size);

    // Informing RM of an encryption/decryption should not fail
    UVM_ASSERT(status == NV_OK);

    status = nvUvmInterfaceCslIncrementIv(csl_context, UVM_CSL_OPERATION_DECRYPT, 1, NULL);

    UVM_ASSERT(status == NV_OK);
}
@@ -625,3 +707,236 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
{
    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, true);
}

void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu)
{
    if (!g_uvm_global.conf_computing_enabled)
        return;

    // Key rotation cannot be enabled on UVM if it is disabled on RM
    if (!gpu->parent->rm_info.gpuConfComputeCaps.bKeyRotationEnabled)
        return;

    gpu->channel_manager->conf_computing.key_rotation_enabled = true;
}

void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
{
    if (!g_uvm_global.conf_computing_enabled)
        return;

    gpu->channel_manager->conf_computing.key_rotation_enabled = false;
}

bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
    UVM_ASSERT(gpu);

    // If the channel_manager is not set, we're in the channel manager destroy
    // path after the pointer was NULL-ed. Chances are that other key rotation
    // infrastructure is not available either. Disallow the key rotation.
    return gpu->channel_manager && gpu->channel_manager->conf_computing.key_rotation_enabled;
}

bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_enabled(pool->manager->gpu))
        return false;

    // TODO: Bug 4586447: key rotation must be disabled in the SEC2 engine,
    // because currently the encryption key is shared between UVM and RM, but
    // UVM is not able to idle SEC2 channels owned by RM.
    if (uvm_channel_pool_is_sec2(pool))
        return false;

    // Key rotation happens as part of channel reservation, and LCIC channels
    // are never reserved directly. Rotation of keys in LCIC channels happens
    // as the result of key rotation in WLC channels.
    //
    // Return false even though there is nothing fundamental prohibiting
    // direct key rotation on LCIC pools.
    if (uvm_channel_pool_is_lcic(pool))
        return false;

    return true;
}

static bool conf_computing_is_key_rotation_pending_use_stats(uvm_channel_pool_t *pool)
{
    NvU64 decrypted, encrypted;

    UVM_ASSERT(!key_rotation_is_notifier_driven());

    decrypted = atomic64_read(&pool->conf_computing.key_rotation.decrypted);

    if (decrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
        return true;

    encrypted = atomic64_read(&pool->conf_computing.key_rotation.encrypted);

    if (encrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
        return true;

    return false;
}

static bool conf_computing_is_key_rotation_pending_use_notifier(uvm_channel_pool_t *pool)
{
    // If key rotation is pending for the pool's engine, then the key rotation
    // notifier in any of the engine channels can be used by UVM to detect the
    // situation. Note that RM doesn't update all the notifiers in a single
    // atomic operation, so it is possible that the channel read by UVM (the
    // first one in the pool) indicates that a key rotation is pending, but
    // another channel in the pool (temporarily) indicates the opposite, or vice
    // versa.
    uvm_channel_t *first_channel = pool->channels;

    UVM_ASSERT(key_rotation_is_notifier_driven());
    UVM_ASSERT(first_channel != NULL);

    return first_channel->channel_info.keyRotationNotifier->status == UVM_KEY_ROTATION_STATUS_PENDING;
}

bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
        return false;

    if (key_rotation_is_notifier_driven())
        return conf_computing_is_key_rotation_pending_use_notifier(pool);
    else
        return conf_computing_is_key_rotation_pending_use_stats(pool);
}
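
// Taken together, the queries above suggest the following caller flow (a
// hypothetical sketch, not part of this change; the real driver performs
// rotation as part of channel reservation and quiesces the pool's channels
// first):
static NV_STATUS maybe_rotate_pool_key_sketch(uvm_channel_pool_t *pool)
{
    if (!uvm_conf_computing_is_key_rotation_pending_in_pool(pool))
        return NV_OK;

    // The pool's channels must be idle before the key can be rotated.
    return uvm_conf_computing_rotate_pool_key(pool);
}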

NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool)
{
    NV_STATUS status;

    UVM_ASSERT(uvm_conf_computing_is_key_rotation_enabled_in_pool(pool));
    UVM_ASSERT(pool->conf_computing.key_rotation.csl_contexts != NULL);
    UVM_ASSERT(pool->conf_computing.key_rotation.num_csl_contexts > 0);

    // NV_ERR_STATE_IN_USE indicates that RM was not able to acquire the
    // required locks at this time. This status is not interpreted as an error,
    // but as a sign for UVM to try again later. This is the same "protocol"
    // used in IV rotation.
    status = nvUvmInterfaceCslRotateKey(pool->conf_computing.key_rotation.csl_contexts,
                                        pool->conf_computing.key_rotation.num_csl_contexts);

    if (status == NV_OK) {
        pool->conf_computing.key_rotation.version++;

        if (!key_rotation_is_notifier_driven()) {
            atomic64_set(&pool->conf_computing.key_rotation.decrypted, 0);
            atomic64_set(&pool->conf_computing.key_rotation.encrypted, 0);
        }
    }
    else if (status != NV_ERR_STATE_IN_USE) {
        UVM_DBG_PRINT("nvUvmInterfaceCslRotateKey() failed in engine %u: %s\n",
                      pool->engine_index,
                      nvstatusToString(status));
    }

    return status;
}
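
// A minimal sketch of the "try again later" protocol described above
// (hypothetical helper, not part of this change): NV_ERR_STATE_IN_USE is a
// transient condition, so the caller simply retries; any other failure is
// propagated.
static NV_STATUS rotate_pool_key_retry_busy_sketch(uvm_channel_pool_t *pool)
{
    NV_STATUS status;

    do {
        status = uvm_conf_computing_rotate_pool_key(pool);
    } while (status == NV_ERR_STATE_IN_USE);

    return status;
}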

__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
                                                     uvm_gpu_address_t dst_gpu_address,
                                                     void *src_plain,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
    void *dst_cipher, *auth_tag;
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    va_start(args, format);
    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
    va_end(args);

    if (status != NV_OK)
        goto out;

    dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);

    src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}

__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
                                                     void *dst_plain,
                                                     uvm_gpu_address_t src_gpu_address,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_conf_computing_dma_buffer_t *dma_buffer;
    uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
    void *src_cipher, *auth_tag;
    va_list args;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);
    UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);

    status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
    if (status != NV_OK)
        return status;

    va_start(args, format);
    status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
    va_end(args);

    if (status != NV_OK)
        goto out;

    uvm_conf_computing_log_gpu_encryption(push.channel, size, dma_buffer->decrypt_iv);
    dma_buffer->key_version[0] = uvm_channel_pool_key_version(push.channel->pool);

    dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
    gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);

    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK)
        goto out;

    src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
    auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
    status = uvm_conf_computing_cpu_decrypt(push.channel,
                                            dst_plain,
                                            src_cipher,
                                            dma_buffer->decrypt_iv,
                                            dma_buffer->key_version[0],
                                            size,
                                            auth_tag);

out:
    uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
    return status;
}

@@ -87,9 +87,9 @@ typedef struct
    // a free buffer.
    uvm_tracker_t tracker;

    // When the DMA buffer is used as the destination of a GPU encryption, SEC2
    // writes the authentication tag here. Later when the buffer is decrypted
    // on the CPU the authentication tag is used again (read) for CSL to verify
    // When the DMA buffer is used as the destination of a GPU encryption, the
    // engine (CE or SEC2) writes the authentication tag here. When the buffer
    // is decrypted on the CPU the authentication tag is used by CSL to verify
    // the authenticity. The allocation is big enough for one authentication
    // tag per PAGE_SIZE page in the alloc buffer.
    uvm_mem_t *auth_tag;
@@ -98,7 +98,12 @@ typedef struct
    // to the authentication tag. The allocation is big enough for one IV per
    // PAGE_SIZE page in the alloc buffer. The granularity between the decrypt
    // IV and authentication tag must match.
    UvmCslIv decrypt_iv[(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE)];
    UvmCslIv decrypt_iv[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];

    // When the DMA buffer is used as the destination of a GPU encryption, the
    // key version used during GPU encryption of each PAGE_SIZE page can be
    // saved here, so CPU decryption uses the correct decryption key.
    NvU32 key_version[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];

    // Bitmap of the encrypted pages in the backing allocation
    uvm_page_mask_t encrypted_page_mask;
@@ -147,7 +152,7 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu);
void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu);

// Logs encryption information from the GPU and returns the IV.
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv);
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv);

// Acquires next CPU encryption IV and returns it.
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv);
@@ -167,10 +172,14 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
// CPU side decryption helper. Decrypts data from src_cipher and writes the
// plain text in dst_plain. src_cipher and dst_plain can't overlap. The IV
// obtained from uvm_conf_computing_log_gpu_encryption() needs to be passed in
// src_iv.
//
// The caller must indicate which key to use for decryption by passing the
// appropriate key version number.
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
                                         void *dst_plain,
                                         const void *src_cipher,
                                         const UvmCslIv *src_iv,
                                         NvU32 key_version,
                                         size_t size,
                                         const void *auth_tag_buffer);
@@ -191,12 +200,12 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
                                           NvU8 valid);

// Increment the CPU-side decrypt IV of the CSL context associated with
// replayable faults. The function is a no-op if the given increment is zero.
// replayable faults.
//
// The IV associated with a fault CSL context is a 64-bit counter.
//
// Locking: this function must be invoked while holding the replayable ISR lock.
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment);
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu);

// Query the number of remaining messages before IV needs to be rotated.
void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
@@ -214,4 +223,71 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
// Check if there are fewer than 'limit' messages available in either direction
// and rotate if not.
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy);

// Rotate the engine key associated with the given channel pool.
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool);

// Returns true if key rotation is allowed in the channel pool.
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool);

// Returns true if key rotation is pending in the channel pool.
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool);

// Enable/disable key rotation in the passed GPU. Note that UVM enablement is
// dependent on RM enablement: key rotation may still be disabled upon calling
// this function, if it is disabled in RM. On the other hand, key rotation can
// be disabled in UVM, even if it is enabled in RM.
//
// Enablement/disablement affects only kernel key rotation in keys owned by
// UVM. It doesn't affect user key rotation (CUDA, Video...), nor does it
// affect RM kernel key rotation.
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu);
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu);

// Returns true if key rotation is enabled on UVM in the given GPU. Key rotation
// can be enabled on the GPU but disabled on some of the GPU engines (LCEs or
// SEC2), see uvm_conf_computing_is_key_rotation_enabled_in_pool.
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu);
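
// A hypothetical caller sketch of the dependency described above: requesting
// enablement does not guarantee that key rotation is on, so callers re-query
// the effective state afterwards.
//
//     uvm_conf_computing_enable_key_rotation(gpu);
//
//     // Still false if key rotation is disabled in RM, or if Confidential
//     // Computing is disabled altogether.
//     if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
//         return;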

// Launch a synchronous, encrypted copy between CPU and GPU.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The source CPU buffer pointed to by src_plain contains the unencrypted
// (plain text) contents; the function internally performs a CPU-side
// encryption step before launching the GPU-side CE decryption. The source
// buffer can be in protected or unprotected sysmem, while the destination
// buffer must be in protected vidmem.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
                                                     uvm_gpu_address_t dst_gpu_address,
                                                     void *src_plain,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...);
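
// A minimal usage sketch (hypothetical caller; assumes Confidential Computing
// is enabled and dst_gpu_address points to protected vidmem):
//
//     NvU8 pattern[256];
//
//     memset(pattern, 0xAB, sizeof(pattern));
//
//     // NULL tracker: the copy does not need to wait on prior GPU work.
//     status = uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
//                                                         dst_gpu_address,
//                                                         pattern,
//                                                         sizeof(pattern),
//                                                         NULL,
//                                                         "fill %zu bytes",
//                                                         sizeof(pattern));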

// Launch a synchronous, encrypted copy between GPU and CPU.
//
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
//
// The source GPU buffer pointed to by src_gpu_address must be in protected
// vidmem; the function launches a GPU-side CE encryption and then internally
// performs a CPU-side decryption step into dst_plain, which can be in
// protected or unprotected sysmem.
//
// The input tracker, if not NULL, is internally acquired by the push
// responsible for the encrypted copy.
__attribute__ ((format(printf, 6, 7)))
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
                                                     void *dst_plain,
                                                     uvm_gpu_address_t src_gpu_address,
                                                     size_t size,
                                                     uvm_tracker_t *tracker,
                                                     const char *format,
                                                     ...);
#endif // __UVM_CONF_COMPUTING_H__

@@ -1,53 +0,0 @@
/*******************************************************************************
    Copyright (c) 2015 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

// This file provides simple wrappers that are always built with optimizations
// turned on to WAR issues with functions that don't build correctly otherwise.

#include "uvm_linux.h"

int nv_atomic_xchg(atomic_t *val, int new)
{
    return atomic_xchg(val, new);
}

int nv_atomic_cmpxchg(atomic_t *val, int old, int new)
{
    return atomic_cmpxchg(val, old, new);
}

long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new)
{
    return atomic_long_cmpxchg(val, old, new);
}

unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n)
{
    return copy_from_user(to, from, n);
}

unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n)
{
    return copy_to_user(to, from, n);
}

@@ -412,7 +412,7 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)

    UVM_ASSERT(error != NV_OK);

    previous_error = nv_atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
    previous_error = atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);

    if (previous_error == NV_OK) {
        UVM_ERR_PRINT("Encountered a global fatal error: %s\n", nvstatusToString(error));
@@ -430,7 +430,7 @@ NV_STATUS uvm_global_reset_fatal_error(void)
        return NV_ERR_INVALID_STATE;
    }

    return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
    return atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
}

void uvm_global_gpu_retain(const uvm_processor_mask_t *mask)

@@ -138,6 +138,7 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)

    if (gpu_caps.numaEnabled) {
        UVM_ASSERT(uvm_parent_gpu_is_coherent(gpu->parent));

        gpu->mem_info.numa.enabled = true;
        gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
    }
@@ -218,9 +219,8 @@ static NV_STATUS alloc_and_init_address_space(uvm_gpu_t *gpu)
    if (status != NV_OK)
        return status;

    UVM_ASSERT(gpu_address_space_info.bigPageSize <= NV_U32_MAX);

    gpu->big_page.internal_size = gpu_address_space_info.bigPageSize;

    gpu->time.time0_register = gpu_address_space_info.time0Offset;
    gpu->time.time1_register = gpu_address_space_info.time1Offset;

@@ -459,7 +459,6 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)

static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
{

    BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);

    switch (link_type) {
@@ -1084,6 +1083,9 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
                                      gpu->parent->rm_va_size,
                                      va_per_entry);

    UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
    UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));

    tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
    status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
                                                               tree_alloc->addr.address,
@@ -1279,7 +1281,8 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)

    status = get_gpu_caps(gpu);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        if (status != NV_ERR_NVSWITCH_FABRIC_NOT_READY)
            UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

@@ -2255,7 +2258,10 @@ static void set_optimal_p2p_write_ces(const UvmGpuP2PCapsParams *p2p_caps_params
    bool sorted;
    NvU32 ce0, ce1;

    if (peer_caps->link_type < UVM_GPU_LINK_NVLINK_1)
    UVM_ASSERT(peer_caps->ref_count);
    UVM_ASSERT(gpu0->parent->peer_copy_mode == gpu1->parent->peer_copy_mode);

    if (gpu0->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_UNSUPPORTED)
        return;

    sorted = uvm_id_value(gpu0->id) < uvm_id_value(gpu1->id);
@@ -2281,7 +2287,7 @@ static void set_optimal_p2p_write_ces(const UvmGpuP2PCapsParams *p2p_caps_params
static int nv_procfs_read_gpu_peer_caps(struct seq_file *s, void *v)
{
    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
            return -EAGAIN;
        return -EAGAIN;

    gpu_peer_caps_print((uvm_gpu_t **)s->private, s);

@@ -2363,7 +2369,9 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,

    // check for peer-to-peer compatibility (PCI-E or NvLink).
    peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
    if (peer_caps->link_type == UVM_GPU_LINK_INVALID || peer_caps->link_type == UVM_GPU_LINK_C2C)
    if (peer_caps->link_type == UVM_GPU_LINK_INVALID
        || peer_caps->link_type == UVM_GPU_LINK_C2C
       )
        return NV_ERR_NOT_SUPPORTED;

    peer_caps->total_link_line_rate_mbyte_per_s = p2p_caps_params->totalLinkLineRateMBps;
@@ -3293,10 +3301,7 @@ void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64
    atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
}

NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
                                       struct page *page,
                                       size_t size,
                                       NvU64 *dma_address_out)
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
    NvU64 dma_addr;

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -962,6 +962,8 @@ struct uvm_parent_gpu_struct
    // Whether CE supports physical addressing mode for writes to vidmem
    bool ce_phys_vidmem_write_supported;

    // Addressing mode(s) supported for CE transfers between this GPU and its
    // peers: none, physical only, physical and virtual, etc.
    uvm_gpu_peer_copy_mode_t peer_copy_mode;

    // Virtualization mode of the GPU.

@@ -684,7 +684,10 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,

    while (get != put) {
        // Wait until valid bit is set
        UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
        UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
            if (uvm_global_get_status() != NV_OK)
                goto done;
        }

        parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
        ++get;
@@ -692,6 +695,7 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
            get = 0;
    }

done:
    write_get(parent_gpu, get);
}

@@ -817,12 +821,18 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
           (fetch_mode == NOTIFICATION_FETCH_MODE_ALL || notification_index < access_counters->max_batch_size)) {
        uvm_access_counter_buffer_entry_t *current_entry = &notification_cache[notification_index];

        // We cannot just wait for the last entry (the one pointed by put) to become valid, we have to do it
        // individually since entries can be written out of order
        // We cannot just wait for the last entry (the one pointed by put) to
        // become valid, we have to do it individually since entries can be
        // written out of order
        UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
            // We have some entry to work on. Let's do the rest later.
            if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
                goto done;

            // There's no entry to work on and something has gone wrong. Ignore
            // the rest.
            if (uvm_global_get_status() != NV_OK)
                goto done;
        }

        // Prevent later accesses being moved above the read of the valid bit

@@ -591,7 +591,7 @@ static void fault_buffer_skip_replayable_entry(uvm_parent_gpu_t *parent_gpu, NvU
    // replayable faults still requires manual adjustment so it is kept in sync
    // with the encryption IV on the GSP-RM's side.
    if (g_uvm_global.conf_computing_enabled)
        uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu, 1);
        uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu);

    parent_gpu->fault_buffer_hal->entry_clear_valid(parent_gpu, index);
}
@@ -631,7 +631,15 @@ static NV_STATUS fault_buffer_flush_locked(uvm_gpu_t *gpu,

    while (get != put) {
        // Wait until valid bit is set
        UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
        UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
            // Channels might be idle (e.g. in teardown) so check for errors
            // actively.
            status = uvm_channel_manager_check_errors(gpu->channel_manager);
            if (status != NV_OK) {
                write_get(parent_gpu, get);
                return status;
            }
        }

        fault_buffer_skip_replayable_entry(parent_gpu, get);
        ++get;
@@ -864,6 +872,10 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
            // We have some entry to work on. Let's do the rest later.
            if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
                goto done;

            status = uvm_global_get_status();
            if (status != NV_OK)
                goto done;
        }

        // Prevent later accesses being moved above the read of the valid bit

@@ -60,6 +60,17 @@ struct uvm_gpu_semaphore_pool_page_struct
    // Allocation backing the page
    uvm_rm_mem_t *memory;

    struct {
        // Unprotected sysmem storing encrypted value of semaphores
        uvm_rm_mem_t *encrypted_payload_memory;

        // Unprotected sysmem storing encryption auth tags
        uvm_rm_mem_t *auth_tag_memory;

        // Unprotected sysmem storing plain text notifier values
        uvm_rm_mem_t *notifier_memory;
    } conf_computing;

    // Pool the page is part of
    uvm_gpu_semaphore_pool_t *pool;

@@ -80,26 +91,6 @@ static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
    return gpu_semaphore_pool_is_secure(semaphore->page->pool);
}

static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
{
    NvU32 offset;
    NvU32 index;

    if (gpu_semaphore_is_secure(semaphore))
        return semaphore->conf_computing.index;

    UVM_ASSERT(semaphore->payload != NULL);
    UVM_ASSERT(semaphore->page != NULL);

    offset = (char*)semaphore->payload - (char*)uvm_rm_mem_get_cpu_va(semaphore->page->memory);
    UVM_ASSERT(offset % UVM_SEMAPHORE_SIZE == 0);

    index = offset / UVM_SEMAPHORE_SIZE;
    UVM_ASSERT(index < UVM_SEMAPHORE_COUNT_PER_PAGE);

    return index;
}

// Use canary values on debug builds to catch semaphore use-after-free. We can
// catch release-after-free by simply setting the payload to a known value at
// free then checking it on alloc or pool free, but catching acquire-after-free
@@ -150,34 +141,83 @@ static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
    return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}

// Secure semaphore pools are allocated in the CPR of vidmem and only mapped to
// the owning GPU as no other processor has access to it.
static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
                                        uvm_gpu_semaphore_pool_page_t *pool_page,
                                        uvm_rm_mem_type_t memory_type)
static void pool_page_free_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
    uvm_rm_mem_free(page->memory);
    page->memory = NULL;

    if (gpu_semaphore_pool_is_secure(page->pool)) {
        uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
        uvm_rm_mem_free(page->conf_computing.auth_tag_memory);
        uvm_rm_mem_free(page->conf_computing.notifier_memory);

        page->conf_computing.encrypted_payload_memory = NULL;
        page->conf_computing.auth_tag_memory = NULL;
        page->conf_computing.notifier_memory = NULL;
    }
    else {
        UVM_ASSERT(!page->conf_computing.encrypted_payload_memory);
        UVM_ASSERT(!page->conf_computing.auth_tag_memory);
        UVM_ASSERT(!page->conf_computing.notifier_memory);
    }
}

static NV_STATUS pool_page_alloc_buffers(uvm_gpu_semaphore_pool_page_t *page)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_t *pool = page->pool;
    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
    size_t align = 0;
    bool map_all = true;
    align = gpu_semaphore_pool_is_secure(pool) ? UVM_CONF_COMPUTING_BUF_ALIGNMENT : 0;
    map_all = gpu_semaphore_pool_is_secure(pool) ? false : true;

    UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
    status = uvm_rm_mem_alloc(pool->gpu,
                              memory_type,
                              UVM_SEMAPHORE_PAGE_SIZE,
                              UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                              &pool_page->memory);
    if (map_all)
        status = uvm_rm_mem_alloc_and_map_all(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
    else
        status = uvm_rm_mem_alloc(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);

    if (status != NV_OK)
        return status;
        goto error;

    if (!gpu_semaphore_pool_is_secure(pool))
        return NV_OK;

    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_PAGE_SIZE,
                                          UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                                          &page->conf_computing.encrypted_payload_memory);
    if (status != NV_OK)
        goto error;

    BUILD_BUG_ON(UVM_CONF_COMPUTING_AUTH_TAG_SIZE % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                          UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
                                          &page->conf_computing.auth_tag_memory);
    if (status != NV_OK)
        goto error;

    status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
                                          UVM_RM_MEM_TYPE_SYS,
                                          UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(NvU32),
                                          0,
                                          &page->conf_computing.notifier_memory);
    if (status != NV_OK)
        goto error;

    return NV_OK;
error:
    pool_page_free_buffers(page);
    return status;
}

static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_page_t *pool_page;
    NvU32 *payloads;
    size_t i;
    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;

    uvm_assert_mutex_locked(&pool->mutex);

@@ -188,24 +228,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)

    pool_page->pool = pool;

    // Whenever the Confidential Computing feature is enabled, engines can
    // access semaphores only in the CPR of vidmem. Mapping to other GPUs is
    // also disabled.
    if (gpu_semaphore_pool_is_secure(pool)) {
        status = pool_alloc_secure_page(pool, pool_page, memory_type);

        if (status != NV_OK)
            goto error;
    }
    else {
        status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
                                              memory_type,
                                              UVM_SEMAPHORE_PAGE_SIZE,
                                              0,
                                              &pool_page->memory);
    status = pool_page_alloc_buffers(pool_page);
    if (status != NV_OK)
        goto error;
    }

    // Verify the GPU can access the semaphore pool.
    UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
@@ -217,7 +242,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
    pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;

    if (semaphore_uses_canary(pool)) {
        payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
        size_t i;
        NvU32 *payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);

        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            payloads[i] = make_canary(0);
    }
@@ -253,7 +280,7 @@ static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)

    pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
    list_del(&page->all_pages_node);
    uvm_rm_mem_free(page->memory);
    pool_page_free_buffers(page);
    uvm_kvfree(page);
}

@@ -273,19 +300,22 @@ NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaph
        goto done;

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
        const NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);

        UVM_ASSERT(semaphore_index <= UVM_SEMAPHORE_COUNT_PER_PAGE);

        if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
            continue;

        if (gpu_semaphore_pool_is_secure(pool)) {
            semaphore->conf_computing.index = semaphore_index;
        }
        else {
            semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
                                          semaphore_index * UVM_SEMAPHORE_SIZE);
        }

        semaphore->page = page;
        semaphore->index = semaphore_index;

        if (gpu_semaphore_pool_is_secure(pool)) {

            // Reset the notifier to prevent detection of false attack when
            // checking for updated value
            *uvm_gpu_semaphore_get_notifier_cpu_va(semaphore) = semaphore->conf_computing.last_observed_notifier;
        }

        if (semaphore_uses_canary(pool))
            UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
@@ -311,7 +341,6 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_t *pool;
    NvU32 index;

    UVM_ASSERT(semaphore);

@@ -323,7 +352,6 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
        return;

    pool = page->pool;
    index = get_index(semaphore);

    // Write a known value lower than the current payload in an attempt to catch
    // release-after-free and acquire-after-free.
@@ -333,10 +361,9 @@ void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
    uvm_mutex_lock(&pool->mutex);

    semaphore->page = NULL;
    semaphore->payload = NULL;

    ++pool->free_semaphores_count;
    __set_bit(index, page->free_semaphores);
    __set_bit(semaphore->index, page->free_semaphores);

    uvm_mutex_unlock(&pool->mutex);
}
@@ -449,18 +476,72 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu

NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
    NvU32 index = get_index(semaphore);
    NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;

    return base_va + UVM_SEMAPHORE_SIZE * index;
    return base_va + semaphore->index * UVM_SEMAPHORE_SIZE;
}

NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *base_va;

    if (gpu_semaphore_is_secure(semaphore))
        return &semaphore->conf_computing.cached_payload;

    base_va = uvm_rm_mem_get_cpu_va(semaphore->page->memory);
    return (NvU32*)(base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *encrypted_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);

    return (NvU32*)(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 encrypted_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.encrypted_payload_memory,
                                                        semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
}

uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_notifier_t *notifier_base_va =
        uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);

    return notifier_base_va + semaphore->index;
}

uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
                                                       semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(notifier_base_va +
                                               semaphore->index * sizeof(uvm_gpu_semaphore_notifier_t));
}

void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
{
    char *auth_tag_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.auth_tag_memory);

    return (void*)(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
}

uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore)
{
    NvU64 auth_tag_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.auth_tag_memory,
                                                       semaphore->page->pool->gpu);

    return uvm_gpu_address_virtual_unprotected(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
}

NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
    if (gpu_semaphore_is_secure(semaphore))
        return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);

    return UVM_GPU_READ_ONCE(*semaphore->payload);
    return UVM_GPU_READ_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore));
}

void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
@@ -477,10 +558,7 @@ void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload
    // the GPU correctly even on non-SMP).
    mb();

    if (gpu_semaphore_is_secure(semaphore))
        UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
    else
        UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
    UVM_GPU_WRITE_ONCE(*uvm_gpu_semaphore_get_cpu_va(semaphore), payload);
}

// This function is intended to catch channels which have been left dangling in
@@ -546,22 +624,11 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
    uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}

static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
static void gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
    // No new value, or the GPU is currently writing the new encrypted
    // material; in the latter case reading now would yield corrupted data,
    // even if the value itself has not changed.
    return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
}
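
// The notifier implements a seqlock-style protocol: the GPU bumps the
// notifier to an odd value before overwriting the encrypted payload and to an
// even value afterwards. A simplified sketch of the read side (standalone
// illustration with hypothetical parameters; the real code below also bounds
// the number of retries and re-checks against the last observed value):
static NvU32 read_consistent_snapshot_sketch(const volatile NvU32 *notifier, const volatile NvU32 *payload)
{
    NvU32 before, after, value;

    do {
        before = *notifier;   // Odd: the GPU is mid-update, try again.
        value = *payload;
        after = *notifier;    // Changed: the snapshot raced with an update.
    } while ((before % 2) != 0 || before != after);

    return value;
}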
|
||||
|
||||
static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
UvmCslIv local_iv;
|
||||
NvU32 local_payload;
|
||||
NvU32 new_sem_value;
|
||||
NvU32 gpu_notifier;
|
||||
NvU32 last_observed_notifier;
|
||||
NvU32 new_gpu_notifier = 0;
|
||||
NvU32 iv_index = 0;
|
||||
uvm_gpu_semaphore_notifier_t gpu_notifier;
|
||||
uvm_gpu_semaphore_notifier_t new_gpu_notifier = 0;
|
||||
|
||||
// A channel can have multiple entries pending and the tracking semaphore
|
||||
// update of each entry can race with this function. Since the semaphore
|
||||
@@ -570,64 +637,72 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
|
||||
unsigned tries_left = channel->num_gpfifo_entries;
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
|
||||
UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
|
||||
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
|
||||
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
|
||||
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
|
||||
uvm_gpu_semaphore_notifier_t *semaphore_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
|
||||
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
|
||||
UVM_ASSERT(last_observed_notifier <= gpu_notifier);
|
||||
|
||||
if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
|
||||
return;
|
||||
|
||||
do {
|
||||
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
|
||||
gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);
|
||||
|
||||
UVM_ASSERT(gpu_notifier >= semaphore->conf_computing.last_observed_notifier);
|
||||
|
||||
// Odd notifier value means there's an update in progress.
|
||||
if (gpu_notifier % 2)
|
||||
continue;
|
||||
|
||||
// There's no change since last time
|
||||
if (gpu_notifier == semaphore->conf_computing.last_observed_notifier)
|
||||
return;
|
||||
|
||||
// Make sure no memory accesses happen before we read the notifier
|
||||
smp_mb__after_atomic();
|
||||
|
||||
iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
|
||||
memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
|
||||
local_payload = UVM_READ_ONCE(*payload_cpu_addr);
|
||||
memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
|
||||
memcpy(local_auth_tag, uvm_gpu_semaphore_get_auth_tag_cpu_va(semaphore), sizeof(local_auth_tag));
|
||||
local_payload = UVM_READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
|
||||
|
||||
// Make sure the second read of notifier happens after
|
||||
// all memory accesses.
|
||||
smp_mb__before_atomic();
|
||||
new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
|
||||
new_gpu_notifier = UVM_READ_ONCE(*semaphore_notifier_cpu_addr);
|
||||
tries_left--;
|
||||
} while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));
|
||||
|
||||
if (!tries_left) {
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto error;
|
||||
}
|
||||
else {
|
||||
NvU32 key_version;
|
||||
const NvU32 iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
|
||||
NvU32 new_semaphore_value;
|
||||
|
||||
UVM_ASSERT(gpu_notifier == new_gpu_notifier);
|
||||
UVM_ASSERT(gpu_notifier % 2 == 0);
|
||||
|
||||
// CPU decryption is guaranteed to use the same key version as the
|
||||
// associated GPU encryption, because if there was any key rotation in
|
||||
// between, then key rotation waited for all channels to complete before
|
||||
// proceeding. The wait implies that the semaphore value matches the
|
||||
// last one encrypted on the GPU, so this CPU decryption should happen
|
||||
// before the key is rotated.
|
||||
key_version = uvm_channel_pool_key_version(channel->pool);
|
||||
|
||||
if (gpu_notifier == new_gpu_notifier) {
|
||||
status = uvm_conf_computing_cpu_decrypt(channel,
|
||||
&new_sem_value,
|
||||
&new_semaphore_value,
|
||||
&local_payload,
|
||||
&local_iv,
|
||||
sizeof(new_sem_value),
|
||||
&semaphore->conf_computing.ivs[iv_index],
|
||||
key_version,
|
||||
sizeof(new_semaphore_value),
|
||||
&local_auth_tag);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
|
||||
uvm_gpu_semaphore_set_payload(semaphore, new_semaphore_value);
|
||||
UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
|
||||
}
|
||||
|
||||
return;
|
||||
return;
|
||||
}
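The retry loop above is a sequence-lock-style reader: the GPU bumps the notifier to an odd value before rewriting the encrypted payload and auth tag, then to the next even value once done, so the CPU only trusts a snapshot bracketed by two identical even reads. A distilled, hedged sketch of that reader pattern; names and types are illustrative, not code from the driver:

/* Sketch of the even/odd notifier handshake, kernel-style C. */
#include <linux/compiler.h>   /* READ_ONCE */
#include <asm/barrier.h>      /* smp_rmb() */
#include <linux/string.h>     /* memcpy */

/* Returns 0 once a torn-free snapshot of 'src' was taken, or -1 if the
 * writer kept interfering for 'tries' iterations. */
static int snapshot_read(const unsigned *seq, const void *src, void *dst,
                         size_t size, unsigned tries)
{
    for (; tries > 0; tries--) {
        unsigned s1 = READ_ONCE(*seq);

        if (s1 & 1)          /* odd: writer mid-update, try again   */
            continue;

        smp_rmb();           /* copy happens after the first read   */
        memcpy(dst, src, size);
        smp_rmb();           /* second read happens after the copy  */

        if (READ_ONCE(*seq) == s1)
            return 0;        /* same even value on both sides       */
    }
    return -1;               /* maps to NV_ERR_INVALID_STATE above  */
}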

error:
// Decryption failure is a fatal error, as is running out of tries.
@@ -650,11 +725,11 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
else
uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);

if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
if (gpu_semaphore_is_secure(&tracking_semaphore->semaphore)) {
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
// mechanism to all semaphore
uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
}

new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
@@ -690,7 +765,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
"GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
uvm_gpu_name(tracking_semaphore->semaphore.page->pool->gpu),
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
(NvU64)(uintptr_t)uvm_gpu_semaphore_get_cpu_va(&tracking_semaphore->semaphore),
old_value, new_value);

// Use an atomic write even though the lock is held so that the value can

@@ -29,6 +29,8 @@
#include "uvm_rm_mem.h"
#include "uvm_linux.h"

typedef NvU32 uvm_gpu_semaphore_notifier_t;

// A GPU semaphore is a memory location accessible by the GPUs and the CPU
// that's used for synchronization among them.
// The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
@@ -45,17 +47,15 @@ struct uvm_gpu_semaphore_struct
// The semaphore pool page the semaphore came from
uvm_gpu_semaphore_pool_page_t *page;

// Pointer to the memory location
NvU32 *payload;
// Index of the semaphore in semaphore page
NvU16 index;

struct {
NvU16 index;
NvU32 cached_payload;
uvm_rm_mem_t *encrypted_payload;
uvm_rm_mem_t *notifier;
uvm_rm_mem_t *auth_tag;
UvmCslIv *ivs;
NvU32 last_pushed_notifier;
NvU32 last_observed_notifier;
NvU32 cached_payload;

uvm_gpu_semaphore_notifier_t last_pushed_notifier;
uvm_gpu_semaphore_notifier_t last_observed_notifier;
} conf_computing;
};

@@ -151,6 +151,17 @@ NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu

NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space);

NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore);

NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore);

uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore);

void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore);
uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore);

// Read the 32-bit payload of the semaphore
// Notably doesn't provide any memory ordering guarantees and needs to be used with
// care. For an example of what needs to be considered see

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -251,9 +251,6 @@ static uvm_hal_class_ops_t host_table[] =
.semaphore_release = uvm_hal_turing_host_semaphore_release,
.clear_faulted_channel_method = uvm_hal_turing_host_clear_faulted_channel_method,
.set_gpfifo_entry = uvm_hal_turing_host_set_gpfifo_entry,
.tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
}
},
{
@@ -635,19 +632,13 @@ NV_STATUS uvm_hal_init_table(void)
return status;
}

status = ops_init_from_parent(host_table,
ARRAY_SIZE(host_table),
HOST_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.host_ops));
status = ops_init_from_parent(host_table, ARRAY_SIZE(host_table), HOST_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.host_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(host_table) failed: %s\n", nvstatusToString(status));
return status;
}

status = ops_init_from_parent(arch_table,
ARRAY_SIZE(arch_table),
ARCH_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.arch_ops));
status = ops_init_from_parent(arch_table, ARRAY_SIZE(arch_table), ARCH_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.arch_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(arch_table) failed: %s\n", nvstatusToString(status));
return status;
@@ -941,16 +932,14 @@ const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type)
void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
{
UVM_DBG_PRINT("fault_address: 0x%llx\n", entry->fault_address);
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n",
entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n", entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" fault_type: %s\n", uvm_fault_type_string(entry->fault_type));
UVM_DBG_PRINT(" fault_access_type: %s\n", uvm_fault_access_type_string(entry->fault_access_type));
UVM_DBG_PRINT(" is_replayable: %s\n", entry->is_replayable? "true": "false");
UVM_DBG_PRINT(" is_virtual: %s\n", entry->is_virtual? "true": "false");
UVM_DBG_PRINT(" in_protected_mode: %s\n", entry->in_protected_mode? "true": "false");
UVM_DBG_PRINT(" fault_source.client_type: %s\n",
uvm_fault_client_type_string(entry->fault_source.client_type));
UVM_DBG_PRINT(" fault_source.client_type: %s\n", uvm_fault_client_type_string(entry->fault_source.client_type));
UVM_DBG_PRINT(" fault_source.client_id: %d\n", entry->fault_source.client_id);
UVM_DBG_PRINT(" fault_source.gpc_id: %d\n", entry->fault_source.gpc_id);
UVM_DBG_PRINT(" fault_source.mmu_engine_id: %d\n", entry->fault_source.mmu_engine_id);
@@ -973,15 +962,13 @@ const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_coun
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
{
if (!entry->address.is_virtual) {
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
entry->address.address,
uvm_aperture_string(entry->address.aperture));
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n", entry->address.address,
uvm_aperture_string(entry->address.aperture));
}
else {
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n", entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -112,10 +112,6 @@ void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_turing_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
@@ -153,49 +149,42 @@ typedef void (*uvm_hal_host_tlb_invalidate_va_t)(uvm_push_t *push,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_turing_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 page_size,
uvm_membar_t membar);

typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
@@ -207,9 +196,6 @@ void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
@@ -459,15 +445,15 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);

// Retrieve the page-tree HAL for a given big page size
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU64 big_page_size);
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
typedef void (*uvm_hal_mmu_enable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_mmu_disable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);

@@ -73,6 +73,24 @@ module_param(uvm_disable_hmm, bool, 0444);
#include "uvm_va_policy.h"
#include "uvm_tools.h"

// The function nv_PageSwapCache() wraps the check for page swap cache flag in
// order to support a wide variety of kernel versions.
// The function PageSwapCache() is removed after 32f51ead3d77 ("mm: remove
// PageSwapCache") in v6.12-rc1.
// The function folio_test_swapcache() was added in Linux 5.16 (d389a4a811551
// "mm: Add folio flag manipulation functions")
// Systems with HMM patches backported to 5.14 are possible, but those systems
// do not include folio_test_swapcache()
// TODO: Bug 4050579: Remove this when migration of swap cached pages is updated
static __always_inline bool nv_PageSwapCache(struct page *page)
{
#if defined(NV_FOLIO_TEST_SWAPCACHE_PRESENT)
return folio_test_swapcache(page_folio(page));
#else
return PageSwapCache(page);
#endif
}
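Call sites then get a single spelling regardless of kernel version; the hunk for dmamap_src_sysmem_pages() further down switches one such caller over. A hypothetical call site for illustration (can_migrate_page is not a real driver function):

/* Illustrative only: skip swap-cached pages, which cannot be migrated
 * yet (Bug 4050579, see the TODO above). */
static bool can_migrate_page(struct page *page)
{
    return !nv_PageSwapCache(page);
}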

static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
struct page *page);
@@ -145,7 +163,7 @@ static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
// Copies the contents of the source device-private page to the
// destination CPU page. This will invalidate mappings, so cannot be
// called while holding any va_block locks.
static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
static NV_STATUS hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
{
uvm_tracker_t tracker = UVM_TRACKER_INIT();
uvm_gpu_phys_address_t src_addr;
@@ -166,7 +184,7 @@ static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
gpu = uvm_gpu_chunk_get_gpu(gpu_chunk);
status = uvm_mmu_chunk_map(gpu_chunk);
if (status != NV_OK)
goto out_zero;
goto out;

status = uvm_parent_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
if (status != NV_OK)
@@ -189,7 +207,7 @@ static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
uvm_push_end(&push);
status = uvm_tracker_add_push_safe(&tracker, &push);
if (status == NV_OK)
uvm_tracker_wait_deinit(&tracker);
status = uvm_tracker_wait_deinit(&tracker);

out_unmap_cpu:
uvm_parent_gpu_unmap_cpu_pages(gpu->parent, dma_addr, PAGE_SIZE);
@@ -197,13 +215,8 @@ out_unmap_cpu:
out_unmap_gpu:
uvm_mmu_chunk_unmap(gpu_chunk, NULL);

out_zero:
// We can't fail eviction because we need to free the device-private pages
// so the GPU can be unregistered. So the best we can do is warn on any
// failures and zero the uninitialised page. This could result in data loss
// in the application but failures are not expected.
if (WARN_ON(status != NV_OK))
memzero_page(dst_page, 0, PAGE_SIZE);
out:
return status;
}

static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
@@ -227,7 +240,13 @@ static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
}

lock_page(dst_page);
hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn));

// We can't fail eviction because we need to free the device-private
// pages so the GPU can be unregistered. So the best we can do is warn
// on any failures and zero the uninitialized page. This could result
// in data loss in the application but failures are not expected.
if (hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn)) != NV_OK)
memzero_page(dst_page, 0, PAGE_SIZE);
dst_pfn = migrate_pfn(page_to_pfn(dst_page));
migrate_device_pages(&src_pfn, &dst_pfn, 1);
}
@@ -284,8 +303,10 @@ static void hmm_va_block_unregister_gpu(uvm_va_block_t *va_block,

// Reset preferred location and accessed-by of policy nodes if needed.
uvm_for_each_va_policy_node_in(node, va_block, va_block->start, va_block->end) {
if (uvm_id_equal(node->policy.preferred_location, gpu->id))
if (uvm_va_policy_preferred_location_equal(&node->policy, gpu->id, NUMA_NO_NODE)) {
node->policy.preferred_location = UVM_ID_INVALID;
node->policy.preferred_nid = NUMA_NO_NODE;
}

uvm_processor_mask_clear(&node->policy.accessed_by, gpu->id);
}
@@ -1599,7 +1620,7 @@ static void hmm_va_block_cpu_unpopulate_chunk(uvm_va_block_t *va_block,
UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == PAGE_SIZE);

uvm_cpu_chunk_remove_from_block(va_block, chunk_nid, page_index);
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk);
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk, page_index);
uvm_cpu_chunk_free(chunk);
}

@@ -2696,7 +2717,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
continue;
}

if (PageSwapCache(src_page)) {
if (nv_PageSwapCache(src_page)) {
// TODO: Bug 4050579: Remove this when swap cached pages can be
// migrated.
status = NV_WARN_MISMATCHED_TARGET;
@@ -3466,12 +3487,17 @@ NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
lock_page(dst_page);
dst_pfn = migrate_pfn(page_to_pfn(dst_page));

hmm_copy_devmem_page(dst_page, src_page);
status = hmm_copy_devmem_page(dst_page, src_page);
if (status != NV_OK) {
unlock_page(dst_page);
__free_page(dst_page);
dst_pfn = 0;
}
}

migrate_vma_pages(&args);

out:
if (status == NV_OK)
migrate_vma_pages(&args);
migrate_vma_finalize(&args);

return status;

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2024 NVIDIA Corporation
Copyright (c) 2020-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -157,7 +157,6 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;

UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);

@@ -184,12 +183,7 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}

if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);

NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
@@ -202,9 +196,7 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));

// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
uvm_hal_tlb_invalidate_membar(push, membar);
}

void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
@@ -212,7 +204,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
@@ -220,7 +212,6 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 va_lo;
NvU32 va_hi;
NvU64 end;
@@ -230,9 +221,9 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);

UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);

// The invalidation size must be a power-of-two number of pages containing
@@ -286,13 +277,8 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}

if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);

NV_PUSH_4U(C86F, MEM_OP_A, HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
sysmembar_value |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
@@ -306,9 +292,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));

// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
gpu->parent->host_hal->membar_gpu(push);
uvm_hal_tlb_invalidate_membar(push, membar);
}

void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
@@ -316,12 +300,12 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 invalidate_gpc_value = 0;
NvU32 aperture_value = 0;
NvU32 pdb_lo = 0;
NvU32 pdb_hi = 0;
NvU32 page_table_level = 0;
uvm_membar_t membar;

UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
@@ -348,11 +332,6 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}

if (params->membar == UvmInvalidateTlbMemBarSys)
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);

if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
else
@@ -364,7 +343,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
NvU32 va_lo = va & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NvU32 va_hi = va >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);

NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
@@ -379,7 +358,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
else {
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
@@ -393,9 +372,14 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}

// GPU membar still requires an explicit membar method.
if (params->membar == UvmInvalidateTlbMemBarLocal)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
if (params->membar == UvmInvalidateTlbMemBarSys)
membar = UVM_MEMBAR_SYS;
else if (params->membar == UvmInvalidateTlbMemBarLocal)
membar = UVM_MEMBAR_GPU;
else
membar = UVM_MEMBAR_NONE;

uvm_hal_tlb_invalidate_membar(push, membar);
}
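All three Hopper invalidate paths now end in uvm_hal_tlb_invalidate_membar() instead of open-coding the membar tail. This diff does not show the helper's body; a plausible sketch based on the removed code it replaces (the membar_sys method and the exact structure are assumptions, not confirmed by this diff):

/* Hedged sketch, not from this diff: what the shared tail plausibly
 * looks like, given the per-call logic it replaces. */
static void tlb_invalidate_membar_sketch(uvm_push_t *push, uvm_membar_t membar)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    if (membar == UVM_MEMBAR_NONE)
        return;

    /* With SYSMEMBAR now always DIS in MEM_OP_A, both the SYS and GPU
     * cases need an explicit membar method after the invalidate. */
    if (membar == UVM_MEMBAR_SYS)
        gpu->parent->host_hal->membar_sys(push);
    else
        gpu->parent->host_hal->membar_gpu(push);
}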

void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va)

@@ -61,7 +61,7 @@ uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
}

static NvU32 page_table_depth_hopper(NvU64 page_size)
static NvU32 page_table_depth_hopper(NvU32 page_size)
{
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
if (page_size == UVM_PAGE_SIZE_2M)
@@ -79,7 +79,7 @@ static NvU32 entries_per_index_hopper(NvU32 depth)
return 1;
}

static NvLength entry_offset_hopper(NvU32 depth, NvU64 page_size)
static NvLength entry_offset_hopper(NvU32 depth, NvU32 page_size)
{
UVM_ASSERT(depth < 6);
if ((page_size == UVM_PAGE_SIZE_4K) && (depth == 4))
@@ -92,7 +92,7 @@ static NvLength entry_size_hopper(NvU32 depth)
return entries_per_index_hopper(depth) * 8;
}

static NvU32 index_bits_hopper(NvU32 depth, NvU64 page_size)
static NvU32 index_bits_hopper(NvU32 depth, NvU32 page_size)
{
static const NvU32 bit_widths[] = {1, 9, 9, 9, 8};

@@ -120,7 +120,7 @@ static NvU32 num_va_bits_hopper(void)
return 57;
}

static NvLength allocation_size_hopper(NvU32 depth, NvU64 page_size)
static NvLength allocation_size_hopper(NvU32 depth, NvU32 page_size)
{
UVM_ASSERT(depth < 6);
if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
@@ -233,7 +233,7 @@ static NvU64 make_sparse_pte_hopper(void)
HWCONST64(_MMU_VER3, PTE, PCF, SPARSE);
}

static NvU64 unmapped_pte_hopper(NvU64 page_size)
static NvU64 unmapped_pte_hopper(NvU32 page_size)
{
// Setting PCF to NO_VALID_4KB_PAGE on an otherwise-zeroed big PTE causes
// the corresponding 4k PTEs to be ignored. This allows the invalidation of
@@ -490,7 +490,7 @@ static void make_pde_hopper(void *entry,

static uvm_mmu_mode_hal_t hopper_mmu_mode_hal;

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size)
{
static bool initialized = false;

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2024 NVidia Corporation
Copyright (c) 2013-2023 NVidia Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -494,7 +494,7 @@ typedef struct
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NvU64 offset NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NvS32 rmCtrlFd; // IN
NvU32 hClient; // IN
@@ -952,6 +952,7 @@ typedef struct
NvU32 version; // OUT
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;

//
// UvmMapDynamicParallelismRegion
//
@@ -994,7 +995,7 @@ typedef struct
{
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;

@@ -226,48 +226,7 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define __GFP_NORETRY 0
#endif

#define NV_UVM_GFP_FLAGS (GFP_KERNEL)

// Develop builds define DEBUG but enable optimization
#if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP)
// Wrappers for functions not building correctly without optimizations on,
// implemented in uvm_debug_optimized.c. Notably the file is only built for
// debug builds, not develop or release builds.

// Unoptimized builds of atomic_xchg() hit a BUILD_BUG() on arm64 as it relies
// on __xchg being completely inlined:
// /usr/src/linux-3.12.19/arch/arm64/include/asm/cmpxchg.h:67:3: note: in expansion of macro 'BUILD_BUG'
//
// PowerPC hits a similar issue, but ends up with an undefined symbol:
// WARNING: "__xchg_called_with_bad_pointer" [...] undefined!
int nv_atomic_xchg(atomic_t *val, int new);

// Same problem as atomic_xchg() on PowerPC:
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!
int nv_atomic_cmpxchg(atomic_t *val, int old, int new);

// Same problem as atomic_xchg() on PowerPC:
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!

// This Linux kernel commit:
// 2016-08-30 0d025d271e55f3de21f0aaaf54b42d20404d2b23
// leads to build failures on x86_64, when compiling without optimization. Avoid
// that problem, by providing our own builds of copy_from_user / copy_to_user,
// for debug (non-optimized) UVM builds. Those are accessed via these
// nv_copy_to/from_user wrapper functions.
//
// Bug 1849583 has further details.
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n);
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n);

#else
#define nv_atomic_xchg atomic_xchg
#define nv_atomic_cmpxchg atomic_cmpxchg
#define nv_atomic_long_cmpxchg atomic_long_cmpxchg
#define nv_copy_to_user copy_to_user
#define nv_copy_from_user copy_from_user
#endif
#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NOMEMALLOC)

#ifndef NV_ALIGN_DOWN
#define NV_ALIGN_DOWN(v,g) ((v) & ~((g) - 1))
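NV_ALIGN_DOWN assumes g is a power of two: clearing the low bits rounds v down to a multiple of g. A quick worked check with illustrative values:

/* NV_ALIGN_DOWN(0x12345, 0x1000):
 *   0x1000 - 1        = 0x0FFF
 *   ~0x0FFF           = ...FFFFF000
 *   0x12345 & ...F000 = 0x12000   -> rounded down to a 4K boundary */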

@@ -27,7 +27,7 @@

const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 34);
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);

switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
@@ -48,7 +48,9 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_PUSH);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION_WLC);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_WLC_PUSH);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_SEC2_PUSH);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);

@@ -322,6 +322,15 @@
// Operations not allowed while holding this lock
// - GPU memory allocation which can evict
//
// - Channel pool key rotation lock
// Order: UVM_LOCK_ORDER_KEY_ROTATION
// Condition: Confidential Computing is enabled
// Mutex per channel pool
//
// The lock ensures mutual exclusion during key rotation affecting all the
// channels in the associated pool. Key rotation in WLC pools is handled
// using a separate lock order, see UVM_LOCK_ORDER_KEY_ROTATION_WLC below.
//
// - CE channel CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_CSL_PUSH
// Condition: The Confidential Computing feature is enabled
@@ -338,6 +347,15 @@
// Operations allowed while holding this lock
// - Pushing work to CE channels (except for WLC channels)
//
// - WLC channel pool key rotation lock
// Order: UVM_LOCK_ORDER_KEY_ROTATION_WLC
// Condition: Confidential Computing is enabled
// Mutex of WLC channel pool
//
// The lock has the same purpose as the regular channel pool key rotation
// lock. Using a different order lock for WLC channels allows key rotation
// on those channels during indirect work submission.
//
// - WLC CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
|

@@ -484,7 +502,9 @@ typedef enum
UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL,
UVM_LOCK_ORDER_CHUNK_MAPPING,
UVM_LOCK_ORDER_PAGE_TREE,
UVM_LOCK_ORDER_KEY_ROTATION,
UVM_LOCK_ORDER_CSL_PUSH,
UVM_LOCK_ORDER_KEY_ROTATION_WLC,
UVM_LOCK_ORDER_CSL_WLC_PUSH,
UVM_LOCK_ORDER_CSL_SEC2_PUSH,
UVM_LOCK_ORDER_PUSH,

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -61,7 +61,7 @@ typedef struct
size_t buffer_size;

// Page size in bytes
NvU64 page_size;
NvU32 page_size;

// Size of a single PTE in bytes
NvU32 pte_size;
@@ -91,7 +91,7 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
uvm_gpu_t *gpu,
const uvm_map_rm_params_t *map_rm_params,
NvU64 length,
NvU64 page_size,
NvU32 page_size,
uvm_pte_buffer_t *pte_buffer)
{
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_range->va_space, gpu);
@@ -650,7 +650,9 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
return NV_OK;
}
// This is a local or peer allocation, so the owning GPU must have been
// registered. This also checks for if EGM owning GPU is registered.
// registered.
// This also checks for if EGM owning GPU is registered.

owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
if (!owning_gpu)
return NV_ERR_INVALID_DEVICE;
@@ -663,6 +665,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
// semantics of sysmem allocations.

// Check if peer access for peer memory is enabled.
// This path also handles EGM allocations.
if (owning_gpu != mapping_gpu && (!mem_info->sysmem || mem_info->egm)) {
// TODO: Bug 1757136: In SLI, the returned UUID may be different but a
// local mapping must be used. We need to query SLI groups to know
@@ -853,10 +856,9 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
UvmGpuMemoryInfo mem_info;
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, mapping_gpu);
NvU64 mapping_page_size;
NvU64 biggest_mapping_page_size;
NvU32 mapping_page_size;
NvU64 alignments;
NvU64 smallest_alignment;
NvU32 smallest_alignment;
NV_STATUS status;

uvm_assert_rwsem_locked_read(&va_space->lock);
@@ -945,11 +947,9 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,

// Check for the maximum page size for the mapping of vidmem allocations,
// the vMMU segment size may limit the range of page sizes.
biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
mapping_gpu->mem_info.max_vidmem_page_size);
if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
(mapping_page_size > biggest_mapping_page_size))
mapping_page_size = biggest_mapping_page_size;
(mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;

mem_info.pageSize = mapping_page_size;

@@ -986,7 +986,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
if (uvm_api_range_invalid_4k(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;

if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
return NV_ERR_INVALID_ARGUMENT;

mapped_gpus = uvm_processor_mask_cache_alloc();

@@ -108,7 +108,7 @@ void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 page_size,
uvm_membar_t membar)
{
// No per VA invalidate on Maxwell, redirect to invalidate all.

@@ -52,7 +52,7 @@ static NvU32 entries_per_index_maxwell(NvU32 depth)
return 1;
}

static NvLength entry_offset_maxwell(NvU32 depth, NvU64 page_size)
static NvLength entry_offset_maxwell(NvU32 depth, NvU32 page_size)
{
UVM_ASSERT(depth < 2);
if (page_size == UVM_PAGE_SIZE_4K && depth == 0)
@@ -128,7 +128,7 @@ static NvLength entry_size_maxwell(NvU32 depth)
return 8;
}

static NvU32 index_bits_maxwell_64(NvU32 depth, NvU64 page_size)
static NvU32 index_bits_maxwell_64(NvU32 depth, NvU32 page_size)
{
UVM_ASSERT(depth < 2);
UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
@@ -146,7 +146,7 @@ static NvU32 index_bits_maxwell_64(NvU32 depth, NvU64 page_size)
}
}

static NvU32 index_bits_maxwell_128(NvU32 depth, NvU64 page_size)
static NvU32 index_bits_maxwell_128(NvU32 depth, NvU32 page_size)
{
UVM_ASSERT(depth < 2);
UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
@@ -169,32 +169,32 @@ static NvU32 num_va_bits_maxwell(void)
return 40;
}

static NvLength allocation_size_maxwell_64(NvU32 depth, NvU64 page_size)
static NvLength allocation_size_maxwell_64(NvU32 depth, NvU32 page_size)
{
return entry_size_maxwell(depth) << index_bits_maxwell_64(depth, page_size);
}

static NvLength allocation_size_maxwell_128(NvU32 depth, NvU64 page_size)
static NvLength allocation_size_maxwell_128(NvU32 depth, NvU32 page_size)
{
return entry_size_maxwell(depth) << index_bits_maxwell_128(depth, page_size);
}

static NvU32 page_table_depth_maxwell(NvU64 page_size)
static NvU32 page_table_depth_maxwell(NvU32 page_size)
{
return 1;
}

static NvU64 page_sizes_maxwell_128(void)
static NvU32 page_sizes_maxwell_128(void)
{
return UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_4K;
}

static NvU64 page_sizes_maxwell_64(void)
static NvU32 page_sizes_maxwell_64(void)
{
return UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}

static NvU64 unmapped_pte_maxwell(NvU64 page_size)
static NvU64 unmapped_pte_maxwell(NvU32 page_size)
{
// Setting the privilege bit on an otherwise-zeroed big PTE causes the
// corresponding 4k PTEs to be ignored. This allows the invalidation of a
@@ -356,7 +356,7 @@ static uvm_mmu_mode_hal_t maxwell_128_mmu_mode_hal =
.page_sizes = page_sizes_maxwell_128
};

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size)
{
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
if (big_page_size == UVM_PAGE_SIZE_64K)

@@ -290,15 +290,15 @@ uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu)
// Get the mmu mode hal directly as the internal address space tree has not
// been created yet.
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(gpu->big_page.internal_size);
NvU64 page_sizes = hal->page_sizes();
NvU32 page_sizes = hal->page_sizes();

return (uvm_chunk_sizes_mask_t)(page_sizes & UVM_CHUNK_SIZES_MASK);
}
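page_sizes() returns a bitmask with one bit per supported page size, which is why it can be masked directly against UVM_CHUNK_SIZES_MASK above. Picking "the biggest supported size up to a limit", the job uvm_mmu_biggest_page_size_up_to() does elsewhere in this diff, reduces to a bit trick. An illustrative stand-alone version; the real helper may differ:

#include <linux/log2.h>   /* rounddown_pow_of_two() */

/* Illustrative: largest power-of-two page size in 'mask' that is
 * <= 'max_size' (itself a power of two); 0 if none qualifies. */
static NvU64 biggest_page_size_up_to_sketch(NvU64 mask, NvU64 max_size)
{
    NvU64 eligible = mask & (max_size | (max_size - 1));

    return eligible ? rounddown_pow_of_two(eligible) : 0;
}

For example, mask = 4K|64K|2M with max_size = 128K yields 64K.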
|
||||
|
||||
static NvU64 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
{
|
||||
NvU64 biggest_page_size;
|
||||
NvU64 chunk_size;
|
||||
NvU32 biggest_page_size;
|
||||
NvU32 chunk_size;
|
||||
|
||||
if (uvm_mem_is_sysmem(mem))
|
||||
return PAGE_SIZE;
|
||||
@@ -315,12 +315,12 @@ static NvU64 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
// When UVM_PAGE_SIZE_DEFAULT is used on NUMA-enabled GPUs, we force
|
||||
// chunk_size to be PAGE_SIZE at least, to allow CPU mappings.
|
||||
if (mem->backing_gpu->mem_info.numa.enabled)
|
||||
chunk_size = max(chunk_size, (NvU64)PAGE_SIZE);
|
||||
chunk_size = max(chunk_size, (NvU32)PAGE_SIZE);
|
||||
|
||||
return chunk_size;
|
||||
}
|
||||
|
||||
static NvU64 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
|
||||
static NvU32 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
|
||||
{
|
||||
if (uvm_mem_is_vidmem(mem)) {
|
||||
// For vidmem allocations the chunk size is picked out of the supported
|
||||
@@ -467,7 +467,7 @@ static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
|
||||
NvU64 *dma_addrs;
|
||||
|
||||
UVM_ASSERT_MSG(mem->chunk_size == PAGE_SIZE,
|
||||
"mem->chunk_size is 0x%llx. PAGE_SIZE is only supported.",
|
||||
"mem->chunk_size is 0x%x. PAGE_SIZE is only supported.",
|
||||
mem->chunk_size);
|
||||
UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));
|
||||
|
||||
@@ -528,9 +528,10 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
|
||||
|
||||
// In case of failure, the caller is required to handle cleanup by calling
|
||||
// uvm_mem_free
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unprotected)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_pmm_gpu_memory_type_t mem_type;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
@@ -547,15 +548,23 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
|
||||
if (!mem->vidmem.chunks)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
status = uvm_pmm_gpu_alloc_kernel(&mem->backing_gpu->pmm,
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
mem->vidmem.chunks,
|
||||
NULL);
|
||||
// When CC is disabled the behavior is identical to that of PMM, and the
|
||||
// protection flag is ignored (squashed by PMM internally).
|
||||
if (is_unprotected)
|
||||
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED;
|
||||
else
|
||||
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED;
|
||||
|
||||
status = uvm_pmm_gpu_alloc(&mem->backing_gpu->pmm,
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
mem_type,
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
mem->vidmem.chunks,
|
||||
NULL);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_pmm_gpu_alloc_kernel (count=%zd, size=0x%llx) failed: %s\n",
|
||||
UVM_ERR_PRINT("uvm_pmm_gpu_alloc (count=%zd, size=0x%x) failed: %s\n",
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
nvstatusToString(status));
|
||||
@@ -565,7 +574,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero)
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_unprotected)
|
||||
{
|
||||
if (uvm_mem_is_sysmem(mem)) {
|
||||
gfp_t gfp_flags;
|
||||
@@ -587,7 +596,7 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
|
||||
return status;
|
||||
}
|
||||
|
||||
return mem_alloc_vidmem_chunks(mem, zero);
|
||||
return mem_alloc_vidmem_chunks(mem, zero, is_unprotected);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_processor_mask_t *mask)
|
||||
@@ -617,6 +626,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
NV_STATUS status;
|
||||
NvU64 physical_size;
|
||||
uvm_mem_t *mem = NULL;
|
||||
bool is_unprotected = false;
|
||||
|
||||
UVM_ASSERT(params->size > 0);
|
||||
|
||||
@@ -638,7 +648,12 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
|
||||
mem->chunks_count = physical_size / mem->chunk_size;
|
||||
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero);
|
||||
if (params->is_unprotected)
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
is_unprotected = params->is_unprotected;
|
||||
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero, is_unprotected);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
@@ -1035,7 +1050,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
uvm_page_table_range_vec_t **range_vec)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU64 page_size;
|
||||
NvU32 page_size;
|
||||
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
|
||||
|
||||
uvm_mem_pte_maker_data_t pte_maker_data = {
|
||||
@@ -1044,7 +1059,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
};
|
||||
|
||||
page_size = mem_pick_gpu_page_size(mem, gpu, tree);
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx\n", page_size);
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);
|
||||
|
||||
// When the Confidential Computing feature is enabled, DMA allocations are
|
||||
// majoritarily allocated and managed by a per-GPU DMA buffer pool
|
||||
|
||||
@@ -126,7 +126,12 @@ typedef struct
|
||||
//
|
||||
// CPU mappings will always use PAGE_SIZE, so the physical allocation chunk
|
||||
// has to be aligned to PAGE_SIZE.
|
||||
NvU64 page_size;
|
||||
NvU32 page_size;
|
||||
|
||||
// The protection flag is only observed for vidmem allocations when CC is
|
||||
// enabled. If set to true, the allocation returns unprotected vidmem;
|
||||
// otherwise, the allocation returns protected vidmem.
|
||||
bool is_unprotected;
|
||||
|
||||
// If true, the allocation is zeroed (scrubbed).
|
||||
bool zero;
|
||||
@@ -194,7 +199,7 @@ struct uvm_mem_struct
|
||||
size_t chunks_count;
|
||||
|
||||
// Size of each physical chunk (vidmem) or CPU page (sysmem)
|
||||
NvU64 chunk_size;
|
||||
NvU32 chunk_size;
|
||||
|
||||
// Size of the allocation
|
||||
NvU64 size;
|
||||
|
||||
@@ -153,7 +153,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
|
||||
for (i = 0; i < verif_size / sizeof(*sys_verif); ++i) {
|
||||
if (sys_verif[i] != mem->size + i) {
|
||||
UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%llu, processor=%u)\n",
|
||||
UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%u, processor=%u)\n",
|
||||
i,
|
||||
sys_verif[i],
|
||||
(NvU64)(verif_size + i),
|
||||
@@ -241,7 +241,7 @@ static NV_STATUS test_map_cpu(uvm_mem_t *mem)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_mem_t *mem;
|
||||
@@ -299,7 +299,7 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_mem_t *mem;
|
||||
@@ -334,7 +334,7 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool should_test_page_size(size_t alloc_size, NvU64 page_size)
|
||||
static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
|
||||
{
|
||||
if (g_uvm_global.num_simulated_devices == 0)
|
||||
return true;
|
||||
@@ -359,7 +359,7 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
|
||||
// size on pre-Pascal GPUs with 128K big page size.
|
||||
// Ampere+ also supports 512M PTEs, but since UVM's maximum chunk size is
|
||||
// 2M, we don't test for this page size.
|
||||
static const NvU64 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
|
||||
static const NvU32 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
|
||||
|
||||
// All supported page sizes will be tested, CPU has the most with 4 and +1
|
||||
// for the default.
|
||||
@@ -494,6 +494,41 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_basic_vidmem_unprotected(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_mem_t *mem = NULL;
|
||||
|
||||
uvm_mem_alloc_params_t params = { 0 };
|
||||
params.size = UVM_PAGE_SIZE_4K;
|
||||
params.backing_gpu = gpu;
|
||||
params.page_size = UVM_PAGE_SIZE_4K;
|
||||
|
||||
// If CC is enabled, the protection flag is observed. Because currently all
|
||||
// vidmem is in the protected region, the allocation should succeed.
|
||||
//
|
||||
// If CC is disabled, the protection flag is ignored.
|
||||
params.is_unprotected = false;
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc(¶ms, &mem));
|
||||
|
||||
uvm_mem_free(mem);
|
||||
mem = NULL;
|
||||
|
||||
// If CC is enabled, the allocation should fail because currently the
|
||||
// unprotected region is empty.
|
||||
//
|
||||
// If CC is disabled, the behavior should be identical to that of a
|
||||
// protected allocation.
|
||||
params.is_unprotected = true;
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
TEST_CHECK_RET(uvm_mem_alloc(¶ms, &mem) == NV_ERR_NO_MEMORY);
|
||||
else
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc(¶ms, &mem));
|
||||
|
||||
uvm_mem_free(mem);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_basic_sysmem(void)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@@ -578,6 +613,7 @@ static NV_STATUS test_basic(uvm_va_space_t *va_space)
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
TEST_NV_CHECK_RET(test_basic_vidmem(gpu));
|
||||
TEST_NV_CHECK_RET(test_basic_sysmem_dma(gpu));
|
||||
TEST_NV_CHECK_RET(test_basic_vidmem_unprotected(gpu));
|
||||
TEST_NV_CHECK_RET(test_basic_dma_pool(gpu));
|
||||
}
|
||||
|
||||
|
||||
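The hunk above adds test_basic_vidmem_unprotected(), which expects uvm_mem_alloc() to fail with NV_ERR_NO_MEMORY only when Confidential Computing is enabled. A minimal sketch of how a TEST_CHECK_RET-style assertion macro is commonly built (illustrative only; the real UVM definitions differ in detail):

```c
#include <linux/printk.h>

/* Hypothetical re-creation of a TEST_CHECK_RET-style macro: evaluate a
 * condition, log the failing expression and location, and abort the test
 * function by returning an error status. */
#define TEST_CHECK_RET(cond)                                        \
    do {                                                            \
        if (!(cond)) {                                              \
            pr_err("Test failed: %s at %s:%d\n",                    \
                   #cond, __FILE__, __LINE__);                      \
            return NV_ERR_INVALID_STATE;                            \
        }                                                           \
    } while (0)
```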
@@ -130,27 +130,12 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
     uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
     NV_STATUS status = NV_OK;
     NV_STATUS tracker_status;
+    uvm_prot_t prot = UVM_PROT_READ_WRITE_ATOMIC;
 
     // Get the mask of unmapped pages because it will change after the
     // first map operation
     uvm_va_block_unmapped_pages_get(va_block, region, &va_block_context->caller_page_mask);
 
-    if (uvm_va_block_is_hmm(va_block) && !UVM_ID_IS_CPU(dest_id)) {
-        // Do not map pages that are already resident on the CPU. This is in
-        // order to avoid breaking system-wide atomic operations on HMM. HMM's
-        // implementation of system-side atomic operations involves restricting
-        // mappings to one processor (CPU or a GPU) at a time. If we were to
-        // grant a GPU a mapping to system memory, this gets into trouble
-        // because, on the CPU side, Linux can silently upgrade PTE permissions
-        // (move from read-only, to read-write, without any MMU notifiers
-        // firing), thus breaking the model by allowing simultaneous read-write
-        // access from two separate processors. To avoid that, just don't map
-        // such pages at all, when migrating.
-        uvm_page_mask_andnot(&va_block_context->caller_page_mask,
-                             &va_block_context->caller_page_mask,
-                             uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE));
-    }
-
     // Only map those pages that are not mapped anywhere else (likely due
     // to a first touch or a migration). We pass
     // UvmEventMapRemoteCauseInvalid since the destination processor of a
@@ -166,6 +151,31 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
     if (status != NV_OK)
         goto out;
 
+    if (uvm_va_block_is_hmm(va_block) && UVM_ID_IS_CPU(dest_id)) {
+        uvm_processor_id_t id;
+
+        // Do not atomically map pages that are resident on the CPU. This is in
+        // order to avoid breaking system-wide atomic operations on HMM. HMM's
+        // implementation of system-side atomic operations involves restricting
+        // mappings to one processor (CPU or a GPU) at a time. If we were to
+        // grant a GPU a mapping to system memory, this gets into trouble
+        // because, on the CPU side, Linux can silently upgrade PTE permissions
+        // (move from read-only, to read-write, without any MMU notifiers
+        // firing), thus breaking the model by allowing simultaneous read-write
+        // access from two separate processors. To avoid that, don't remote map
+        // such pages atomically, after migrating.
+        // Also note that HMM sets CPU mapping for resident pages so the mask
+        // of pages to be mapped needs to be recomputed without including the
+        // CPU mapping.
+        prot = UVM_PROT_READ_WRITE;
+        uvm_page_mask_region_fill(&va_block_context->caller_page_mask, region);
+        for_each_gpu_id_in_mask(id, &va_block->mapped) {
+            uvm_page_mask_andnot(&va_block_context->caller_page_mask,
+                                 &va_block_context->caller_page_mask,
+                                 uvm_va_block_map_mask_get(va_block, id));
+        }
+    }
+
     // Add mappings for AccessedBy processors
     //
     // No mappings within this call will operate on dest_id, so we don't
@@ -176,7 +186,7 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
                                             dest_id,
                                             region,
                                             &va_block_context->caller_page_mask,
-                                            UVM_PROT_READ_WRITE_ATOMIC,
+                                            prot,
                                             NULL);
 
 out:
@@ -589,7 +599,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
             skipped_migrate = true;
         }
         else if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, dest_id) &&
-                 !uvm_id_equal(dest_id, policy->preferred_location)) {
+                 !uvm_va_policy_preferred_location_equal(policy, dest_id, NUMA_NO_NODE)) {
             // Don't migrate to a non-faultable GPU that is in UVM-Lite mode,
             // unless it's the preferred location
             status = NV_ERR_INVALID_DEVICE;
 
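The two hunks above move the HMM restriction from "skip mapping entirely" to "map read-write but not atomic", recomputing the page mask so that pages already mapped on any GPU are excluded. A toy sketch of that mask recomputation with plain Linux bitmaps (the uvm_page_mask_* helpers are thin wrappers over these; the size constant below is illustrative):

```c
#include <linux/bitmap.h>

#define PAGES_PER_BLOCK 512 /* illustrative; a 2M block of 4K pages */

/* Start from all pages in the region, then knock out every page that any
 * GPU already maps, mirroring the for_each_gpu_id_in_mask() loop above. */
static void recompute_map_mask(unsigned long *mask,
                               unsigned long *const *gpu_map_masks,
                               int num_gpus)
{
    int i;

    bitmap_fill(mask, PAGES_PER_BLOCK);
    for (i = 0; i < num_gpus; i++)
        bitmap_andnot(mask, mask, gpu_map_masks[i], PAGES_PER_BLOCK);
}
```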
@@ -50,18 +50,18 @@
 // because that type is normally associated with the LCE mapped to the most
 // PCEs. The higher bandwidth is beneficial when doing bulk operations such as
 // clearing PTEs, or initializing a page directory/table.
-#define page_tree_begin_acquire(tree, tracker, push, format, ...) ({ \
-    NV_STATUS status; \
-    uvm_channel_manager_t *manager = (tree)->gpu->channel_manager; \
-    \
-    if (manager == NULL) \
-        status = uvm_push_begin_fake((tree)->gpu, (push)); \
-    else if (uvm_parent_gpu_is_virt_mode_sriov_heavy((tree)->gpu->parent)) \
-        status = uvm_push_begin_acquire(manager, UVM_CHANNEL_TYPE_MEMOPS, (tracker), (push), (format), ##__VA_ARGS__); \
-    else \
-        status = uvm_push_begin_acquire(manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, (tracker), (push), (format), ##__VA_ARGS__);\
-    \
-    status; \
+#define page_tree_begin_acquire(tree, tracker, push, format, ...) ({ \
+    NV_STATUS __status; \
+    uvm_channel_manager_t *__manager = (tree)->gpu->channel_manager; \
+    \
+    if (__manager == NULL) \
+        __status = uvm_push_begin_fake((tree)->gpu, (push)); \
+    else if (uvm_parent_gpu_is_virt_mode_sriov_heavy((tree)->gpu->parent)) \
+        __status = uvm_push_begin_acquire(__manager, UVM_CHANNEL_TYPE_MEMOPS, (tracker), (push), (format), ##__VA_ARGS__); \
+    else \
+        __status = uvm_push_begin_acquire(__manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, (tracker), (push), (format), ##__VA_ARGS__);\
+    \
+    __status; \
 })
 
 // Default location of page table allocations
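The rename to __status/__manager above is the classic fix for name capture in GNU statement-expression macros: if a caller's argument expression mentions a variable named status or manager, it would resolve to the macro's own local instead. A minimal demonstration of the hazard and the fix (illustrative, not UVM code):

```c
/* BROKEN: expanding DOUBLE_BAD(status) yields "int status = (status);",
 * a self-initialization that reads the brand-new, uninitialized local. */
#define DOUBLE_BAD(x) ({ int status = (x); status * 2; })

/* FIXED: reserved-style names make accidental capture practically impossible. */
#define DOUBLE(x)     ({ int __status = (x); __status * 2; })

int demo(int status)
{
    int a = DOUBLE_BAD(status); /* garbage: reads the macro's own local */
    int b = DOUBLE(status);     /* correct: reads the caller's variable */

    return a + b;
}
```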
@@ -153,17 +153,20 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
 // - UVM_APERTURE_VID      biggest page size on vidmem mappings
 // - UVM_APERTURE_SYS      biggest page size on sysmem mappings
 // - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
-static NvU64 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
+static NvU32 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
 {
     UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
 
     // There may be scenarios where the GMMU must use a subset of the supported
     // page sizes, e.g., to comply with the vMMU supported page sizes due to
     // segmentation sizes.
-    if (aperture == UVM_APERTURE_VID)
-        return uvm_mmu_biggest_page_size_up_to(tree, tree->gpu->mem_info.max_vidmem_page_size);
-
-    return 1ULL << __fls(tree->hal->page_sizes());
+    if (aperture == UVM_APERTURE_VID) {
+        UVM_ASSERT(tree->gpu->mem_info.max_vidmem_page_size <= NV_U32_MAX);
+        return (NvU32) tree->gpu->mem_info.max_vidmem_page_size;
+    }
+    else {
+        return 1 << __fls(tree->hal->page_sizes());
+    }
 }
 
 static NV_STATUS phys_mem_allocate_vidmem(uvm_page_tree_t *tree,
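Both versions pick the largest supported page size by taking the top set bit of the page_sizes() bitmask. A worked example of the __fls() idiom (the kernel's __fls returns the index of the most significant set bit):

```c
#include <linux/bitops.h>

/* page_sizes is a bitmask of supported sizes, e.g. 4K | 64K | 2M:
 *   0x1000 | 0x10000 | 0x200000 = 0x211000
 * __fls(0x211000) == 21, so 1UL << 21 == 0x200000 (2M), the biggest size. */
static unsigned long biggest_page_size(unsigned long page_sizes)
{
    return 1UL << __fls(page_sizes);
}
```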
@@ -251,7 +254,7 @@ static void phys_mem_deallocate(uvm_page_tree_t *tree, uvm_mmu_page_table_alloc_
 }
 
 static void page_table_range_init(uvm_page_table_range_t *range,
-                                  NvU64 page_size,
+                                  NvU32 page_size,
                                   uvm_page_directory_t *dir,
                                   NvU32 start_index,
                                   NvU32 end_index)
@@ -441,9 +444,9 @@ static void pde_fill(uvm_page_tree_t *tree,
         pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
 }
 
-static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
+static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
 {
-    NvU64 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
+    NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
     NvU8 max_pde_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC) - 1;
 
     // Passing in NULL for the phys_allocs will mark the child entries as
@@ -494,7 +497,7 @@ static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_direc
 }
 
 static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
-                                                NvU64 page_size,
+                                                NvU32 page_size,
                                                 NvU32 depth,
                                                 uvm_pmm_alloc_flags_t pmm_flags)
 {
@@ -543,7 +546,7 @@ static inline NvU32 entry_index_from_vaddr(NvU64 vaddr, NvU32 addr_bit_shift, Nv
     return (NvU32)((vaddr >> addr_bit_shift) & mask);
 }
 
-static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, NvU32 depth, NvU64 page_size)
+static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, NvU32 depth, NvU32 page_size)
 {
     return hal->entries_per_index(depth) * entry_index + hal->entry_offset(depth, page_size);
 }
@@ -580,7 +583,7 @@ static void pde_write(uvm_page_tree_t *tree,
     pde_fill(tree, dir, entry_index, 1, phys_allocs, push);
 }
 
-static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU64 page_size)
+static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU32 page_size)
 {
     UVM_ASSERT(dir->ref_count > 0);
 
@@ -591,38 +594,35 @@ static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU
 static void pde_clear(uvm_page_tree_t *tree,
                       uvm_page_directory_t *dir,
                       NvU32 entry_index,
-                      NvU64 page_size,
+                      NvU32 page_size,
                       uvm_push_t *push)
 {
     host_pde_clear(tree, dir, entry_index, page_size);
     pde_write(tree, dir, entry_index, false, push);
 }
 
-static uvm_chunk_sizes_mask_t allocation_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU64 big_page_size)
+static uvm_chunk_sizes_mask_t allocation_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU32 big_page_size)
 {
-    uvm_chunk_sizes_mask_t alloc_sizes = 0;
     uvm_mmu_mode_hal_t *hal = parent_gpu->arch_hal->mmu_mode_hal(big_page_size);
-    unsigned long page_sizes, page_size_log2;
+    uvm_chunk_sizes_mask_t alloc_sizes;
 
-    if (hal == NULL)
-        return 0;
+    if (hal != NULL) {
+        unsigned long page_size_log2;
+        unsigned long page_sizes = hal->page_sizes();
+        BUILD_BUG_ON(sizeof(hal->page_sizes()) > sizeof(page_sizes));
 
-    page_sizes = hal->page_sizes();
+        alloc_sizes = 0;
 
-    BUILD_BUG_ON(sizeof(hal->page_sizes()) > sizeof(page_sizes));
-
-    for_each_set_bit(page_size_log2, &page_sizes, BITS_PER_LONG) {
-        NvU32 i;
-        NvU64 page_size = 1ULL << page_size_log2;
-        for (i = 0; i <= hal->page_table_depth(page_size); i++)
-            alloc_sizes |= hal->allocation_size(i, page_size);
+        for_each_set_bit(page_size_log2, &page_sizes, BITS_PER_LONG) {
+            NvU32 i;
+            NvU32 page_size = (NvU32)(1ULL << page_size_log2);
+            for (i = 0; i <= hal->page_table_depth(page_size); i++)
+                alloc_sizes |= hal->allocation_size(i, page_size);
+        }
     }
 
     return alloc_sizes;
 }
 
-static NvU64 page_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU64 big_page_size)
+static NvU32 page_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU32 big_page_size)
 {
     uvm_mmu_mode_hal_t *hal = parent_gpu->arch_hal->mmu_mode_hal(big_page_size);
 
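allocation_sizes_for_big_page_size walks every supported page size by iterating the set bits of the page_sizes() mask. A small standalone sketch of that iteration pattern (BITS_PER_LONG and for_each_set_bit are the stock Linux bitops; the mask value is illustrative):

```c
#include <linux/bitops.h>
#include <linux/printk.h>

static void print_page_sizes(void)
{
    /* 4K | 64K | 2M expressed as a bitmask over size exponents */
    unsigned long page_sizes = (1UL << 12) | (1UL << 16) | (1UL << 21);
    unsigned long log2;

    /* Visits bit indices 12, 16 and 21 in ascending order */
    for_each_set_bit(log2, &page_sizes, BITS_PER_LONG)
        pr_info("supported page size: %lu bytes\n", 1UL << log2);
}
```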
@@ -662,7 +662,7 @@ static NV_STATUS page_tree_end_and_wait(uvm_page_tree_t *tree, uvm_push_t *push)
 }
 
 static NV_STATUS write_gpu_state_cpu(uvm_page_tree_t *tree,
-                                     NvU64 page_size,
+                                     NvU32 page_size,
                                      NvS32 invalidate_depth,
                                      NvU32 used_count,
                                      uvm_page_directory_t **dirs_used)
@@ -713,7 +713,7 @@ static NV_STATUS write_gpu_state_cpu(uvm_page_tree_t *tree,
 }
 
 static NV_STATUS write_gpu_state_gpu(uvm_page_tree_t *tree,
-                                     NvU64 page_size,
+                                     NvU32 page_size,
                                      NvS32 invalidate_depth,
                                      NvU32 used_count,
                                      uvm_page_directory_t **dirs_used)
@@ -805,7 +805,7 @@ static NV_STATUS write_gpu_state_gpu(uvm_page_tree_t *tree,
 
 // initialize new page tables and insert them into the tree
 static NV_STATUS write_gpu_state(uvm_page_tree_t *tree,
-                                 NvU64 page_size,
+                                 NvU32 page_size,
                                  NvS32 invalidate_depth,
                                  NvU32 used_count,
                                  uvm_page_directory_t **dirs_used)
@@ -842,7 +842,7 @@ static void free_unused_directories(uvm_page_tree_t *tree,
     }
 }
 
-static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU64 page_size, uvm_mmu_page_table_alloc_t *out)
+static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm_mmu_page_table_alloc_t *out)
 {
     NvU32 depth = tree->hal->page_table_depth(page_size);
     NvLength alloc_size = tree->hal->allocation_size(depth, page_size);
@@ -871,7 +871,7 @@ static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
 {
     NV_STATUS status;
     NvU64 min_va_upper, max_va_lower;
-    NvU64 page_size;
+    NvU32 page_size;
 
     if (!page_tree_ats_init_required(tree))
         return NV_OK;
@@ -1090,7 +1090,7 @@ static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t locatio
 NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
                              uvm_gpu_va_space_t *gpu_va_space,
                              uvm_page_tree_type_t type,
-                             NvU64 big_page_size,
+                             NvU32 big_page_size,
                              uvm_aperture_t location,
                              uvm_page_tree_t *tree)
 {
@@ -1110,7 +1110,7 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
     tree->gpu_va_space = gpu_va_space;
     tree->big_page_size = big_page_size;
 
-    UVM_ASSERT(uvm_mmu_page_size_supported(tree, big_page_size));
+    UVM_ASSERT(gpu->mem_info.max_vidmem_page_size & tree->hal->page_sizes());
 
     page_tree_set_location(tree, location);
 
@@ -1347,7 +1347,7 @@ NV_STATUS uvm_page_tree_wait(uvm_page_tree_t *tree)
 }
 
 static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
-                              NvU64 page_size,
+                              NvU32 page_size,
                               NvU64 start,
                               NvLength size,
                               uvm_page_table_range_t *range,
@@ -1379,7 +1379,7 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
     // This algorithm will work with unaligned ranges, but the caller's intent
     // is unclear
     UVM_ASSERT_MSG(start % page_size == 0 && size % page_size == 0,
-                   "start 0x%llx size 0x%zx page_size 0x%llx\n",
+                   "start 0x%llx size 0x%zx page_size 0x%x\n",
                    start,
                    (size_t)size,
                    page_size);
@@ -1448,7 +1448,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
 {
     NV_STATUS status;
     uvm_push_t push;
-    NvU64 page_sizes;
+    NvU32 page_sizes;
     uvm_mmu_page_table_alloc_t *phys_alloc[1];
 
     // TODO: Bug 2734399
@@ -1460,7 +1460,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
     status = page_tree_begin_acquire(tree,
                                      &tree->tracker,
                                      &push,
-                                     "map remap: [0x%llx, 0x%llx), page_size: %lld",
+                                     "map remap: [0x%llx, 0x%llx), page_size: %d",
                                      start,
                                      start + size,
                                      range->page_size);
@@ -1500,7 +1500,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
 }
 
 NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
-                                       NvU64 page_size,
+                                       NvU32 page_size,
                                        NvU64 start,
                                        NvLength size,
                                        uvm_pmm_alloc_flags_t pmm_flags,
@@ -1545,7 +1545,7 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
 }
 
 NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
-                                 NvU64 page_size,
+                                 NvU32 page_size,
                                  NvU64 start,
                                  NvLength size,
                                  uvm_pmm_alloc_flags_t pmm_flags,
@@ -1596,7 +1596,7 @@ void uvm_page_table_range_shrink(uvm_page_tree_t *tree, uvm_page_table_range_t *
 }
 
 NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
-                                  NvU64 page_size,
+                                  NvU32 page_size,
                                   NvU64 start,
                                   uvm_pmm_alloc_flags_t pmm_flags,
                                   uvm_page_table_range_t *single)
@@ -1621,7 +1621,7 @@ void uvm_page_tree_clear_pde(uvm_page_tree_t *tree, uvm_page_table_range_t *sing
 static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
                              uvm_page_directory_t *pte_dir,
                              uvm_page_directory_t *parent,
-                             NvU64 page_size)
+                             NvU32 page_size)
 {
     NV_STATUS status;
     uvm_push_t push;
@@ -1633,7 +1633,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
     // The flat mappings should always be set up when executing this path
     UVM_ASSERT(!uvm_mmu_use_cpu(tree));
 
-    status = page_tree_begin_acquire(tree, &tree->tracker, &push, "Poisoning child table of page size %llu", page_size);
+    status = page_tree_begin_acquire(tree, &tree->tracker, &push, "Poisoning child table of page size %u", page_size);
     if (status != NV_OK)
         return status;
 
@@ -1660,7 +1660,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
 }
 
 NV_STATUS uvm_page_tree_alloc_table(uvm_page_tree_t *tree,
-                                    NvU64 page_size,
+                                    NvU32 page_size,
                                     uvm_pmm_alloc_flags_t pmm_flags,
                                     uvm_page_table_range_t *single,
                                     uvm_page_table_range_t *children)
@@ -1768,7 +1768,7 @@ static size_t range_vec_calc_range_index(uvm_page_table_range_vec_t *range_vec,
 NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
                                         NvU64 start,
                                         NvU64 size,
-                                        NvU64 page_size,
+                                        NvU32 page_size,
                                         uvm_pmm_alloc_flags_t pmm_flags,
                                         uvm_page_table_range_vec_t *range_vec)
 {
@@ -1776,8 +1776,8 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
     size_t i;
 
     UVM_ASSERT(size != 0);
-    UVM_ASSERT_MSG(IS_ALIGNED(start, page_size), "start 0x%llx page_size 0x%llx\n", start, page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(start, page_size), "start 0x%llx page_size 0x%x\n", start, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
 
     range_vec->tree = tree;
     range_vec->page_size = page_size;
@@ -1826,7 +1826,7 @@ out:
 NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
                                           NvU64 start,
                                           NvU64 size,
-                                          NvU64 page_size,
+                                          NvU32 page_size,
                                           uvm_pmm_alloc_flags_t pmm_flags,
                                           uvm_page_table_range_vec_t **range_vec_out)
 {
@@ -1952,7 +1952,7 @@ static NV_STATUS uvm_page_table_range_vec_clear_ptes_gpu(uvm_page_table_range_ve
     size_t i;
     uvm_page_tree_t *tree = range_vec->tree;
     uvm_gpu_t *gpu = tree->gpu;
-    NvU64 page_size = range_vec->page_size;
+    NvU32 page_size = range_vec->page_size;
     NvU32 entry_size = uvm_mmu_pte_size(tree, page_size);
     NvU64 invalid_pte = 0;
     uvm_push_t push;
@@ -2237,7 +2237,7 @@ static NV_STATUS create_identity_mapping(uvm_gpu_t *gpu,
                                          NvU64 size,
                                          uvm_aperture_t aperture,
                                          NvU64 phys_offset,
-                                         NvU64 page_size,
+                                         NvU32 page_size,
                                          uvm_pmm_alloc_flags_t pmm_flags)
 {
     NV_STATUS status;
@@ -2312,7 +2312,7 @@ bool uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(uvm_parent_gpu_t *parent_gp
 
 NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)
 {
-    NvU64 page_size;
+    NvU32 page_size;
     NvU64 size;
     uvm_aperture_t aperture = UVM_APERTURE_VID;
     NvU64 phys_offset = 0;
@@ -2351,7 +2351,7 @@ static void destroy_static_vidmem_mapping(uvm_gpu_t *gpu)
 
 NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
 {
-    NvU64 page_size;
+    NvU32 page_size;
     NvU64 size;
     uvm_aperture_t aperture;
     NvU64 phys_offset;
@@ -2535,7 +2535,7 @@ static void root_chunk_mapping_destroy(uvm_gpu_t *gpu, uvm_gpu_root_chunk_mappin
     uvm_push_t push;
     NvU32 entry_size;
     uvm_pte_batch_t pte_batch;
-    NvU64 page_size;
+    NvU32 page_size;
     NvU64 size;
     NvU64 invalid_pte;
     uvm_page_table_range_t *range = root_chunk_mapping->range;
@@ -2585,7 +2585,7 @@ static NV_STATUS root_chunk_mapping_create(uvm_gpu_t *gpu, uvm_gpu_root_chunk_ma
     uvm_push_t push;
     NvU64 pte_bits;
     NvU32 entry_size;
-    NvU64 page_size = UVM_CHUNK_SIZE_MAX;
+    NvU32 page_size = UVM_CHUNK_SIZE_MAX;
     NvU64 size = UVM_CHUNK_SIZE_MAX;
 
     range = uvm_kvmalloc_zero(sizeof(*range));
@@ -2852,7 +2852,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
         if (sysmem_mapping->range_vec == NULL) {
             uvm_gpu_address_t virtual_address = uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, curr_pa);
             NvU64 phys_offset = curr_pa;
-            NvU64 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
+            NvU32 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
             uvm_pmm_alloc_flags_t pmm_flags;
 
             // No eviction is requested when allocating the page tree storage,
 
@@ -208,7 +208,7 @@ struct uvm_mmu_mode_hal_struct
     // This is an optimization which reduces TLB pressure, reduces the number of
     // TLB invalidates we must issue, and means we don't have to initialize the
     // 4k PTEs which are covered by big PTEs since the MMU will never read them.
-    NvU64 (*unmapped_pte)(NvU64 page_size);
+    NvU64 (*unmapped_pte)(NvU32 page_size);
 
     // Bit pattern used for debug purposes to clobber PTEs which ought to be
     // unused. In practice this will generate a PRIV violation or a physical
@@ -234,23 +234,23 @@ struct uvm_mmu_mode_hal_struct
     // For dual PDEs, this is ether 1 or 0, depending on the page size.
     // This is used to index the host copy only. GPU PDEs are always entirely
    // re-written using make_pde.
-    NvLength (*entry_offset)(NvU32 depth, NvU64 page_size);
+    NvLength (*entry_offset)(NvU32 depth, NvU32 page_size);
 
     // number of virtual address bits used to index the directory/table at a
     // given depth
-    NvU32 (*index_bits)(NvU32 depth, NvU64 page_size);
+    NvU32 (*index_bits)(NvU32 depth, NvU32 page_size);
 
     // total number of bits that represent the virtual address space
     NvU32 (*num_va_bits)(void);
 
     // the size, in bytes, of a directory/table at a given depth.
-    NvLength (*allocation_size)(NvU32 depth, NvU64 page_size);
+    NvLength (*allocation_size)(NvU32 depth, NvU32 page_size);
 
     // the depth which corresponds to the page tables
-    NvU32 (*page_table_depth)(NvU64 page_size);
+    NvU32 (*page_table_depth)(NvU32 page_size);
 
     // bitwise-or of supported page sizes
-    NvU64 (*page_sizes)(void);
+    NvU32 (*page_sizes)(void);
 };
 
 struct uvm_page_table_range_struct
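uvm_mmu_mode_hal_struct is a per-GPU-generation vtable: each architecture fills in these function pointers once, and the core MMU code stays generic. A stripped-down sketch of the pattern with two fields only (the depth and size values mirror the Pascal hunks later in this diff; the names are illustrative):

```c
typedef unsigned long long NvU64;
typedef unsigned int NvU32;

/* Miniature HAL vtable in the style of uvm_mmu_mode_hal_struct */
typedef struct {
    NvU32 (*page_table_depth)(NvU32 page_size);
    NvU32 (*page_sizes)(void);
} mmu_mode_hal_t;

/* Pascal-flavored implementations: 2M PTEs live one level higher */
static NvU32 depth_pascal(NvU32 page_size) { return page_size == (2u << 20) ? 3 : 4; }
static NvU32 sizes_pascal(void)            { return (4u << 10) | (64u << 10) | (2u << 20); }

static const mmu_mode_hal_t pascal_hal = {
    .page_table_depth = depth_pascal,
    .page_sizes       = sizes_pascal,
};
```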
@@ -258,7 +258,7 @@ struct uvm_page_table_range_struct
     uvm_page_directory_t *table;
     NvU32 start_index;
     NvU32 entry_count;
-    NvU64 page_size;
+    NvU32 page_size;
 };
 
 typedef enum
@@ -275,7 +275,7 @@ struct uvm_page_tree_struct
     uvm_page_directory_t *root;
     uvm_mmu_mode_hal_t *hal;
     uvm_page_tree_type_t type;
-    NvU64 big_page_size;
+    NvU32 big_page_size;
 
     // Pointer to the GPU VA space containing the page tree.
     // This pointer is set only for page trees of type
@@ -325,7 +325,7 @@ struct uvm_page_table_range_vec_struct
     NvU64 size;
 
     // Page size used for all the page table ranges
-    NvU64 page_size;
+    NvU32 page_size;
 
     // Page table ranges covering the VA
     uvm_page_table_range_t *ranges;
@@ -352,7 +352,7 @@ void uvm_mmu_init_gpu_peer_addresses(uvm_gpu_t *gpu);
 NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
                              uvm_gpu_va_space_t *gpu_va_space,
                              uvm_page_tree_type_t type,
-                             NvU64 big_page_size,
+                             NvU32 big_page_size,
                              uvm_aperture_t location,
                              uvm_page_tree_t *tree_out);
 
@@ -374,7 +374,7 @@ void uvm_page_tree_deinit(uvm_page_tree_t *tree);
 // an existing range or change the size of an existing range, use
 // uvm_page_table_range_get_upper() and/or uvm_page_table_range_shrink().
 NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
-                                 NvU64 page_size,
+                                 NvU32 page_size,
                                  NvU64 start,
                                  NvLength size,
                                  uvm_pmm_alloc_flags_t pmm_flags,
@@ -384,7 +384,7 @@ NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
 //
 // All pending operations can be waited on with uvm_page_tree_wait().
 NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
-                                       NvU64 page_size,
+                                       NvU32 page_size,
                                        NvU64 start,
                                        NvLength size,
                                        uvm_pmm_alloc_flags_t pmm_flags,
@@ -395,7 +395,7 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
 // This is equivalent to calling uvm_page_tree_get_ptes() with size equal to
 // page_size.
 NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
-                                  NvU64 page_size,
+                                  NvU32 page_size,
                                   NvU64 start,
                                   uvm_pmm_alloc_flags_t pmm_flags,
                                   uvm_page_table_range_t *single);
@@ -426,7 +426,7 @@ void uvm_page_tree_clear_pde(uvm_page_tree_t *tree, uvm_page_table_range_t *sing
 // It is the caller's responsibility to initialize the returned table before
 // calling uvm_page_tree_write_pde.
 NV_STATUS uvm_page_tree_alloc_table(uvm_page_tree_t *tree,
-                                    NvU64 page_size,
+                                    NvU32 page_size,
                                     uvm_pmm_alloc_flags_t pmm_flags,
                                     uvm_page_table_range_t *single,
                                     uvm_page_table_range_t *children);
@@ -480,7 +480,7 @@ static uvm_mmu_page_table_alloc_t *uvm_page_tree_pdb(uvm_page_tree_t *tree)
 NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
                                         NvU64 start,
                                         NvU64 size,
-                                        NvU64 page_size,
+                                        NvU32 page_size,
                                         uvm_pmm_alloc_flags_t pmm_flags,
                                         uvm_page_table_range_vec_t *range_vec);
 
@@ -489,7 +489,7 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
 NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
                                           NvU64 start,
                                           NvU64 size,
-                                          NvU64 page_size,
+                                          NvU32 page_size,
                                           uvm_pmm_alloc_flags_t pmm_flags,
                                           uvm_page_table_range_vec_t **range_vec_out);
 
@@ -601,12 +601,12 @@ void uvm_mmu_chunk_unmap(uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker);
 // uvm_parent_gpu_map_cpu_pages for the given GPU.
 NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size);
 
-static NvU64 uvm_mmu_page_tree_entries(uvm_page_tree_t *tree, NvU32 depth, NvU64 page_size)
+static NvU64 uvm_mmu_page_tree_entries(uvm_page_tree_t *tree, NvU32 depth, NvU32 page_size)
 {
     return 1ull << tree->hal->index_bits(depth, page_size);
 }
 
-static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU64 page_size)
+static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
 {
     NvU32 depth = tree->hal->page_table_depth(page_size);
     return uvm_mmu_page_tree_entries(tree, depth, page_size) * page_size;
@@ -615,21 +615,21 @@ static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU64 page_size)
 // Page sizes supported by the GPU. Use uvm_mmu_biggest_page_size() to retrieve
 // the largest page size supported in a given system, which considers the GMMU
 // and vMMU page sizes and segment sizes.
-static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU64 page_size)
+static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)
 {
-    UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%llx\n", page_size);
+    UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%x\n", page_size);
 
     return (tree->hal->page_sizes() & page_size) != 0;
 }
 
-static NvU64 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU64 max_page_size)
+static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_page_size)
 {
-    NvU64 gpu_page_sizes = tree->hal->page_sizes();
-    NvU64 smallest_gpu_page_size = gpu_page_sizes & ~(gpu_page_sizes - 1);
-    NvU64 page_sizes;
-    NvU64 page_size;
+    NvU32 gpu_page_sizes = tree->hal->page_sizes();
+    NvU32 smallest_gpu_page_size = gpu_page_sizes & ~(gpu_page_sizes - 1);
+    NvU32 page_sizes;
+    NvU32 page_size;
 
-    UVM_ASSERT_MSG(is_power_of_2(max_page_size), "0x%llx\n", max_page_size);
+    UVM_ASSERT_MSG(is_power_of_2(max_page_size), "0x%x\n", max_page_size);
 
     if (max_page_size < smallest_gpu_page_size)
         return 0;
@@ -638,14 +638,14 @@ static NvU64 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU64 max_pa
     page_sizes = gpu_page_sizes & (max_page_size | (max_page_size - 1));
 
     // And pick the biggest one of them
-    page_size = 1ULL << __fls(page_sizes);
+    page_size = 1 << __fls(page_sizes);
 
-    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx", page_size);
+    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x", page_size);
 
     return page_size;
 }
 
-static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU64 page_size)
+static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU32 page_size)
 {
     return tree->hal->entry_size(tree->hal->page_table_depth(page_size));
 }
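uvm_mmu_biggest_page_size_up_to uses max_page_size | (max_page_size - 1) to keep only the sizes at or below the cap, then takes the top remaining bit. A worked example with concrete values:

```c
#include <linux/bitops.h>

/* gpu_sizes: 4K | 64K | 2M = 0x211000. Cap at 64K (0x10000):
 *   0x10000 | 0xFFFF        = 0x1FFFF
 *   0x211000 & 0x1FFFF      = 0x11000   (4K and 64K survive, 2M is masked out)
 *   __fls(0x11000) == 16, so the result is 1UL << 16 == 64K. */
static unsigned long biggest_up_to(unsigned long gpu_sizes, unsigned long max)
{
    unsigned long eligible = gpu_sizes & (max | (max - 1));

    return eligible ? 1UL << __fls(eligible) : 0;
}
```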
@@ -96,7 +96,7 @@ typedef struct
 {
     NvU64 base;
     NvU64 size;
-    NvU64 page_size;
+    NvU32 page_size;
     NvU32 depth;
     uvm_membar_t membar;
 } fake_tlb_invalidate_t;
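fake_tlb_invalidate_t records what a TLB-invalidate call was asked to do so the page-tree tests can assert on it afterwards, a classic test-double pattern. A compact sketch of that recording approach (illustrative, not the UVM test harness):

```c
typedef unsigned long long NvU64;
typedef unsigned int NvU32;

#define MAX_RECORDED_INVALS 128

typedef struct {
    NvU64 base;
    NvU64 size;
    NvU32 page_size;
    NvU32 depth;
} recorded_inval_t;

static recorded_inval_t g_invals[MAX_RECORDED_INVALS];
static int g_num_invals;

/* Test double: instead of touching hardware, remember the request so a
 * later assertion can compare it against the expected invalidate. */
static void fake_invalidate_va(NvU64 base, NvU64 size, NvU32 page_size, NvU32 depth)
{
    if (g_num_invals < MAX_RECORDED_INVALS)
        g_invals[g_num_invals++] = (recorded_inval_t){ base, size, page_size, depth };
}
```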
@@ -153,7 +153,7 @@ static void fake_tlb_invalidate_va(uvm_push_t *push,
                                    NvU32 depth,
                                    NvU64 base,
                                    NvU64 size,
-                                   NvU64 page_size,
+                                   NvU32 page_size,
                                    uvm_membar_t membar)
 {
     if (!g_fake_tlb_invals_tracking_enabled)
@@ -249,11 +249,7 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
 }
 
 static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
-                                             NvU64 base,
-                                             NvU64 size,
-                                             NvU64 page_size,
-                                             NvU32 expected_depth,
-                                             bool expected_membar)
+                                             NvU64 base, NvU64 size, NvU32 page_size, NvU32 expected_depth, bool expected_membar)
 {
     UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
 
@@ -275,7 +271,7 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
         return false;
     }
     if (inval->page_size != page_size && inval->base != 0 && inval->size != -1) {
-        UVM_TEST_PRINT("Expected page size %llu, got %llu instead\n", page_size, inval->page_size);
+        UVM_TEST_PRINT("Expected page size %u, got %u instead\n", page_size, inval->page_size);
         return false;
     }
 
@@ -284,7 +280,7 @@
 
 static bool assert_invalidate_range(NvU64 base,
                                     NvU64 size,
-                                    NvU64 page_size,
+                                    NvU32 page_size,
                                     bool allow_inval_all,
                                     NvU32 range_depth,
                                     NvU32 all_depth,
@@ -329,7 +325,7 @@ static NV_STATUS test_page_tree_init_kernel(uvm_gpu_t *gpu, NvU32 big_page_size,
 }
 
 static NV_STATUS test_page_tree_get_ptes(uvm_page_tree_t *tree,
-                                         NvU64 page_size,
+                                         NvU32 page_size,
                                          NvU64 start,
                                          NvLength size,
                                          uvm_page_table_range_t *range)
@@ -345,7 +341,7 @@ static NV_STATUS test_page_tree_get_ptes(uvm_page_tree_t *tree,
 }
 
 static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
-                                          NvU64 page_size,
+                                          NvU32 page_size,
                                           NvU64 start,
                                           uvm_page_table_range_t *single)
 {
@@ -359,14 +355,14 @@ static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
 }
 
 static NV_STATUS test_page_tree_alloc_table(uvm_page_tree_t *tree,
-                                            NvU64 page_size,
+                                            NvU32 page_size,
                                             uvm_page_table_range_t *single,
                                             uvm_page_table_range_t *children)
 {
     return uvm_page_tree_alloc_table(tree, page_size, UVM_PMM_ALLOC_FLAGS_NONE, single, children);
 }
 
-static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU64 page_size, NvU64 start)
+static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start)
 {
     uvm_page_table_range_t entry;
     bool result = true;
@@ -382,7 +378,7 @@ static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU64 page_size, N
     return assert_no_invalidate() && result;
 }
 
-static bool assert_entry_invalidate(uvm_page_tree_t *tree, NvU64 page_size, NvU64 start, NvU32 depth, bool membar)
+static bool assert_entry_invalidate(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvU32 depth, bool membar)
 {
     uvm_page_table_range_t entry;
     bool result = true;
@@ -936,8 +932,8 @@ static NV_STATUS split_and_free(uvm_gpu_t *gpu)
 
 static NV_STATUS check_sizes(uvm_gpu_t *gpu)
 {
-    NvU64 user_sizes = UVM_PAGE_SIZE_2M;
-    NvU64 kernel_sizes = UVM_PAGE_SIZE_4K | 256;
+    NvU32 user_sizes = UVM_PAGE_SIZE_2M;
+    NvU32 kernel_sizes = UVM_PAGE_SIZE_4K | 256;
 
     if (UVM_PAGE_SIZE_64K >= PAGE_SIZE)
         user_sizes |= UVM_PAGE_SIZE_64K;
@@ -1165,7 +1161,7 @@ static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
     return status;
 }
 
-static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU64 *page_sizes, const NvU32 page_sizes_count)
+static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU32 *page_sizes, const NvU32 page_sizes_count)
 {
     NV_STATUS status = NV_OK;
     uvm_page_tree_t tree;
@@ -1181,8 +1177,8 @@ static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU64 *page_si
     for (min_index = 0; min_index < page_sizes_count; ++min_index) {
         for (max_index = min_index; max_index < page_sizes_count; ++max_index) {
             for (size_index = 0; size_index < ARRAY_SIZE(sizes_in_max_pages); ++size_index) {
-                NvU64 min_page_size = page_sizes[min_index];
-                NvU64 max_page_size = page_sizes[max_index];
+                NvU32 min_page_size = page_sizes[min_index];
+                NvU32 max_page_size = page_sizes[max_index];
                 NvU64 size = (NvU64)sizes_in_max_pages[size_index] * max_page_size;
 
                 TEST_CHECK_GOTO(test_tlb_batch_invalidates_case(&tree,
@@ -1286,7 +1282,7 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
 static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
                                        NvU64 start,
                                        NvU64 size,
-                                       NvU64 page_size,
+                                       NvU32 page_size,
                                        uvm_page_table_range_vec_t **range_vec_out)
 {
     uvm_page_table_range_vec_t *range_vec;
@@ -1307,7 +1303,7 @@ static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
 // Test page table range vector APIs.
 // Notably the test leaks the page_tree and range_vec on error as it's hard to
 // clean up on failure and the destructors would likely assert.
-static NV_STATUS test_range_vec(uvm_gpu_t *gpu, NvU32 big_page_size, NvU64 page_size)
+static NV_STATUS test_range_vec(uvm_gpu_t *gpu, NvU32 big_page_size, NvU32 page_size)
 {
     NV_STATUS status = NV_OK;
     uvm_page_tree_t tree;
@@ -1515,7 +1511,7 @@ static uvm_mmu_page_table_alloc_t fake_table_alloc(uvm_aperture_t aperture, NvU6
 // Queries the supported page sizes of the GPU(uvm_gpu_t) and fills the
 // page_sizes array up to MAX_NUM_PAGE_SIZE. Returns the number of elements in
 // page_sizes;
-size_t get_page_sizes(uvm_gpu_t *gpu, NvU64 *page_sizes)
+size_t get_page_sizes(uvm_gpu_t *gpu, NvU32 *page_sizes)
 {
     unsigned long page_size_log2;
     unsigned long page_sizes_bitvec;
@@ -1528,7 +1524,7 @@ size_t get_page_sizes(uvm_gpu_t *gpu, NvU64 *page_sizes)
     page_sizes_bitvec = hal->page_sizes();
 
     for_each_set_bit(page_size_log2, &page_sizes_bitvec, BITS_PER_LONG) {
-        NvU64 page_size = 1ULL << page_size_log2;
+        NvU32 page_size = (NvU32)(1ULL << page_size_log2);
         UVM_ASSERT(count < MAX_NUM_PAGE_SIZES);
         page_sizes[count++] = page_size;
     }
@@ -1576,7 +1572,7 @@ typedef NV_STATUS (*entry_test_page_size_func)(uvm_gpu_t *gpu, size_t page_size)
 
 static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
 {
-    static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
+    static const NvU32 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
     NvU64 pde_bits;
     uvm_mmu_page_table_alloc_t *phys_allocs[2];
     uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
@@ -1667,7 +1663,7 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
 
 static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
 {
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
     NvU64 pde_bits[2];
     size_t i, num_page_sizes;
     uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
@@ -1763,7 +1759,7 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
 
 static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
 {
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
     NvU64 pde_bits[2];
     size_t i, num_page_sizes;
     uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
@@ -1837,7 +1833,7 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
 
 static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
 {
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
     NvU32 i, num_page_sizes;
 
     num_page_sizes = get_page_sizes(gpu, page_sizes);
@@ -1851,7 +1847,7 @@ static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func ent
 static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
 {
     NV_STATUS status = NV_OK;
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
     NvU64 pde_bits[2];
     uvm_page_directory_t *dirs[5];
     size_t i, num_page_sizes;
@@ -2294,8 +2290,8 @@ static NV_STATUS fake_gpu_init_hopper(uvm_gpu_t *fake_gpu)
 static NV_STATUS maxwell_test_page_tree(uvm_gpu_t *maxwell)
 {
     // create a fake Maxwell GPU for this test.
-    static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
-    NvU64 i, j, big_page_size, page_size;
+    static const NvU32 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
+    NvU32 i, j, big_page_size, page_size;
 
     TEST_CHECK_RET(fake_gpu_init_maxwell(maxwell) == NV_OK);
 
@@ -2324,7 +2320,7 @@ static NV_STATUS pascal_test_page_tree(uvm_gpu_t *pascal)
     // create a fake Pascal GPU for this test.
     NvU32 tlb_batch_saved_max_pages;
     NvU32 i;
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
     size_t num_page_sizes;
 
     TEST_CHECK_RET(fake_gpu_init_pascal(pascal) == NV_OK);
@@ -2385,7 +2381,7 @@ static NV_STATUS volta_test_page_tree(uvm_gpu_t *volta)
 static NV_STATUS ampere_test_page_tree(uvm_gpu_t *ampere)
 {
     NvU32 i, tlb_batch_saved_max_pages;
-    NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
+    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
     size_t num_page_sizes;
 
     TEST_CHECK_RET(fake_gpu_init_ampere(ampere) == NV_OK);
 
@@ -92,13 +92,7 @@ void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push, uvm_gpu_phys_addre
     uvm_hal_tlb_invalidate_membar(push, membar);
 }
 
-void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
-                                           uvm_gpu_phys_address_t pdb,
-                                           NvU32 depth,
-                                           NvU64 base,
-                                           NvU64 size,
-                                           NvU64 page_size,
-                                           uvm_membar_t membar)
+void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb, NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
 {
     NvU32 aperture_value;
     NvU32 page_table_level;
@@ -133,9 +127,9 @@ void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
         ack_value = HWCONST(C06F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
     }
 
-    UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
-    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
+    UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
     UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
 
     base >>= 12;
 
@@ -54,7 +54,7 @@ static NvU32 entries_per_index_pascal(NvU32 depth)
     return 1;
 }
 
-static NvLength entry_offset_pascal(NvU32 depth, NvU64 page_size)
+static NvLength entry_offset_pascal(NvU32 depth, NvU32 page_size)
 {
     UVM_ASSERT(depth < 5);
     if (page_size == UVM_PAGE_SIZE_4K && depth == 3)
@@ -178,7 +178,7 @@ static NvLength entry_size_pascal(NvU32 depth)
     return 8;
 }
 
-static NvU32 index_bits_pascal(NvU32 depth, NvU64 page_size)
+static NvU32 index_bits_pascal(NvU32 depth, NvU32 page_size)
 {
     static const NvU32 bit_widths[] = {2, 9, 9, 8};
     // some code paths keep on querying this until they get a 0, meaning only the page offset remains.
@@ -204,7 +204,7 @@ static NvU32 num_va_bits_pascal(void)
     return 49;
 }
 
-static NvLength allocation_size_pascal(NvU32 depth, NvU64 page_size)
+static NvLength allocation_size_pascal(NvU32 depth, NvU32 page_size)
 {
     UVM_ASSERT(depth < 5);
     if (depth == 4 && page_size == UVM_PAGE_SIZE_64K)
@@ -213,7 +213,7 @@ static NvLength allocation_size_pascal(NvU32 depth, NvU64 page_size)
     return 4096;
 }
 
-static NvU32 page_table_depth_pascal(NvU64 page_size)
+static NvU32 page_table_depth_pascal(NvU32 page_size)
 {
     if (page_size == UVM_PAGE_SIZE_2M)
         return 3;
@@ -221,12 +221,12 @@ static NvU32 page_table_depth_pascal(NvU64 page_size)
     return 4;
 }
 
-static NvU64 page_sizes_pascal(void)
+static NvU32 page_sizes_pascal(void)
 {
     return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
 }
 
-static NvU64 unmapped_pte_pascal(NvU64 page_size)
+static NvU64 unmapped_pte_pascal(NvU32 page_size)
 {
     // Setting the privilege bit on an otherwise-zeroed big PTE causes the
     // corresponding 4k PTEs to be ignored. This allows the invalidation of a
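These per-level helpers decompose a virtual address: entry_index_from_vaddr (seen in an earlier hunk) selects a directory index by shifting and masking, with index_bits_pascal supplying the width at each depth. A worked sketch of that decomposition:

```c
typedef unsigned long long NvU64;
typedef unsigned int NvU32;

/* Extract a page-table index from a virtual address: shift out the bits
 * consumed by lower levels, then mask to the level's index width. */
static NvU32 entry_index(NvU64 vaddr, NvU32 addr_bit_shift, NvU32 index_bits)
{
    NvU64 mask = ((NvU64)1 << index_bits) - 1;

    return (NvU32)((vaddr >> addr_bit_shift) & mask);
}

/* Example: with 9 index bits starting at bit 21 (a 2M-coverage level),
 * vaddr 0x40600000 yields (0x40600000 >> 21) & 0x1FF = 0x203 & 0x1FF = 0x3. */
```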
@@ -362,7 +362,7 @@ static uvm_mmu_mode_hal_t pascal_mmu_mode_hal =
     .page_sizes = page_sizes_pascal
 };
 
-uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size)
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size)
 {
     UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
 
|
||||
}
|
||||
|
||||
static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
NvU64 big_page_size,
|
||||
NvU32 big_page_size,
|
||||
uvm_va_block_region_t big_pages_region,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
@@ -245,7 +245,7 @@ static void update_bitmap_tree_from_va_block(uvm_perf_prefetch_bitmap_tree_t *bi
|
||||
uvm_va_block_region_t max_prefetch_region)
|
||||
|
||||
{
|
||||
NvU64 big_page_size;
|
||||
NvU32 big_page_size;
|
||||
uvm_va_block_region_t big_pages_region;
|
||||
uvm_va_space_t *va_space;
|
||||
const uvm_page_mask_t *thrashing_pages;
|
||||
|
||||
@@ -1987,12 +1987,21 @@ NV_STATUS uvm_perf_thrashing_init(void)
|
||||
UVM_PERF_THRASHING_PIN_THRESHOLD_DEFAULT,
|
||||
UVM_PERF_THRASHING_PIN_THRESHOLD_MAX);
|
||||
|
||||
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
|
||||
|
||||
|
||||
// In Confidential Computing, the DMA path is slower due to cryptographic
|
||||
// operations & other associated overhead. Enforce a larger window to allow
|
||||
// the thrashing mitigation mechanisms to work properly.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 10);
|
||||
else
|
||||
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
|
||||
|
||||
INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_nap,
|
||||
UVM_PERF_THRASHING_NAP_DEFAULT,
|
||||
UVM_PERF_THRASHING_NAP_MAX);
|
||||
|
||||
|
||||
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_epoch, UVM_PERF_THRASHING_EPOCH_DEFAULT);
|
||||
|
||||
INIT_THRASHING_PARAMETER(uvm_perf_thrashing_pin, UVM_PERF_THRASHING_PIN_DEFAULT);
|
||||
|
||||
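The INIT_THRASHING_PARAMETER_* macros sanitize module parameters: fall back to a default when the value is zero or out of range. A hedged sketch of that sanitize-with-default pattern (my reading of the macro names, not the UVM definitions):

```c
#include <linux/printk.h>

/* Clamp a module parameter: reject zero, cap at a maximum, and fall back
 * to the default with a log line so misconfigurations are visible. */
static unsigned int sanitize_param_nonzero_max(unsigned int value,
                                               unsigned int def,
                                               unsigned int max,
                                               const char *name)
{
    if (value == 0 || value > max) {
        pr_info("Invalid value %u for %s, using %u instead\n", value, name, def);
        return def;
    }

    return value;
}
```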
@@ -1890,11 +1890,8 @@ static uvm_gpu_chunk_t *claim_free_chunk(uvm_pmm_gpu_t *pmm, uvm_pmm_gpu_memory_
     if (!chunk)
         goto out;
 
-    UVM_ASSERT_MSG(uvm_gpu_chunk_get_size(chunk) == chunk_size,
-                   "chunk size %u expected %u\n",
-                   uvm_gpu_chunk_get_size(chunk),
-                   chunk_size);
-
+    UVM_ASSERT_MSG(uvm_gpu_chunk_get_size(chunk) == chunk_size, "chunk size %u expected %u\n",
+                   uvm_gpu_chunk_get_size(chunk), chunk_size);
     UVM_ASSERT(chunk->type == type);
     UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_FREE);
     UVM_ASSERT(!chunk_is_in_eviction(pmm, chunk));
@@ -2759,7 +2756,7 @@ static bool uvm_pmm_should_inject_pma_eviction_error(uvm_pmm_gpu_t *pmm)
 // See the documentation of pmaEvictPagesCb_t in pma.h for details of the
 // expected semantics.
 static NV_STATUS uvm_pmm_gpu_pma_evict_pages(void *void_pmm,
-                                             NvU64 page_size,
+                                             NvU32 page_size,
                                              NvU64 *pages,
                                              NvU32 num_pages_to_evict,
                                              NvU64 phys_start,
@@ -2864,7 +2861,7 @@ error:
 }
 
 static NV_STATUS uvm_pmm_gpu_pma_evict_pages_wrapper(void *void_pmm,
-                                                     NvU64 page_size,
+                                                     NvU32 page_size,
                                                      NvU64 *pages,
                                                      NvU32 num_pages_to_evict,
                                                      NvU64 phys_start,
 
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2024 NVIDIA Corporation
+    Copyright (c) 2015-2023 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -65,30 +65,30 @@
 
 typedef enum
 {
-    UVM_CHUNK_SIZE_1    = 1,
-    UVM_CHUNK_SIZE_2    = 2,
-    UVM_CHUNK_SIZE_4    = 4,
-    UVM_CHUNK_SIZE_8    = 8,
-    UVM_CHUNK_SIZE_16   = 16,
-    UVM_CHUNK_SIZE_32   = 32,
-    UVM_CHUNK_SIZE_64   = 64,
-    UVM_CHUNK_SIZE_128  = 128,
-    UVM_CHUNK_SIZE_256  = 256,
-    UVM_CHUNK_SIZE_512  = 512,
-    UVM_CHUNK_SIZE_1K   = 1024,
-    UVM_CHUNK_SIZE_2K   = 2*1024,
-    UVM_CHUNK_SIZE_4K   = 4*1024,
-    UVM_CHUNK_SIZE_8K   = 8*1024,
-    UVM_CHUNK_SIZE_16K  = 16*1024,
-    UVM_CHUNK_SIZE_32K  = 32*1024,
-    UVM_CHUNK_SIZE_64K  = 64*1024,
-    UVM_CHUNK_SIZE_128K = 128*1024,
-    UVM_CHUNK_SIZE_256K = 256*1024,
-    UVM_CHUNK_SIZE_512K = 512*1024,
-    UVM_CHUNK_SIZE_1M   = 1024*1024,
-    UVM_CHUNK_SIZE_2M   = 2*1024*1024,
+    UVM_CHUNK_SIZE_1    = 1ULL,
+    UVM_CHUNK_SIZE_2    = 2ULL,
+    UVM_CHUNK_SIZE_4    = 4ULL,
+    UVM_CHUNK_SIZE_8    = 8ULL,
+    UVM_CHUNK_SIZE_16   = 16ULL,
+    UVM_CHUNK_SIZE_32   = 32ULL,
+    UVM_CHUNK_SIZE_64   = 64ULL,
+    UVM_CHUNK_SIZE_128  = 128ULL,
+    UVM_CHUNK_SIZE_256  = 256ULL,
+    UVM_CHUNK_SIZE_512  = 512ULL,
+    UVM_CHUNK_SIZE_1K   = 1024ULL,
+    UVM_CHUNK_SIZE_2K   = 2*1024ULL,
+    UVM_CHUNK_SIZE_4K   = 4*1024ULL,
+    UVM_CHUNK_SIZE_8K   = 8*1024ULL,
+    UVM_CHUNK_SIZE_16K  = 16*1024ULL,
+    UVM_CHUNK_SIZE_32K  = 32*1024ULL,
+    UVM_CHUNK_SIZE_64K  = 64*1024ULL,
+    UVM_CHUNK_SIZE_128K = 128*1024ULL,
+    UVM_CHUNK_SIZE_256K = 256*1024ULL,
+    UVM_CHUNK_SIZE_512K = 512*1024ULL,
+    UVM_CHUNK_SIZE_1M   = 1024*1024ULL,
+    UVM_CHUNK_SIZE_2M   = 2*1024*1024ULL,
     UVM_CHUNK_SIZE_MAX     = UVM_CHUNK_SIZE_2M,
-    UVM_CHUNK_SIZE_INVALID = UVM_CHUNK_SIZE_MAX * 2
+    UVM_CHUNK_SIZE_INVALID = UVM_CHUNK_SIZE_MAX * 2ULL
 } uvm_chunk_size_t;
 
 #define UVM_CHUNK_SIZES_MASK (uvm_chunk_sizes_mask_t)(UVM_CHUNK_SIZE_MAX | (UVM_CHUNK_SIZE_MAX-1))
 
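The ULL suffixes matter because unsuffixed enumerator values are int-typed, and int-typed constants misbehave once they feed shifts or masks near the 32-bit boundary. A tiny demonstration of the general hazard (one common reason such suffixes are applied; the exact motivation in this codebase may differ):

```c
#include <stdio.h>

int main(void)
{
    /* Shifting a plain int toward the sign bit is undefined behavior; in
     * practice it often produces INT_MIN, which then sign-extends when
     * widened. The ULL form stays well-defined and unsigned throughout. */
    unsigned long long bad  = 1 << 31;    /* typically 0xffffffff80000000 */
    unsigned long long good = 1ULL << 31; /* 0x80000000 as expected */

    printf("bad=0x%llx good=0x%llx\n", bad, good);
    return 0;
}
```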
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2017-2024 NVIDIA Corporation
+    Copyright (c) 2017-2023 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -43,7 +43,7 @@ NV_STATUS uvm_pmm_sysmem_init(void)
     // Ensure that only supported CPU chunk sizes are enabled.
     uvm_cpu_chunk_allocation_sizes &= UVM_CPU_CHUNK_SIZES;
     if (!uvm_cpu_chunk_allocation_sizes || !(uvm_cpu_chunk_allocation_sizes & PAGE_SIZE)) {
-        pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
+        pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%lx instead\n",
                 uvm_cpu_chunk_allocation_sizes,
                 UVM_CPU_CHUNK_SIZES);
         uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
@@ -126,7 +126,7 @@ NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sys
             NvU64 remove_key;
 
             for (remove_key = base_key; remove_key < key; ++remove_key)
-                (void *)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
+                (void)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
 
             kmem_cache_free(g_reverse_page_map_cache, new_reverse_map);
             status = errno_to_nv_status(ret);
@@ -461,12 +461,69 @@ static NvU32 compute_gpu_mappings_entry_index(uvm_parent_processor_mask_t *dma_a
     return uvm_parent_processor_mask_get_gpu_count(&subset_mask);
 }
 
+static void cpu_chunk_release(nv_kref_t *kref)
+{
+    uvm_cpu_chunk_t *chunk = container_of(kref, uvm_cpu_chunk_t, refcount);
+    uvm_parent_processor_mask_t *mapping_mask;
+    uvm_parent_processor_id_t id;
+    uvm_cpu_physical_chunk_t *phys_chunk = NULL;
+    uvm_cpu_logical_chunk_t *logical_chunk = NULL;
+
+    if (uvm_cpu_chunk_is_physical(chunk)) {
+        phys_chunk = uvm_cpu_chunk_to_physical(chunk);
+        uvm_assert_mutex_unlocked(&phys_chunk->lock);
+        mapping_mask = &phys_chunk->gpu_mappings.dma_addrs_mask;
+    }
+    else {
+        logical_chunk = uvm_cpu_chunk_to_logical(chunk);
+        mapping_mask = &logical_chunk->mapped_gpus;
+    }
+
+    for_each_parent_id_in_mask(id, mapping_mask) {
+        uvm_parent_gpu_t *parent_gpu = uvm_parent_gpu_get(id);
+        uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, parent_gpu);
+    }
+
+    if (uvm_cpu_chunk_is_physical(chunk)) {
+        if (phys_chunk->gpu_mappings.max_entries > 1)
+            uvm_kvfree(phys_chunk->gpu_mappings.dynamic_entries);
+
+        if (uvm_cpu_chunk_get_size(chunk) > PAGE_SIZE &&
+            !bitmap_empty(phys_chunk->dirty_bitmap, uvm_cpu_chunk_num_pages(chunk)))
+            SetPageDirty(phys_chunk->common.page);
+
+        uvm_kvfree(phys_chunk->dirty_bitmap);
+
+        if (chunk->type != UVM_CPU_CHUNK_TYPE_HMM)
+            put_page(phys_chunk->common.page);
+    }
+    else {
+        uvm_cpu_chunk_free(logical_chunk->parent);
+    }
+
+    uvm_kvfree(chunk);
+}
+
+static void uvm_cpu_chunk_get(uvm_cpu_chunk_t *chunk)
+{
+    UVM_ASSERT(chunk);
+    nv_kref_get(&chunk->refcount);
+}
+
+void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk)
+{
+    if (!chunk)
+        return;
+
+    nv_kref_put(&chunk->refcount, cpu_chunk_release);
+}
+
 static uvm_cpu_physical_chunk_t *get_physical_parent(uvm_cpu_chunk_t *chunk)
 {
     UVM_ASSERT(chunk);
     UVM_ASSERT(chunk->page);
 
-    while (uvm_cpu_chunk_is_logical(chunk))
+    while (!uvm_cpu_chunk_is_physical(chunk))
         chunk = uvm_cpu_chunk_to_logical(chunk)->parent;
 
     return uvm_cpu_chunk_to_physical(chunk);
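The block added above ties CPU chunk lifetime to an nv_kref_t: the last uvm_cpu_chunk_free() drops the refcount to zero and cpu_chunk_release() tears the chunk down. A minimal sketch of the same pattern with the stock Linux kref API, which nv_kref mirrors:

```c
#include <linux/kref.h>
#include <linux/slab.h>

struct chunk {
    struct kref refcount;
    /* ... payload ... */
};

static struct chunk *chunk_alloc(void)
{
    struct chunk *c = kzalloc(sizeof(*c), GFP_KERNEL);

    if (c)
        kref_init(&c->refcount); /* count starts at 1 */
    return c;
}

/* Called automatically when the last reference is dropped. */
static void chunk_release(struct kref *kref)
{
    struct chunk *c = container_of(kref, struct chunk, refcount);

    kfree(c);
}

static void chunk_get(struct chunk *c) { kref_get(&c->refcount); }
static void chunk_put(struct chunk *c) { if (c) kref_put(&c->refcount, chunk_release); }
```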
@@ -524,7 +581,6 @@ static uvm_cpu_phys_mapping_t *chunk_phys_mapping_alloc(uvm_cpu_physical_chunk_t
static uvm_cpu_phys_mapping_t *chunk_phys_mapping_get(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gpu_id_t id)
{
    uvm_assert_mutex_locked(&chunk->lock);

    if (uvm_parent_processor_mask_test(&chunk->gpu_mappings.dma_addrs_mask, id)) {
        if (chunk->gpu_mappings.max_entries == 1) {
            return &chunk->gpu_mappings.static_entry;

@@ -542,6 +598,7 @@ static void chunk_inc_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
{
    uvm_cpu_phys_mapping_t *mapping;

    uvm_assert_mutex_locked(&chunk->lock);
    mapping = chunk_phys_mapping_get(chunk, id);
    UVM_ASSERT(mapping);
    mapping->map_count++;

@@ -551,6 +608,7 @@ static void chunk_dec_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
{
    uvm_cpu_phys_mapping_t *mapping;

    uvm_assert_mutex_locked(&chunk->lock);
    mapping = chunk_phys_mapping_get(chunk, id);
    UVM_ASSERT(mapping);
    UVM_ASSERT(mapping->dma_addr && mapping->map_count);

@@ -558,8 +616,6 @@ static void chunk_dec_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
    if (mapping->map_count == 0) {
        uvm_parent_gpu_t *parent_gpu = uvm_parent_gpu_get(id);

        UVM_ASSERT(uvm_sub_processor_mask_empty(&mapping->sub_processors));

        uvm_parent_gpu_unmap_cpu_pages(parent_gpu, mapping->dma_addr, uvm_cpu_chunk_get_size(&chunk->common));
        mapping->dma_addr = 0;
        if (chunk->gpu_mappings.max_entries > 1) {

@@ -575,7 +631,7 @@ static void chunk_dec_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
    }
}

NvU64 uvm_cpu_chunk_get_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
NvU64 uvm_cpu_chunk_get_parent_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu)
{
    uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(chunk);
    uvm_cpu_phys_mapping_t *mapping;

@@ -585,41 +641,36 @@ NvU64 uvm_cpu_chunk_get_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
    if (uvm_cpu_chunk_is_logical(chunk)) {
        uvm_cpu_logical_chunk_t *logical_chunk = uvm_cpu_chunk_to_logical(chunk);

        if (!uvm_processor_mask_test(&logical_chunk->mapped_gpus, gpu->id))
        if (!uvm_parent_processor_mask_test(&logical_chunk->mapped_gpus, parent_gpu->id))
            return 0;

        parent_offset = cpu_chunk_get_phys_index(logical_chunk);
    }

    uvm_mutex_lock(&phys_chunk->lock);
    mapping = chunk_phys_mapping_get(phys_chunk, gpu->parent->id);
    if (mapping &&
        (uvm_cpu_chunk_is_logical(chunk) ||
         uvm_sub_processor_mask_test(&mapping->sub_processors, uvm_id_sub_processor_index(gpu->id))))
    mapping = chunk_phys_mapping_get(phys_chunk, parent_gpu->id);
    if (mapping)
        dma_addr = mapping->dma_addr + (parent_offset * PAGE_SIZE);
    uvm_mutex_unlock(&phys_chunk->lock);

    uvm_mutex_unlock(&phys_chunk->lock);
    return dma_addr;
}
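The address computation in the function above is plain base-plus-offset arithmetic: a logical chunk reuses its parent physical chunk's DMA mapping and adds its own page offset within the parent. A standalone sketch (hypothetical names; a 4 KiB page size is assumed for illustration only):

#include <stddef.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096ull // illustrative; the kernel uses PAGE_SIZE

// parent_page_index plays the role of cpu_chunk_get_phys_index() above.
static uint64_t logical_chunk_dma_addr(uint64_t parent_dma_base, size_t parent_page_index)
{
    return parent_dma_base + parent_page_index * SKETCH_PAGE_SIZE;
}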
// Create a DMA mapping for the chunk on the given GPU. This will map the
// entire physical chunk on the parent GPU and record that a given MIG
// partition is using the mapping.
// Create a DMA mapping for the chunk on the given parent GPU. This will map the
// entire parent physical chunk on the GPU.
//
// Returns NV_OK on success. On error, any of the errors returned by
// uvm_parent_gpu_map_cpu_pages() can be returned. In the case that the DMA
// mapping structure could not be allocated, NV_ERR_NO_MEMORY is returned.
static NV_STATUS cpu_chunk_map_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
static NV_STATUS cpu_chunk_map_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu)
{
    uvm_parent_gpu_t *parent_gpu = gpu->parent;
    uvm_cpu_physical_chunk_t *phys_chunk;
    uvm_cpu_logical_chunk_t *logical_chunk = NULL;
    uvm_cpu_phys_mapping_t *mapping;
    NV_STATUS status = NV_OK;

    if (uvm_cpu_chunk_is_logical(chunk)) {
        logical_chunk = uvm_cpu_chunk_to_logical(chunk);
        if (uvm_processor_mask_test(&logical_chunk->mapped_gpus, gpu->id))
        if (uvm_parent_processor_mask_test(&logical_chunk->mapped_gpus, parent_gpu->id))
            return status;
    }

@@ -628,6 +679,7 @@ static NV_STATUS cpu_chunk_map_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)

    if (!uvm_parent_processor_mask_test(&phys_chunk->gpu_mappings.dma_addrs_mask, parent_gpu->id)) {
        uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(&phys_chunk->common);
        uvm_cpu_phys_mapping_t *mapping;
        NvU64 dma_addr;

        status = uvm_parent_gpu_map_cpu_pages(parent_gpu, phys_chunk->common.page, chunk_size, &dma_addr);

@@ -643,59 +695,39 @@ static NV_STATUS cpu_chunk_map_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)

        mapping->dma_addr = dma_addr;
        mapping->map_count = 1;
        uvm_sub_processor_mask_zero(&mapping->sub_processors);
        if (!logical_chunk)
            uvm_sub_processor_mask_set(&mapping->sub_processors, uvm_id_sub_processor_index(gpu->id));

        uvm_parent_processor_mask_set(&phys_chunk->gpu_mappings.dma_addrs_mask, parent_gpu->id);
    }
    else {
        mapping = chunk_phys_mapping_get(phys_chunk, parent_gpu->id);
        UVM_ASSERT(mapping);

        // Increment the map_count for logical chunks or the first time a
        // MIG partition is sharing a physical chunk.
        if (logical_chunk ||
            !uvm_sub_processor_mask_test_and_set(&mapping->sub_processors, uvm_id_sub_processor_index(gpu->id)))
            mapping->map_count++;
    }

    if (logical_chunk) {
        uvm_processor_mask_set(&logical_chunk->mapped_gpus, gpu->id);
        UVM_ASSERT(uvm_sub_processor_mask_empty(&mapping->sub_processors));
    }
    else {
        UVM_ASSERT(!uvm_sub_processor_mask_empty(&mapping->sub_processors));
        UVM_ASSERT(uvm_sub_processor_mask_get_count(&mapping->sub_processors) == mapping->map_count);
    // The mapping count on the physical chunk is only increased when
    // mapping logical chunks.
    if (uvm_cpu_chunk_is_logical(chunk))
        chunk_inc_gpu_mapping(phys_chunk, parent_gpu->id);
    }

done:
    uvm_mutex_unlock(&phys_chunk->lock);

    if (status == NV_OK && uvm_cpu_chunk_is_logical(chunk))
        uvm_parent_processor_mask_set(&logical_chunk->mapped_gpus, parent_gpu->id);

    return status;
}
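The bookkeeping above amounts to a create-on-first-use reference count per parent GPU: the first mapper allocates the DMA mapping, later mappers only bump map_count, and chunk_dec_gpu_mapping() tears the mapping down when the count reaches zero. A simplified standalone model (hypothetical types, not the driver's):

#include <stdbool.h>
#include <stdint.h>

struct mapping_model {
    uint64_t dma_addr;  // 0 means "not mapped"
    uint32_t map_count;
};

static void model_map(struct mapping_model *m, uint64_t new_dma_addr)
{
    if (m->map_count++ == 0)
        m->dma_addr = new_dma_addr; // stands in for uvm_parent_gpu_map_cpu_pages()
}

static bool model_unmap(struct mapping_model *m)
{
    if (--m->map_count == 0) {
        m->dma_addr = 0;            // stands in for uvm_parent_gpu_unmap_cpu_pages()
        return true;                // mapping destroyed
    }
    return false;
}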
static void cpu_chunk_unmap_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_gpu_id_t gpu_id)
void uvm_cpu_chunk_unmap_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu)
{
    uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(chunk);
    uvm_parent_gpu_id_t id = uvm_parent_gpu_id_from_gpu_id(gpu_id);

    uvm_mutex_lock(&phys_chunk->lock);
    uvm_cpu_physical_chunk_t *phys_chunk;
    uvm_cpu_logical_chunk_t *logical_chunk;

    if (uvm_cpu_chunk_is_logical(chunk)) {
        uvm_processor_mask_t *mapping_mask = &uvm_cpu_chunk_to_logical(chunk)->mapped_gpus;

        if (uvm_processor_mask_test_and_clear(mapping_mask, gpu_id))
            chunk_dec_gpu_mapping(phys_chunk, id);
        logical_chunk = uvm_cpu_chunk_to_logical(chunk);
        if (!uvm_parent_processor_mask_test_and_clear(&logical_chunk->mapped_gpus, parent_gpu->id))
            return;
    }
    else {
        if (uvm_parent_processor_mask_test(&phys_chunk->gpu_mappings.dma_addrs_mask, id)) {
            uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(phys_chunk, id);

            if (uvm_sub_processor_mask_test_and_clear(&mapping->sub_processors, uvm_id_sub_processor_index(gpu_id)))
                chunk_dec_gpu_mapping(phys_chunk, id);
        }
    }
    phys_chunk = get_physical_parent(chunk);
    uvm_mutex_lock(&phys_chunk->lock);
    if (uvm_parent_processor_mask_test(&phys_chunk->gpu_mappings.dma_addrs_mask, parent_gpu->id))
        chunk_dec_gpu_mapping(phys_chunk, parent_gpu->id);

    uvm_mutex_unlock(&phys_chunk->lock);
}
@@ -705,112 +737,17 @@ NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
    NV_STATUS status;
    uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);

    status = cpu_chunk_map_gpu_phys(chunk, gpu);
    status = cpu_chunk_map_parent_gpu_phys(chunk, gpu->parent);
    if (status != NV_OK)
        return status;

    status = uvm_mmu_sysmem_map(gpu, uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu), chunk_size);
    status = uvm_mmu_sysmem_map(gpu, uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent), chunk_size);
    if (status != NV_OK)
        cpu_chunk_unmap_gpu_phys(chunk, gpu->id);
        uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);

    return status;
}

void uvm_cpu_chunk_unmap_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    cpu_chunk_unmap_gpu_phys(chunk, gpu->id);

    // Note: there is no corresponding uvm_mmu_sysmem_unmap() for
    // uvm_mmu_sysmem_map().
}

static void cpu_logical_chunk_release(uvm_cpu_logical_chunk_t *logical_chunk)
{
    uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(logical_chunk->parent);
    uvm_processor_id_t gpu_id;

    uvm_mutex_lock(&phys_chunk->lock);

    for_each_id_in_mask(gpu_id, &logical_chunk->mapped_gpus)
        chunk_dec_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));

    uvm_mutex_unlock(&phys_chunk->lock);

    uvm_cpu_chunk_free(logical_chunk->parent);
}

static void cpu_physical_chunk_release(uvm_cpu_chunk_t *chunk)
{
    uvm_cpu_physical_chunk_t *phys_chunk = uvm_cpu_chunk_to_physical(chunk);
    uvm_parent_processor_id_t id;

    uvm_assert_mutex_unlocked(&phys_chunk->lock);

    // There should be no other threads using this chunk, but we lock it
    // because of assertions in chunk_phys_mapping_get() and
    // chunk_dec_gpu_mapping().
    uvm_mutex_lock(&phys_chunk->lock);

    for_each_parent_id_in_mask(id, &phys_chunk->gpu_mappings.dma_addrs_mask) {
        uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(phys_chunk, id);
        NvU32 count;

        UVM_ASSERT(mapping);
        UVM_ASSERT(!uvm_sub_processor_mask_empty(&mapping->sub_processors));

        // Get a count of set bits in the sub_processors mask, then clear it so
        // that chunk_dec_gpu_mapping() sees an empty mask when map_count == 0.
        // Using for_each_sub_processor_in_mask could try to dereference
        // mapping after map_count == 0 in the loop below.
        count = uvm_sub_processor_mask_get_count(&mapping->sub_processors);
        uvm_sub_processor_mask_zero(&mapping->sub_processors);

        for (; count; count--)
            chunk_dec_gpu_mapping(phys_chunk, id);
    }

    uvm_mutex_unlock(&phys_chunk->lock);

    UVM_ASSERT(uvm_parent_processor_mask_empty(&phys_chunk->gpu_mappings.dma_addrs_mask));

    if (phys_chunk->gpu_mappings.max_entries > 1)
        uvm_kvfree(phys_chunk->gpu_mappings.dynamic_entries);

    if (uvm_cpu_chunk_get_size(chunk) > PAGE_SIZE &&
        !bitmap_empty(phys_chunk->dirty_bitmap, uvm_cpu_chunk_num_pages(chunk)))
        SetPageDirty(chunk->page);

    uvm_kvfree(phys_chunk->dirty_bitmap);

    if (chunk->type != UVM_CPU_CHUNK_TYPE_HMM)
        put_page(chunk->page);
}

static void cpu_chunk_release(nv_kref_t *kref)
{
    uvm_cpu_chunk_t *chunk = container_of(kref, uvm_cpu_chunk_t, refcount);

    if (uvm_cpu_chunk_is_logical(chunk))
        cpu_logical_chunk_release(uvm_cpu_chunk_to_logical(chunk));
    else
        cpu_physical_chunk_release(chunk);

    uvm_kvfree(chunk);
}

static void uvm_cpu_chunk_get(uvm_cpu_chunk_t *chunk)
{
    UVM_ASSERT(chunk);
    nv_kref_get(&chunk->refcount);
}

void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk)
{
    if (!chunk)
        return;

    nv_kref_put(&chunk->refcount, cpu_chunk_release);
}
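uvm_cpu_chunk_get()/uvm_cpu_chunk_free() follow the usual kref pattern: the release callback runs exactly once, when the final reference drops. A standalone sketch of the same shape, with hypothetical names standing in for nv_kref_get()/nv_kref_put():

struct ref_obj {
    unsigned refcount;
    void (*release)(struct ref_obj *obj);
};

static void ref_get(struct ref_obj *obj)
{
    obj->refcount++;
}

static void ref_put(struct ref_obj *obj)
{
    // cpu_chunk_release() plays the role of obj->release() above.
    if (--obj->refcount == 0)
        obj->release(obj);
}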
static struct page *uvm_cpu_chunk_alloc_page(uvm_chunk_size_t alloc_size,
                                             int nid,
                                             uvm_cpu_chunk_alloc_flags_t alloc_flags)
@@ -939,37 +876,14 @@ int uvm_cpu_chunk_get_numa_node(uvm_cpu_chunk_t *chunk)
    return page_to_nid(chunk->page);
}

// Convert the mask of DMA mapped parent GPUs and the sub-processor mask into
// one uvm_processor_mask_t in 'dma_map_mask'.
static void get_dma_map_mask(uvm_cpu_physical_chunk_t *chunk, uvm_processor_mask_t *dma_map_mask)
{
    uvm_parent_processor_id_t id;
    NvU32 sub_index;

    uvm_assert_mutex_locked(&chunk->lock);

    for_each_parent_id_in_mask(id, &chunk->gpu_mappings.dma_addrs_mask) {
        uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(chunk, id);

        for_each_sub_processor_index_in_mask(sub_index, &mapping->sub_processors) {
            uvm_processor_id_t gpu_id = uvm_gpu_id_from_sub_processor(id, sub_index);

            uvm_sub_processor_mask_clear(&mapping->sub_processors, sub_index);
            uvm_processor_mask_set(dma_map_mask, gpu_id);
        }

        UVM_ASSERT(uvm_sub_processor_mask_empty(&mapping->sub_processors));
    }
}
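Note that get_dma_map_mask() both reads and drains the per-mapping sub-processor bits: each set bit is translated to a flat GPU id and then cleared, so the mask is empty afterwards (hence the assertion). The drain idiom in isolation, as a hypothetical standalone helper:

#include <stdint.h>
#include <strings.h> // ffs()

static void drain_mask(uint8_t *bitmap, void (*visit)(int index))
{
    while (*bitmap) {
        int index = ffs(*bitmap) - 1;        // lowest set bit
        *bitmap &= (uint8_t)~(1u << index);  // like uvm_sub_processor_mask_clear()
        visit(index);                        // like uvm_processor_mask_set() on the flat id
    }
}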
NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chunks)
{
    NV_STATUS status = NV_OK;
    uvm_cpu_logical_chunk_t *new_chunk;
    uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(chunk);
    uvm_cpu_logical_chunk_t *logical_chunk = NULL;
    uvm_processor_id_t gpu_id;
    uvm_processor_mask_t *dma_map_mask = NULL;
    uvm_parent_processor_id_t id;
    uvm_parent_processor_mask_t *dma_map_mask;
    uvm_chunk_size_t new_size;
    size_t num_new_chunks;
    size_t num_subchunk_pages;

@@ -988,20 +902,21 @@ NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chun

    // Get the largest size below the size of the input chunk.
    new_size = uvm_chunk_find_prev_size(uvm_cpu_chunk_get_allocation_sizes(), uvm_cpu_chunk_get_size(chunk));
    UVM_ASSERT(new_size);
    UVM_ASSERT(new_size != UVM_CHUNK_SIZE_INVALID);
    num_new_chunks = uvm_cpu_chunk_get_size(chunk) / new_size;
    num_subchunk_pages = new_size / PAGE_SIZE;

    if (uvm_cpu_chunk_is_logical(chunk)) {
    if (uvm_cpu_chunk_is_physical(chunk)) {
        dma_map_mask = &phys_chunk->gpu_mappings.dma_addrs_mask;
    }
    else {
        logical_chunk = uvm_cpu_chunk_to_logical(chunk);
        dma_map_mask = &logical_chunk->mapped_gpus;
    }

    uvm_mutex_lock(&phys_chunk->lock);

    for (i = 0; i < num_new_chunks; i++) {
        new_chunk = uvm_kvmalloc_zero(sizeof(*new_chunk));
        new_chunk = uvm_kvmalloc_zero(sizeof(*logical_chunk));
        if (!new_chunk) {
            uvm_mutex_unlock(&phys_chunk->lock);
            status = NV_ERR_NO_MEMORY;

@@ -1014,25 +929,19 @@ NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chun
        nv_kref_init(&new_chunk->common.refcount);
        new_chunk->parent = chunk;
        uvm_cpu_chunk_get(new_chunk->parent);
        if (i == 0 && !logical_chunk) {
            dma_map_mask = &new_chunk->mapped_gpus;
            get_dma_map_mask(phys_chunk, dma_map_mask);
        }
        else {
            uvm_processor_mask_copy(&new_chunk->mapped_gpus, dma_map_mask);
        }
        for_each_id_in_mask(gpu_id, dma_map_mask)
            chunk_inc_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));
        for_each_parent_id_in_mask(id, dma_map_mask)
            chunk_inc_gpu_mapping(phys_chunk, id);
        uvm_parent_processor_mask_copy(&new_chunk->mapped_gpus, dma_map_mask);
        new_chunks[i] = &new_chunk->common;
    }

    // Release the references that are held by the chunk being split.
    for_each_id_in_mask(gpu_id, dma_map_mask)
        chunk_dec_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));
    for_each_parent_id_in_mask(id, dma_map_mask)
        chunk_dec_gpu_mapping(phys_chunk, id);

    // If the chunk being split is a logical chunk, clear its mapped_gpus mask.
    if (logical_chunk)
        uvm_processor_mask_zero(&logical_chunk->mapped_gpus);
    if (uvm_cpu_chunk_is_logical(chunk))
        uvm_parent_processor_mask_zero(&logical_chunk->mapped_gpus);

    uvm_mutex_unlock(&phys_chunk->lock);
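The reference arithmetic of the split path is easy to check by hand: each of the num_new_chunks children takes one mapping reference per mapped GPU, and the chunk being split then releases its own per-GPU references. A hypothetical accounting helper, for illustration only:

#include <stddef.h>

static size_t map_count_after_split(size_t count_before, size_t num_new_chunks, size_t num_mapped_gpus)
{
    // chunk_inc_gpu_mapping() per child per mapped GPU...
    count_before += num_new_chunks * num_mapped_gpus;
    // ...then chunk_dec_gpu_mapping() for the references held by the split chunk.
    count_before -= num_mapped_gpus;
    return count_before;
}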
@@ -1054,7 +963,7 @@ static bool verify_merging_chunks(uvm_cpu_chunk_t **chunks, size_t num_chunks)
{
    uvm_cpu_logical_chunk_t *logical_chunk;
    uvm_cpu_chunk_t *first_chunk_parent;
    uvm_processor_mask_t *first_chunk_mapped_gpus;
    uvm_parent_processor_mask_t *first_chunk_mapped_gpus;
    uvm_chunk_size_t first_chunk_size;
    size_t i;

@@ -1085,7 +994,7 @@ static bool verify_merging_chunks(uvm_cpu_chunk_t **chunks, size_t num_chunks)
        // 2.1 All mappings to GPUs in each of child chunks' masks that are
        //     not also present in the parent chunk's mask are destroyed.
        // 2.2 mapped_gpus mask of the parent chunk remains unmodified.
        UVM_ASSERT(uvm_processor_mask_equal(&logical_chunk->mapped_gpus, first_chunk_mapped_gpus));
        UVM_ASSERT(uvm_parent_processor_mask_equal(&logical_chunk->mapped_gpus, first_chunk_mapped_gpus));
    }

    return true;

@@ -1096,14 +1005,14 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_cpu_chunk_t **chunks)
    uvm_cpu_chunk_t *parent;
    uvm_cpu_logical_chunk_t *logical_chunk;
    uvm_cpu_physical_chunk_t *phys_chunk;
    uvm_processor_id_t gpu_id;
    uvm_parent_processor_id_t id;
    uvm_chunk_size_t chunk_size;
    uvm_chunk_size_t parent_chunk_size;
    size_t num_merge_chunks;
    size_t i;

    UVM_ASSERT(chunks);
    UVM_ASSERT(uvm_cpu_chunk_is_logical(chunks[0]));
    UVM_ASSERT(!uvm_cpu_chunk_is_physical(chunks[0]));

    logical_chunk = uvm_cpu_chunk_to_logical(chunks[0]);
    parent = logical_chunk->parent;

@@ -1124,22 +1033,11 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_cpu_chunk_t **chunks)
    phys_chunk = get_physical_parent(chunks[0]);

    uvm_mutex_lock(&phys_chunk->lock);
    for_each_parent_id_in_mask(id, &logical_chunk->mapped_gpus)
        chunk_inc_gpu_mapping(phys_chunk, id);

    for_each_id_in_mask(gpu_id, &logical_chunk->mapped_gpus)
        chunk_inc_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));

    if (uvm_cpu_chunk_is_logical(parent)) {
        uvm_processor_mask_copy(&uvm_cpu_chunk_to_logical(parent)->mapped_gpus, &logical_chunk->mapped_gpus);
    }
    else {
        // Restore the mapping->sub_processors mask for each mapped GPU.
        for_each_id_in_mask(gpu_id, &logical_chunk->mapped_gpus) {
            uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));

            UVM_ASSERT(mapping);
            uvm_sub_processor_mask_set(&mapping->sub_processors, uvm_id_sub_processor_index(gpu_id));
        }
    }
    if (!uvm_cpu_chunk_is_physical(parent))
        uvm_parent_processor_mask_copy(&uvm_cpu_chunk_to_logical(parent)->mapped_gpus, &logical_chunk->mapped_gpus);

    uvm_mutex_unlock(&phys_chunk->lock);
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2017-2024 NVIDIA Corporation
    Copyright (c) 2017-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -246,19 +246,8 @@ struct uvm_cpu_chunk_struct

typedef struct
{
    // Physical GPU DMA address of the CPU chunk.
    NvU64 dma_addr;

    // Reference count of all sub_processors using this mapping across logical
    // and physical chunks.
    NvU32 map_count;

    // Mask of MIG instances or physical GPU.
    // This is only valid for physical CPU chunks that have not been split into
    // logical chunks. When the chunk is split, all the
    // uvm_cpu_logical_chunk_t::mapped_gpus masks have a bit set for each
    // count in map_count and sub_processors is set to zero.
    uvm_sub_processor_mask_t sub_processors;
} uvm_cpu_phys_mapping_t;

typedef struct

@@ -315,9 +304,7 @@ typedef struct

    // Pointer to the parent chunk (which could also be a logical chunk).
    uvm_cpu_chunk_t *parent;

    // This is a reference per bit but also recorded in mapping->map_count.
    uvm_processor_mask_t mapped_gpus;
    uvm_parent_processor_mask_t mapped_gpus;
} uvm_cpu_logical_chunk_t;

// Return the set of allowed CPU chunk allocation sizes.

@@ -430,15 +417,15 @@ void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk);
// For more details see uvm_mmu_sysmem_map().
NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);

// Destroy a CPU chunk's DMA mapping for the given GPU.
// Destroy a CPU chunk's DMA mapping for the parent GPU.
// If chunk is a logical chunk, this call may not necessarily destroy the DMA
// mapping of the parent physical chunk since all logical chunks and MIG
// partitions share the parent's DMA mapping.
void uvm_cpu_chunk_unmap_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);
// mapping of the parent physical chunk since all logical chunks share the
// parent's DMA mapping.
void uvm_cpu_chunk_unmap_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);

// Get the CPU chunk's DMA mapping address for the specified GPU ID.
// If there is no mapping for the GPU, 0 is returned.
NvU64 uvm_cpu_chunk_get_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);
NvU64 uvm_cpu_chunk_get_parent_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);

// Split a CPU chunk into a set of CPU chunks of the next size down from the set
// of enabled CPU chunk sizes.
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2017-2024 NVIDIA Corporation
    Copyright (c) 2017-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -626,7 +626,7 @@ static NV_STATUS test_cpu_chunk_mapping_access(uvm_cpu_chunk_t *chunk, uvm_gpu_t
    TEST_NV_CHECK_RET(cpu_chunk_map_on_cpu(chunk, (void **)&cpu_addr));
    memset(cpu_addr, 0, chunk_size);

    dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
    dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
    gpu_addr = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr));

    TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,

@@ -733,21 +733,21 @@ static NV_STATUS test_cpu_chunk_mapping_basic_verify(uvm_gpu_t *gpu,
    // - no GPU mapping address.
    TEST_CHECK_GOTO(phys_chunk->gpu_mappings.max_entries == 1, done);
    TEST_CHECK_GOTO(uvm_parent_processor_mask_get_gpu_count(&phys_chunk->gpu_mappings.dma_addrs_mask) == 0, done);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == 0, done);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0, done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);

    // Test basic access.
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);

    // Test double map is harmless.
    dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
    dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == dma_addr, done);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == dma_addr, done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);

    // Test unmap, remap.
    uvm_cpu_chunk_unmap_gpu(chunk, gpu);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == 0, done);
    uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0, done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);

@@ -768,39 +768,6 @@ static NV_STATUS test_cpu_chunk_mapping_basic(uvm_gpu_t *gpu, uvm_cpu_chunk_allo
    return NV_OK;
}

// TODO: Bug 4351121: This won't actually test anything until uvm_test
// enumerates multiple MIG instances.
static NV_STATUS test_cpu_chunk_mig(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    NV_STATUS status = NV_OK;
    uvm_cpu_chunk_t *chunk;
    uvm_cpu_physical_chunk_t *phys_chunk;
    NvU64 dma_addr_gpu0;

    UVM_ASSERT(gpu0->parent == gpu1->parent);

    TEST_NV_CHECK_RET(test_cpu_chunk_alloc(PAGE_SIZE, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
    phys_chunk = uvm_cpu_chunk_to_physical(chunk);

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);

    // MIG instances in the same physical GPU share the same DMA addresses.
    dma_addr_gpu0 = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu0);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1) == dma_addr_gpu0, done);

    // Unmapping one GPU shouldn't affect the other.
    uvm_cpu_chunk_unmap_gpu(chunk, gpu0);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu0) == 0, done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);

done:
    uvm_cpu_chunk_free(chunk);
    return status;
}
static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_t *gpu2)
{
    NV_STATUS status = NV_OK;

@@ -816,8 +783,8 @@ static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1,
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu2), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
    dma_addr_gpu1 = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1);
    uvm_cpu_chunk_unmap_gpu(chunk, gpu2);
    dma_addr_gpu1 = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu1->parent);
    uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu2->parent);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done);

@@ -831,9 +798,7 @@ static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1,
    // GPU1. It's true that we may get a false negative if both addresses
    // happened to alias and we had a bug in how the addresses are shifted in
    // the dense array, but that's better than intermittent failure.
    // Also note that multiple MIG instances in the same physical GPU share the
    // parent's physical DMA mapping.
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1) == dma_addr_gpu1, done);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu1->parent) == dma_addr_gpu1, done);

done:
    uvm_cpu_chunk_free(chunk);

@@ -863,7 +828,7 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done_free);
    uvm_cpu_chunk_unmap_gpu(chunk, gpu);
    uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
    TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == num_split_chunks, done);

@@ -880,14 +845,13 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g
    merged_chunk = uvm_cpu_chunk_merge(split_chunks);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
    TEST_CHECK_GOTO(merged_chunk == chunk, done_free);
    TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == 1, done_free);

    // Since all logical chunks were mapped, the entire merged chunk should
    // be accessible without needing to map it.
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu), done_free);

    // Test that GPU mappings are transferred after a split.
    phys_dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
    phys_dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);

@@ -895,9 +859,9 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g
        NvU64 dma_addr;

        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu), done);
        dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu);
        dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[i], gpu->parent);
        TEST_CHECK_GOTO(dma_addr == phys_dma_addr + (i * split_size), done);
        uvm_cpu_chunk_unmap_gpu(split_chunks[i], gpu);
        uvm_cpu_chunk_unmap_parent_gpu_phys(split_chunks[i], gpu->parent);
    }

    // Test that mapping one logical chunk does not affect others.

@@ -907,7 +871,7 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g

    for (i = 0; i < num_split_chunks; i++) {
        if (i != map_chunk)
            TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu) == 0, done);
            TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[i], gpu->parent) == 0, done);
    }

    if (split_size > PAGE_SIZE) {

@@ -963,118 +927,6 @@ static NV_STATUS test_cpu_chunk_split_and_merge(uvm_gpu_t *gpu)
    return NV_OK;
}

static NV_STATUS do_test_cpu_chunk_split_and_merge_2(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    NV_STATUS status = NV_OK;
    uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
    size_t num_split_chunks;
    uvm_cpu_chunk_t **split_chunks;
    uvm_cpu_chunk_t *merged_chunk;
    uvm_chunk_size_t split_size;
    size_t i;

    split_size = uvm_chunk_find_prev_size(alloc_sizes, size);
    UVM_ASSERT(split_size != UVM_CHUNK_SIZE_INVALID);
    num_split_chunks = size / split_size;
    split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));

    if (!split_chunks)
        return NV_ERR_NO_MEMORY;

    // Map both GPUs.
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done_free);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu1), done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done_free);

    // Then split.
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
    TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == num_split_chunks, done);

    // Unmap gpu0 from all split chunks.
    for (i = 0; i < num_split_chunks; i++) {
        TEST_CHECK_GOTO(split_chunks[i], done);
        TEST_CHECK_GOTO(uvm_cpu_chunk_is_logical(split_chunks[i]), done);
        TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[i]) == split_size, done);
        uvm_cpu_chunk_unmap_gpu(split_chunks[i], gpu0);
        TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu0) == 0, done);

        // Test that gpu1 still has access.
        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu1), done);
    }

    // Test CPU chunk merging.
    merged_chunk = uvm_cpu_chunk_merge(split_chunks);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
    TEST_CHECK_GOTO(merged_chunk == chunk, done_free);
    TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == 1, done_free);

    // Since gpu0 was unmapped from every logical chunk, the merged chunk
    // should have no mapping for gpu0 while gpu1 remains accessible.
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(merged_chunk, gpu0) == 0, done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu1), done_free);

    // Unmap gpu1 so we start with a fully unmapped physical chunk.
    uvm_cpu_chunk_unmap_gpu(chunk, gpu1);

    // Split the physical chunk.
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);

    // Now map everything.
    for (i = 0; i < num_split_chunks; i++) {
        TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu0), done);
        TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu1), done);
    }

    // Test CPU chunk merging with everything mapped.
    merged_chunk = uvm_cpu_chunk_merge(split_chunks);

    // At this point, all split chunks have been merged.
    num_split_chunks = 0;

    TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
    TEST_CHECK_GOTO(merged_chunk == chunk, done_free);

    // Since all logical chunks were mapped, the entire merged chunk should
    // be accessible without needing to map it.
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu0), done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu1), done_free);

done:
    for (i = 0; i < num_split_chunks; i++)
        uvm_cpu_chunk_free(split_chunks[i]);

done_free:
    uvm_kvfree(split_chunks);

    return status;
}

static NV_STATUS test_cpu_chunk_split_and_merge_2(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
    uvm_chunk_size_t size;

    size = uvm_chunk_find_next_size(alloc_sizes, PAGE_SIZE);
    for_each_chunk_size_from(size, alloc_sizes) {
        uvm_cpu_chunk_t *chunk;
        NV_STATUS status;

        // It is possible that the allocation fails due to lack of large pages
        // rather than an API issue, which will result in a false negative.
        // However, that should be very rare.
        TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
        status = do_test_cpu_chunk_split_and_merge_2(chunk, gpu0, gpu1);
        uvm_cpu_chunk_free(chunk);

        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

static NV_STATUS test_cpu_chunk_dirty_split(uvm_cpu_chunk_t *chunk)
{
    uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);

@@ -1220,9 +1072,7 @@ done:
    return status;
}

NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk,
                                 uvm_va_space_t *va_space,
                                 const uvm_processor_mask_t *test_gpus)
NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
{
    NV_STATUS status = NV_OK;
    uvm_cpu_chunk_t **split_chunks;

@@ -1249,7 +1099,7 @@ NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk,
    chunk = NULL;

    // Map every other chunk.
    // The call to uvm_cpu_chunk_unmap_gpu() is here in case this
    // The call to uvm_cpu_chunk_unmap_parent_gpu_phys() is here in case this
    // is part of a double split (see below). In that case, the parent chunk
    // would be either mapped or unmapped.
    //

@@ -1261,7 +1111,7 @@ NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk,
        if (i & (1 << uvm_id_gpu_index(gpu->id)))
            TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu), done);
        else
            uvm_cpu_chunk_unmap_gpu(split_chunks[i], gpu);
            uvm_cpu_chunk_unmap_parent_gpu_phys(split_chunks[i], gpu->parent);
        }
    }

@@ -1297,9 +1147,9 @@ NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk,
            TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[j]) == split_size, done);
            for_each_va_space_gpu_in_mask(gpu, va_space, test_gpus) {
                if (j & (1 << uvm_id_gpu_index(gpu->id)))
                    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[j], gpu), done);
                    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[j], gpu->parent), done);
                else
                    TEST_CHECK_GOTO(!uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[j], gpu), done);
                    TEST_CHECK_GOTO(!uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[j], gpu->parent), done);
            }
        }
    }

@@ -1318,8 +1168,7 @@ done_free:
    return status;
}

NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space,
                              const uvm_processor_mask_t *test_gpus)
NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
{
    uvm_cpu_chunk_t *chunk;
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();

@@ -1355,50 +1204,6 @@ static NV_STATUS test_cpu_chunk_numa_alloc(uvm_va_space_t *va_space)
    return NV_OK;
}

static uvm_gpu_t *find_first_parent_gpu(const uvm_processor_mask_t *test_gpus,
                                        uvm_va_space_t *va_space)
{
    return uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
}

static uvm_gpu_t *find_next_parent_gpu(const uvm_processor_mask_t *test_gpus,
                                       uvm_va_space_t *va_space,
                                       uvm_gpu_t *gpu)
{
    uvm_gpu_t *next_gpu = gpu;

    while (next_gpu) {
        next_gpu = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, next_gpu);
        if (!next_gpu || next_gpu->parent != gpu->parent)
            break;
    }

    return next_gpu;
}

static void find_shared_gpu_pair(const uvm_processor_mask_t *test_gpus,
                                 uvm_va_space_t *va_space,
                                 uvm_gpu_t **out_gpu0,
                                 uvm_gpu_t **out_gpu1)
{
    uvm_gpu_t *gpu0 = uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
    uvm_gpu_t *gpu1 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu0);

    while (gpu1) {
        if (gpu0->parent == gpu1->parent) {
            *out_gpu0 = gpu0;
            *out_gpu1 = gpu1;
            return;
        }

        gpu0 = gpu1;
        gpu1 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu0);
    }

    *out_gpu0 = NULL;
    *out_gpu1 = NULL;
}

NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

@@ -1423,29 +1228,13 @@ NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct f
    TEST_NV_CHECK_GOTO(test_cpu_chunk_free(va_space, test_gpus), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_numa_alloc(va_space), done);

    if (uvm_processor_mask_get_gpu_count(test_gpus) >= 2) {
        uvm_gpu_t *gpu2, *gpu3 = NULL;
    if (uvm_processor_mask_get_gpu_count(test_gpus) >= 3) {
        uvm_gpu_t *gpu2, *gpu3;

        // Look for a pair of GPUs that don't share a common parent.
        gpu = find_first_parent_gpu(test_gpus, va_space);
        gpu2 = find_next_parent_gpu(test_gpus, va_space, gpu);
        if (gpu2) {
            TEST_NV_CHECK_GOTO(test_cpu_chunk_split_and_merge_2(gpu, gpu2), done);

            // Look for a third physical GPU.
            gpu3 = find_next_parent_gpu(test_gpus, va_space, gpu2);

            if (gpu3)
                TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_array(gpu, gpu2, gpu3), done);
        }

        // Look for a pair of GPUs that share a common parent.
        find_shared_gpu_pair(test_gpus, va_space, &gpu, &gpu2);
        if (gpu) {
            // Test MIG instances within the same parent GPU.
            TEST_NV_CHECK_GOTO(test_cpu_chunk_split_and_merge_2(gpu, gpu2), done);
            TEST_NV_CHECK_GOTO(test_cpu_chunk_mig(gpu, gpu2), done);
        }
        gpu = uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
        gpu2 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu);
        gpu3 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu2);
        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_array(gpu, gpu2, gpu3), done);
    }

done:
@@ -1127,7 +1127,6 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
    // incrementally. Therefore, the reverse translations will show them in
    // order.
    uvm_for_each_va_range_in(va_range, va_space, addr, addr + size - 1) {
        uvm_va_block_t *va_block;

        for_each_va_block_in_va_range(va_range, va_block) {
            NvU32 num_va_block_pages = 0;
@@ -671,6 +671,9 @@ static NV_STATUS va_block_set_read_duplication_locked(uvm_va_block_t *va_block,

    uvm_assert_mutex_locked(&va_block->lock);

    // Force CPU page residency to be on the preferred NUMA node.
    va_block_context->make_resident.dest_nid = uvm_va_range_get_policy(va_block->va_range)->preferred_nid;

    for_each_id_in_mask(src_id, &va_block->resident) {
        NV_STATUS status;
        uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, src_id, NUMA_NO_NODE);
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2023-2024 NVIDIA Corporation
    Copyright (c) 2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -30,8 +30,6 @@ const uvm_processor_mask_t g_uvm_processor_mask_empty = { };

NV_STATUS uvm_processor_mask_cache_init(void)
{
    BUILD_BUG_ON((8 * sizeof(((uvm_sub_processor_mask_t *)0)->bitmap)) < UVM_PARENT_ID_MAX_SUB_PROCESSORS);

    g_uvm_processor_mask_cache = NV_KMEM_CACHE_CREATE("uvm_processor_mask_t", uvm_processor_mask_t);
    if (!g_uvm_processor_mask_cache)
        return NV_ERR_NO_MEMORY;

@@ -102,16 +100,8 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas

bool uvm_numa_id_eq(int nid0, int nid1)
{
    UVM_ASSERT(nid0 == -1 || nid0 < MAX_NUMNODES);
    UVM_ASSERT(nid1 == -1 || nid1 < MAX_NUMNODES);

    if ((nid0 == NUMA_NO_NODE || nid1 == NUMA_NO_NODE) && nodes_weight(node_possible_map) == 1) {
        if (nid0 == NUMA_NO_NODE)
            nid0 = first_node(node_possible_map);

        if (nid1 == NUMA_NO_NODE)
            nid1 = first_node(node_possible_map);
    }
    UVM_ASSERT(nid0 >= NUMA_NO_NODE && nid0 < MAX_NUMNODES);
    UVM_ASSERT(nid1 >= NUMA_NO_NODE && nid1 < MAX_NUMNODES);

    return nid0 == nid1;
}
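One side of the uvm_numa_id_eq() change above normalizes NUMA_NO_NODE on single-node machines so that it compares equal to the machine's only node. A standalone model of that behavior (hypothetical constant, illustration only):

#define SKETCH_NO_NODE (-1) // stands in for NUMA_NO_NODE

static int numa_id_eq_model(int nid0, int nid1, int only_node, int node_count)
{
    if (node_count == 1) { // nodes_weight(node_possible_map) == 1 above
        if (nid0 == SKETCH_NO_NODE)
            nid0 = only_node;
        if (nid1 == SKETCH_NO_NODE)
            nid1 = only_node;
    }
    return nid0 == nid1;
}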
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016-2024 NVIDIA Corporation
    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -277,6 +277,8 @@ typedef uvm_processor_id_t uvm_gpu_id_t;
#define UVM_PARENT_ID_MAX_GPUS NV_MAX_DEVICES
#define UVM_PARENT_ID_MAX_PROCESSORS (UVM_PARENT_ID_MAX_GPUS + 1)

#define UVM_PARENT_ID_MAX_SUB_PROCESSORS 8

#define UVM_ID_MAX_GPUS (UVM_PARENT_ID_MAX_GPUS * UVM_PARENT_ID_MAX_SUB_PROCESSORS)
#define UVM_ID_MAX_PROCESSORS (UVM_ID_MAX_GPUS + 1)
#define UVM_MAX_UNIQUE_GPU_PAIRS SUM_FROM_0_TO_N(UVM_ID_MAX_GPUS - 1)
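These limits define a flat GPU id space of UVM_PARENT_ID_MAX_GPUS * 8 entries, so a (parent, sub-processor) pair packs into a single index by multiply-and-add. A standalone sketch of the pack/unpack arithmetic (hypothetical constant name):

#define SKETCH_SUBS_PER_PARENT 8 // mirrors UVM_PARENT_ID_MAX_SUB_PROCESSORS

static unsigned pack_gpu_index(unsigned parent_index, unsigned sub_index)
{
    return parent_index * SKETCH_SUBS_PER_PARENT + sub_index;
}

static unsigned parent_of(unsigned gpu_index)
{
    return gpu_index / SKETCH_SUBS_PER_PARENT;
}

static unsigned sub_of(unsigned gpu_index)
{
    return gpu_index % SKETCH_SUBS_PER_PARENT;
}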
@@ -290,9 +292,6 @@ typedef uvm_processor_id_t uvm_gpu_id_t;

#define UVM_ID_CHECK_BOUNDS(id) UVM_ASSERT_MSG(id.val <= UVM_ID_MAX_PROCESSORS, "id %u\n", id.val)

#define UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index) \
    UVM_ASSERT_MSG((sub_index) < UVM_PARENT_ID_MAX_SUB_PROCESSORS, "sub_index %u\n", (sub_index))

static int uvm_parent_id_cmp(uvm_parent_processor_id_t id1, uvm_parent_processor_id_t id2)
{
    UVM_PARENT_ID_CHECK_BOUNDS(id1);

@@ -494,16 +493,11 @@ static uvm_gpu_id_t uvm_gpu_id_from_parent_gpu_id(const uvm_parent_gpu_id_t id)
static uvm_gpu_id_t uvm_gpu_id_from_sub_processor_index(NvU32 index, NvU32 sub_index)
{
    UVM_ASSERT(index < UVM_PARENT_ID_MAX_GPUS);
    UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);
    UVM_ASSERT(sub_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS);

    return uvm_gpu_id_from_index(index * UVM_PARENT_ID_MAX_SUB_PROCESSORS + sub_index);
}

static uvm_gpu_id_t uvm_gpu_id_from_sub_processor(uvm_parent_gpu_id_t id, NvU32 sub_index)
{
    return uvm_gpu_id_from_sub_processor_index(uvm_parent_id_gpu_index(id), sub_index);
}

static uvm_parent_gpu_id_t uvm_parent_gpu_id_from_gpu_id(const uvm_gpu_id_t id)
{
    UVM_ASSERT(UVM_ID_IS_GPU(id));
@@ -531,71 +525,6 @@ UVM_PROCESSOR_MASK(uvm_processor_mask_t, \
extern const uvm_processor_mask_t g_uvm_processor_mask_cpu;
extern const uvm_processor_mask_t g_uvm_processor_mask_empty;

// This is similar to uvm_parent_processor_mask_t and uvm_processor_mask_t
// but defined as a NvU8 in order to save memory since DECLARE_BITMAP() uses
// unsigned long. It also means we need to define our own bitops.
// Note that these are not atomic operations.
typedef struct
{
    NvU8 bitmap;
} uvm_sub_processor_mask_t;

static bool uvm_sub_processor_mask_test(const uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);

    return mask->bitmap & (1 << sub_index);
}

static void uvm_sub_processor_mask_set(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);

    mask->bitmap |= 1 << sub_index;
}

static void uvm_sub_processor_mask_clear(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);

    mask->bitmap &= ~(1 << sub_index);
}

static bool uvm_sub_processor_mask_test_and_set(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    bool result = uvm_sub_processor_mask_test(mask, sub_index);

    if (!result)
        uvm_sub_processor_mask_set(mask, sub_index);

    return result;
}

static bool uvm_sub_processor_mask_test_and_clear(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    bool result = uvm_sub_processor_mask_test(mask, sub_index);

    if (result)
        uvm_sub_processor_mask_clear(mask, sub_index);

    return result;
}

static void uvm_sub_processor_mask_zero(uvm_sub_processor_mask_t *mask)
{
    mask->bitmap = 0;
}

static bool uvm_sub_processor_mask_empty(const uvm_sub_processor_mask_t *mask)
{
    return mask->bitmap == 0;
}

static NvU32 uvm_sub_processor_mask_get_count(const uvm_sub_processor_mask_t *mask)
{
    return hweight8(mask->bitmap);
}
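Because the sub-processor mask is a single NvU8 rather than a DECLARE_BITMAP() array, every operation above is plain byte arithmetic. The same operations as a self-contained model (hypothetical names; popcount8() stands in for the kernel's hweight8()):

#include <stdint.h>

typedef struct { uint8_t bitmap; } sub_mask_t;

static int popcount8(uint8_t v)
{
    int count = 0;
    for (; v; v &= v - 1) // clear lowest set bit
        count++;
    return count;
}

static int sub_mask_test(const sub_mask_t *m, unsigned i) { return m->bitmap & (1u << i); }
static void sub_mask_set(sub_mask_t *m, unsigned i)       { m->bitmap |= (uint8_t)(1u << i); }
static void sub_mask_clear(sub_mask_t *m, unsigned i)     { m->bitmap &= (uint8_t)~(1u << i); }
static int sub_mask_count(const sub_mask_t *m)            { return popcount8(m->bitmap); }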
// Like uvm_processor_mask_subset() but ignores the CPU in the subset mask.
// Returns whether the GPUs in subset are a subset of the GPUs in mask.
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,

@@ -642,28 +571,8 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas
     i = uvm_gpu_id_next(i))

// Helper to iterate over all sub processor indexes.
#define for_each_sub_processor_index(sub_index) \
    for ((sub_index) = 0; (sub_index) < UVM_PARENT_ID_MAX_SUB_PROCESSORS; (sub_index)++)

static NvU32 uvm_sub_processor_mask_find_first_index(const uvm_sub_processor_mask_t *mask)
{
    unsigned long bitmap = mask->bitmap;

    return find_first_bit(&bitmap, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
}

static NvU32 uvm_sub_processor_mask_find_next_index(const uvm_sub_processor_mask_t *mask, NvU32 min_index)
{
    unsigned long bitmap = mask->bitmap;

    return find_next_bit(&bitmap, UVM_PARENT_ID_MAX_SUB_PROCESSORS, min_index);
}

// Helper to iterate over all sub processor indexes in a given mask.
#define for_each_sub_processor_index_in_mask(sub_index, sub_mask) \
    for ((sub_index) = uvm_sub_processor_mask_find_first_index((sub_mask)); \
         (sub_index) < UVM_PARENT_ID_MAX_SUB_PROCESSORS; \
         (sub_index) = uvm_sub_processor_mask_find_next_index((sub_mask), (sub_index) + 1))
#define for_each_sub_processor_index(i) \
    for (i = 0; i < UVM_PARENT_ID_MAX_SUB_PROCESSORS; i++)

// Helper to iterate over all valid processor ids.
#define for_each_id(i) for (i = UVM_ID_CPU; UVM_ID_IS_VALID(i); i = uvm_id_next(i))
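The find-first/find-next pair exists only to drive the iteration macro; semantically, the loop visits each set bit of the 8-bit mask in ascending order. An equivalent, simplified standalone loop (hypothetical helper):

#include <stdint.h>

static void visit_set_bits(uint8_t mask, void (*visit)(unsigned index))
{
    unsigned i;

    for (i = 0; i < 8; i++) {
        if (mask & (1u << i))
            visit(i);
    }
}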
@@ -65,9 +65,12 @@ typedef enum
} uvm_push_flag_t;

struct uvm_push_crypto_bundle_struct {
    // Initialization vector used to decrypt the push
    // Initialization vector used to decrypt the push on the CPU
    UvmCslIv iv;

    // Key version used to decrypt the push on the CPU
    NvU32 key_version;

    // Size of the pushbuffer that is encrypted/decrypted
    NvU32 push_size;
};
@@ -451,7 +451,6 @@ static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm
static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
    NV_STATUS status;
    NvU32 auth_tag_offset;
    void *auth_tag_cpu_va;
    void *push_protected_cpu_va;
    void *push_unprotected_cpu_va;

@@ -470,16 +469,15 @@ static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
    UVM_ASSERT(!uvm_channel_is_wlc(channel));
    UVM_ASSERT(!uvm_channel_is_lcic(channel));

    push_protected_cpu_va = (char *)get_base_cpu_va(pushbuffer) + pushbuffer_offset;
    push_protected_cpu_va = get_base_cpu_va(pushbuffer) + pushbuffer_offset;
    push_unprotected_cpu_va = (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem) + pushbuffer_offset;
    auth_tag_offset = push_info_index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
    auth_tag_cpu_va = (char *)uvm_rm_mem_get_cpu_va(channel->conf_computing.push_crypto_bundle_auth_tags) +
                      auth_tag_offset;
    auth_tag_cpu_va = uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(channel, push_info_index);

    status = uvm_conf_computing_cpu_decrypt(channel,
                                            push_protected_cpu_va,
                                            push_unprotected_cpu_va,
                                            &crypto_bundle->iv,
                                            crypto_bundle->key_version,
                                            crypto_bundle->push_size,
                                            auth_tag_cpu_va);
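Whether open-coded or wrapped in uvm_channel_get_push_crypto_bundle_auth_tags_cpu_va(), the authentication-tag address is a fixed-stride lookup: tag i lives at i times the tag size from the buffer base. A standalone sketch of the stride arithmetic (illustrative tag size, not necessarily the driver's constant):

#include <stddef.h>
#include <stdint.h>

#define SKETCH_AUTH_TAG_SIZE 32u // illustrative stand-in for UVM_CONF_COMPUTING_AUTH_TAG_SIZE

static const uint8_t *auth_tag_at(const uint8_t *auth_tags_base, uint32_t push_info_index)
{
    return auth_tags_base + (size_t)push_info_index * SKETCH_AUTH_TAG_SIZE;
}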
@@ -558,7 +556,7 @@ NvU64 uvm_pushbuffer_get_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_
    if (uvm_channel_is_wlc(push->channel) || uvm_channel_is_lcic(push->channel)) {
        // We need to use the same static locations for PB as the fixed
        // schedule because that's what the channels are initialized to use.
        return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_protected_vidmem, gpu);
        return uvm_channel_get_static_pb_protected_vidmem_gpu_va(push->channel);
    }
    else if (uvm_channel_is_sec2(push->channel)) {
        // SEC2 PBs are in unprotected sysmem

@@ -575,7 +573,7 @@ void *uvm_pushbuffer_get_unprotected_cpu_va_for_push(uvm_pushbuffe
    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse existing WLC static pb for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return push->channel->conf_computing.static_pb_unprotected_sysmem_cpu;
        return uvm_channel_get_static_pb_unprotected_sysmem_cpu(push->channel);
    }

    pushbuffer_base = uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem);

@@ -590,8 +588,8 @@ NvU64 uvm_pushbuffer_get_unprotected_gpu_va_for_push(uvm_pushbuffe
    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse existing WLC static pb for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_unprotected_sysmem,
                                         uvm_push_get_gpu(push));

        return uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(push->channel);
    }

    pushbuffer_base = uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, uvm_push_get_gpu(push));
@@ -322,6 +322,7 @@ static NV_STATUS cpu_decrypt(uvm_channel_t *channel,
                             uvm_mem_t *dst_mem,
                             uvm_mem_t *src_mem,
                             UvmCslIv *decrypt_iv,
                             NvU32 key_version,
                             uvm_mem_t *auth_tag_mem,
                             size_t size,
                             size_t copy_size)

@@ -338,6 +339,7 @@ static NV_STATUS cpu_decrypt(uvm_channel_t *channel,
                                                  dst_plain,
                                                  src_cipher,
                                                  &decrypt_iv[i],
                                                  key_version,
                                                  copy_size,
                                                  auth_tag_buffer));

@@ -368,7 +370,7 @@ static void gpu_encrypt(uvm_push_t *push,
    uvm_gpu_address_t auth_tag_address = uvm_mem_gpu_address_virtual_kernel(auth_tag_mem, gpu);

    for (i = 0; i < num_iterations; i++) {
        uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
        uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);

        if (i > 0)
            uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);

@@ -427,6 +429,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
    size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
    uvm_push_t push;
    UvmCslIv *decrypt_iv;
    NvU32 key_version;

    decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
    if (!decrypt_iv)

@@ -456,6 +459,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz

    gpu_encrypt(&push, dst_cipher, dst_plain, decrypt_iv, auth_tag_mem, size, copy_size);

    // No key rotation is expected between the end of the push and the CPU
    // decryption(s), but forcing decryption to use the saved key version makes
    // the test more robust against future changes.
    key_version = uvm_channel_pool_key_version(push.channel->pool);

    TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);

    TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher), out);

@@ -465,6 +473,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
                                           dst_plain_cpu,
                                           dst_cipher,
                                           decrypt_iv,
                                           key_version,
                                           auth_tag_mem,
                                           size,
                                           copy_size),
@@ -124,24 +124,23 @@ static NV_STATUS uvm_test_verify_bh_affinity(uvm_intr_handler_t *isr, int node)
static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAMS *params, struct file *filp)
{
    uvm_gpu_t *gpu;
    NV_STATUS status;
    uvm_rm_user_object_t user_rm_va_space = {
        .rm_control_fd = -1,
        .user_client = params->client,
        .user_object = params->smc_part_ref
    };
    NV_STATUS status = NV_OK;

    if (!UVM_THREAD_AFFINITY_SUPPORTED())
        return NV_ERR_NOT_SUPPORTED;

    status = uvm_gpu_retain_by_uuid(&params->gpu_uuid, &user_rm_va_space, &gpu);
    if (status != NV_OK)
        return status;
    uvm_mutex_lock(&g_uvm_global.global_lock);

    gpu = uvm_gpu_get_by_uuid(&params->gpu_uuid);
    if (!gpu) {
        status = NV_ERR_INVALID_DEVICE;
        goto unlock;
    }

    // If the GPU is not attached to a NUMA node, there is nothing to do.
    if (gpu->parent->closest_cpu_numa_node == NUMA_NO_NODE) {
        status = NV_ERR_NOT_SUPPORTED;
        goto release;
        goto unlock;
    }

    if (gpu->parent->replayable_faults_supported) {

@@ -150,7 +149,7 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
                                            gpu->parent->closest_cpu_numa_node);
        uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
        if (status != NV_OK)
            goto release;
            goto unlock;

        if (gpu->parent->non_replayable_faults_supported) {
            uvm_parent_gpu_non_replayable_faults_isr_lock(gpu->parent);

@@ -158,7 +157,7 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
                                                gpu->parent->closest_cpu_numa_node);
            uvm_parent_gpu_non_replayable_faults_isr_unlock(gpu->parent);
            if (status != NV_OK)
                goto release;
                goto unlock;
        }

        if (gpu->parent->access_counters_supported) {

@@ -168,8 +167,9 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
            uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
        }
    }
release:
    uvm_gpu_release(gpu);

unlock:
    uvm_mutex_unlock(&g_uvm_global.global_lock);
    return status;
}
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVidia Corporation
|
||||
Copyright (c) 2015-2022 NVidia Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -191,7 +191,7 @@ typedef struct
|
||||
NvU32 read_duplication; // Out (UVM_TEST_READ_DUPLICATION_POLICY)
|
||||
NvProcessorUuid preferred_location; // Out
|
||||
NvS32 preferred_cpu_nid; // Out
|
||||
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS]; // Out
|
||||
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS_V2]; // Out
|
||||
NvU32 accessed_by_count; // Out
|
||||
NvU32 type; // Out (UVM_TEST_VA_RANGE_TYPE)
|
||||
union
|
||||
@@ -347,20 +347,30 @@ typedef enum
     UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH = 0,
     UVM_TEST_CHANNEL_STRESS_MODE_UPDATE_CHANNELS,
     UVM_TEST_CHANNEL_STRESS_MODE_STREAM,
+    UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION,
 } UVM_TEST_CHANNEL_STRESS_MODE;
 
+typedef enum
+{
+    UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU,
+    UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU,
+    UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE,
+} UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION;
+
 #define UVM_TEST_CHANNEL_STRESS UVM_TEST_IOCTL_BASE(15)
 typedef struct
 {
-    NvU32 mode; // In
+    NvU32 mode; // In, one of UVM_TEST_CHANNEL_STRESS_MODE
 
     // Number of iterations:
     // mode == NOOP_PUSH: number of noop pushes
     // mode == UPDATE_CHANNELS: number of updates
     // mode == STREAM: number of iterations per stream
+    // mode == ROTATION: number of operations
     NvU32 iterations;
 
-    NvU32 num_streams; // In, used only for mode == UVM_TEST_CHANNEL_STRESS_MODE_STREAM
+    NvU32 num_streams; // In, used only if mode == STREAM
+    NvU32 key_rotation_operation; // In, used only if mode == ROTATION
     NvU32 seed; // In
     NvU32 verbose; // In
     NV_STATUS rmStatus; // Out
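
For illustration only: a user-space test client could exercise the new key-rotation mode roughly as sketched below. The params typedef name (UVM_TEST_CHANNEL_STRESS_PARAMS here), the file-descriptor handling, and the plain ioctl() call are assumptions built around the UVM_TEST_IOCTL_BASE(15) define above; they are not taken from this diff.

    /* Hypothetical sketch; UVM_TEST_CHANNEL_STRESS_PARAMS, the uvm_fd argument
     * and the direct ioctl() call are assumptions, not part of this diff. */
    #include <string.h>
    #include <sys/ioctl.h>

    static int stress_key_rotation(int uvm_fd, NvU32 iterations)
    {
        UVM_TEST_CHANNEL_STRESS_PARAMS params;

        memset(&params, 0, sizeof(params));
        params.mode                   = UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION;
        params.key_rotation_operation = UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE;
        params.iterations             = iterations; // mode == ROTATION: number of operations
        params.seed                   = 0;
        params.verbose                = 0;

        // UVM_TEST_CHANNEL_STRESS is UVM_TEST_IOCTL_BASE(15); issuing it with
        // ioctl() on the nvidia-uvm fd is an assumption of this sketch.
        if (ioctl(uvm_fd, UVM_TEST_CHANNEL_STRESS, &params) != 0)
            return -1;

        return params.rmStatus == NV_OK ? 0 : -1;
    }
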
@@ -624,7 +634,7 @@ typedef struct
 
     // Array of processors which have a resident copy of the page containing
     // lookup_address.
-    NvProcessorUuid resident_on[UVM_MAX_PROCESSORS]; // Out
+    NvProcessorUuid resident_on[UVM_MAX_PROCESSORS_V2]; // Out
     NvU32 resident_on_count; // Out
 
     // If the memory is resident on the CPU, the NUMA node on which the page
@@ -635,24 +645,24 @@ typedef struct
     // system-page-sized portion of this allocation which contains
     // lookup_address is guaranteed to be resident on the corresponding
     // processor.
-    NvU32 resident_physical_size[UVM_MAX_PROCESSORS]; // Out
+    NvU32 resident_physical_size[UVM_MAX_PROCESSORS_V2]; // Out
 
     // The physical address of the physical allocation backing lookup_address.
-    NvU64 resident_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
+    NvU64 resident_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
 
     // Array of processors which have a virtual mapping covering lookup_address.
-    NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS]; // Out
-    NvU32 mapping_type[UVM_MAX_PROCESSORS]; // Out
-    NvU64 mapping_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
+    NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS_V2]; // Out
+    NvU32 mapping_type[UVM_MAX_PROCESSORS_V2]; // Out
+    NvU64 mapping_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
     NvU32 mapped_on_count; // Out
 
     // The size of the virtual mapping covering lookup_address on each
     // mapped_on processor.
-    NvU32 page_size[UVM_MAX_PROCESSORS]; // Out
+    NvU32 page_size[UVM_MAX_PROCESSORS_V2]; // Out
 
     // Array of processors which have physical memory populated that would back
     // lookup_address if it was resident.
-    NvProcessorUuid populated_on[UVM_MAX_PROCESSORS]; // Out
+    NvProcessorUuid populated_on[UVM_MAX_PROCESSORS_V2]; // Out
     NvU32 populated_on_count; // Out
 
     NV_STATUS rmStatus; // Out
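
A reading aid for the structure above: the residency arrays are parallel, so entry i of resident_on, resident_physical_size and resident_physical_address describes the same processor, and only the first resident_on_count entries are valid (likewise the mapped_on group with mapped_on_count, and populated_on with populated_on_count). A minimal sketch of a consumer follows; the lookup_params_t typedef name and the uuid_to_string() helper are assumptions, not taken from this diff.

    /* Hypothetical consumer; "lookup_params_t" stands in for the real params
     * typedef and uuid_to_string() is an assumed pretty-printer. */
    #include <stdio.h>

    static void print_residency(const lookup_params_t *p)
    {
        NvU32 i;

        for (i = 0; i < p->resident_on_count; i++) {
            char uuid_buf[64];

            uuid_to_string(&p->resident_on[i], uuid_buf, sizeof(uuid_buf));

            // Entry i of each parallel array describes the same processor.
            printf("%s: %u bytes resident at 0x%llx\n",
                   uuid_buf,
                   p->resident_physical_size[i],
                   (unsigned long long)p->resident_physical_address[i]);
        }
    }
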
@@ -1210,8 +1220,6 @@ typedef struct
 typedef struct
 {
     NvProcessorUuid gpu_uuid; // In
-    NvHandle client; // In
-    NvHandle smc_part_ref; // In
 
     NV_STATUS rmStatus; // Out
 } UVM_TEST_NUMA_CHECK_AFFINITY_PARAMS;

@@ -30,18 +30,18 @@ void uvm_tlb_batch_begin(uvm_page_tree_t *tree, uvm_tlb_batch_t *batch)
     batch->tree = tree;
 }
 
-static NvU64 smallest_page_size(NvU64 page_sizes)
+static NvU32 smallest_page_size(NvU32 page_sizes)
 {
     UVM_ASSERT(page_sizes != 0);
 
-    return 1ULL << __ffs(page_sizes);
+    return 1u << __ffs(page_sizes);
 }
 
-static NvU64 biggest_page_size(NvU64 page_sizes)
+static NvU32 biggest_page_size(NvU32 page_sizes)
 {
     UVM_ASSERT(page_sizes != 0);
 
-    return 1ULL << __fls(page_sizes);
+    return 1u << __fls(page_sizes);
 }
 
 static void tlb_batch_flush_invalidate_per_va(uvm_tlb_batch_t *batch, uvm_push_t *push)
@@ -53,8 +53,8 @@ static void tlb_batch_flush_invalidate_per_va(uvm_tlb_batch_t *batch, uvm_push_t
 
     for (i = 0; i < batch->count; ++i) {
         uvm_tlb_batch_range_t *entry = &batch->ranges[i];
-        NvU64 min_page_size = smallest_page_size(entry->page_sizes);
-        NvU64 max_page_size = biggest_page_size(entry->page_sizes);
+        NvU32 min_page_size = smallest_page_size(entry->page_sizes);
+        NvU32 max_page_size = biggest_page_size(entry->page_sizes);
 
         // Use the depth of the max page size as it's the broadest
         NvU32 depth = tree->hal->page_table_depth(max_page_size);
@@ -113,7 +113,7 @@ void uvm_tlb_batch_end(uvm_tlb_batch_t *batch, uvm_push_t *push, uvm_membar_t tl
     tlb_batch_flush_invalidate_per_va(batch, push);
 }
 
-void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU64 page_sizes, uvm_membar_t tlb_membar)
+void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar)
 {
     uvm_tlb_batch_range_t *new_entry;
 

@@ -41,7 +41,7 @@ typedef struct
     NvU64 size;
 
     // Min and max page size ored together
-    NvU64 page_sizes;
+    NvU32 page_sizes;
 } uvm_tlb_batch_range_t;
 
 struct uvm_tlb_batch_struct
@@ -63,7 +63,7 @@ struct uvm_tlb_batch_struct
     NvU32 count;
 
     // Biggest page size across all queued up invalidates
-    NvU64 biggest_page_size;
+    NvU32 biggest_page_size;
 
     // Max membar across all queued up invalidates
     uvm_membar_t membar;
@@ -81,7 +81,7 @@ void uvm_tlb_batch_begin(uvm_page_tree_t *tree, uvm_tlb_batch_t *batch);
 // If the membar parameter is not UVM_MEMBAR_NONE, the specified membar will
 // be performed logically after the TLB invalidate such that all physical memory
 // accesses using the old translations are ordered to the scope of the membar.
-void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU64 page_sizes, uvm_membar_t tlb_membar);
+void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar);
 
 // End a TLB invalidate batch
 //
@@ -97,12 +97,8 @@ void uvm_tlb_batch_end(uvm_tlb_batch_t *batch, uvm_push_t *push, uvm_membar_t tl
 // Helper for invalidating a single range immediately.
 //
 // Internally begins and ends a TLB batch.
-static void uvm_tlb_batch_single_invalidate(uvm_page_tree_t *tree,
-                                            uvm_push_t *push,
-                                            NvU64 start,
-                                            NvU64 size,
-                                            NvU64 page_sizes,
-                                            uvm_membar_t tlb_membar)
+static void uvm_tlb_batch_single_invalidate(uvm_page_tree_t *tree, uvm_push_t *push,
+                                            NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar)
 {
     uvm_tlb_batch_t batch;
 
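
The helper's body is cut off in the hunk above. The following is a minimal sketch of the begin/invalidate/end pattern the comment describes, built only from the declarations visible in this diff; which of the two calls receives tlb_membar versus UVM_MEMBAR_NONE is an assumption.

    // Minimal sketch, not the verbatim implementation.
    static void uvm_tlb_batch_single_invalidate(uvm_page_tree_t *tree, uvm_push_t *push,
                                                NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar)
    {
        uvm_tlb_batch_t batch;

        // Queue a single range and immediately flush it in the given push.
        uvm_tlb_batch_begin(tree, &batch);
        uvm_tlb_batch_invalidate(&batch, start, size, page_sizes, tlb_membar);
        uvm_tlb_batch_end(&batch, push, UVM_MEMBAR_NONE);
    }
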
Some files were not shown because too many files have changed in this diff.